1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
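// The kinds of registers an operand can name: vector (VGPR), scalar (SGPR),
// accumulation (AGPR) and trap-temporary (TTMP) registers, plus special
// registers such as EXEC, VCC and M0.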
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
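// A single parsed assembly operand: a token, an immediate, a register, or an
// expression, optionally carrying source modifiers.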
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
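  // Source operand modifiers. For example, an operand written as -|v0| has
  // both Neg and Abs set, and sext(v0) sets Sext; FP and integer modifiers
  // are mutually exclusive.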
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
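  // Distinguishes the named immediate-like operands (modifier values, named
  // bits, offsets, etc.) so they can be told apart from plain immediates.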
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
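  // Operand payload, discriminated by Kind.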
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is the MatchClass of the GLC_1 operand, i.e. a GLC operand whose
  // value is both defaulted and forced (see defaultGLC_1()).
340   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
341   bool isSLC() const { return isImmTy(ImmTySLC); }
342   bool isSWZ() const { return isImmTy(ImmTySWZ); }
343   bool isTFE() const { return isImmTy(ImmTyTFE); }
344   bool isD16() const { return isImmTy(ImmTyD16); }
345   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
346   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
347   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
348   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
349   bool isFI() const { return isImmTy(ImmTyDppFi); }
350   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
351   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
352   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
353   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
354   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
355   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
356   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
357   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
358   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
359   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
360   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
361   bool isHigh() const { return isImmTy(ImmTyHigh); }
362 
363   bool isMod() const {
364     return isClampSI() || isOModSI();
365   }
366 
367   bool isRegOrImm() const {
368     return isReg() || isImm();
369   }
370 
371   bool isRegClass(unsigned RCID) const;
372 
373   bool isInlineValue() const;
374 
375   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
376     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
377   }
378 
379   bool isSCSrcB16() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
381   }
382 
383   bool isSCSrcV2B16() const {
384     return isSCSrcB16();
385   }
386 
387   bool isSCSrcB32() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
389   }
390 
391   bool isSCSrcB64() const {
392     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
393   }
394 
395   bool isBoolReg() const;
396 
397   bool isSCSrcF16() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
399   }
400 
401   bool isSCSrcV2F16() const {
402     return isSCSrcF16();
403   }
404 
405   bool isSCSrcF32() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
407   }
408 
409   bool isSCSrcF64() const {
410     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
411   }
412 
413   bool isSSrcB32() const {
414     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
415   }
416 
417   bool isSSrcB16() const {
418     return isSCSrcB16() || isLiteralImm(MVT::i16);
419   }
420 
421   bool isSSrcV2B16() const {
422     llvm_unreachable("cannot happen");
423     return isSSrcB16();
424   }
425 
426   bool isSSrcB64() const {
427     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
428     // See isVSrc64().
429     return isSCSrcB64() || isLiteralImm(MVT::i64);
430   }
431 
432   bool isSSrcF32() const {
433     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
434   }
435 
436   bool isSSrcF64() const {
437     return isSCSrcB64() || isLiteralImm(MVT::f64);
438   }
439 
440   bool isSSrcF16() const {
441     return isSCSrcB16() || isLiteralImm(MVT::f16);
442   }
443 
444   bool isSSrcV2F16() const {
445     llvm_unreachable("cannot happen");
446     return isSSrcF16();
447   }
448 
449   bool isSSrcOrLdsB32() const {
450     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
451            isLiteralImm(MVT::i32) || isExpr();
452   }
453 
454   bool isVCSrcB32() const {
455     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
456   }
457 
458   bool isVCSrcB64() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
460   }
461 
462   bool isVCSrcB16() const {
463     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
464   }
465 
466   bool isVCSrcV2B16() const {
467     return isVCSrcB16();
468   }
469 
470   bool isVCSrcF32() const {
471     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
472   }
473 
474   bool isVCSrcF64() const {
475     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
476   }
477 
478   bool isVCSrcF16() const {
479     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
480   }
481 
482   bool isVCSrcV2F16() const {
483     return isVCSrcF16();
484   }
485 
486   bool isVSrcB32() const {
487     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
488   }
489 
490   bool isVSrcB64() const {
491     return isVCSrcF64() || isLiteralImm(MVT::i64);
492   }
493 
494   bool isVSrcB16() const {
495     return isVCSrcB16() || isLiteralImm(MVT::i16);
496   }
497 
498   bool isVSrcV2B16() const {
499     return isVSrcB16() || isLiteralImm(MVT::v2i16);
500   }
501 
502   bool isVSrcF32() const {
503     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
504   }
505 
506   bool isVSrcF64() const {
507     return isVCSrcF64() || isLiteralImm(MVT::f64);
508   }
509 
510   bool isVSrcF16() const {
511     return isVCSrcF16() || isLiteralImm(MVT::f16);
512   }
513 
514   bool isVSrcV2F16() const {
515     return isVSrcF16() || isLiteralImm(MVT::v2f16);
516   }
517 
518   bool isVISrcB32() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
520   }
521 
522   bool isVISrcB16() const {
523     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
524   }
525 
526   bool isVISrcV2B16() const {
527     return isVISrcB16();
528   }
529 
530   bool isVISrcF32() const {
531     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
532   }
533 
534   bool isVISrcF16() const {
535     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
536   }
537 
538   bool isVISrcV2F16() const {
539     return isVISrcF16() || isVISrcB32();
540   }
541 
542   bool isAISrcB32() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
544   }
545 
546   bool isAISrcB16() const {
547     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
548   }
549 
550   bool isAISrcV2B16() const {
551     return isAISrcB16();
552   }
553 
554   bool isAISrcF32() const {
555     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
556   }
557 
558   bool isAISrcF16() const {
559     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
560   }
561 
562   bool isAISrcV2F16() const {
563     return isAISrcF16() || isAISrcB32();
564   }
565 
566   bool isAISrc_128B32() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
568   }
569 
570   bool isAISrc_128B16() const {
571     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
572   }
573 
574   bool isAISrc_128V2B16() const {
575     return isAISrc_128B16();
576   }
577 
578   bool isAISrc_128F32() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
580   }
581 
582   bool isAISrc_128F16() const {
583     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
584   }
585 
586   bool isAISrc_128V2F16() const {
587     return isAISrc_128F16() || isAISrc_128B32();
588   }
589 
590   bool isAISrc_512B32() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
592   }
593 
594   bool isAISrc_512B16() const {
595     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
596   }
597 
598   bool isAISrc_512V2B16() const {
599     return isAISrc_512B16();
600   }
601 
602   bool isAISrc_512F32() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
604   }
605 
606   bool isAISrc_512F16() const {
607     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
608   }
609 
610   bool isAISrc_512V2F16() const {
611     return isAISrc_512F16() || isAISrc_512B32();
612   }
613 
614   bool isAISrc_1024B32() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
616   }
617 
618   bool isAISrc_1024B16() const {
619     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
620   }
621 
622   bool isAISrc_1024V2B16() const {
623     return isAISrc_1024B16();
624   }
625 
626   bool isAISrc_1024F32() const {
627     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
628   }
629 
630   bool isAISrc_1024F16() const {
631     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
632   }
633 
634   bool isAISrc_1024V2F16() const {
635     return isAISrc_1024F16() || isAISrc_1024B32();
636   }
637 
638   bool isKImmFP32() const {
639     return isLiteralImm(MVT::f32);
640   }
641 
642   bool isKImmFP16() const {
643     return isLiteralImm(MVT::f16);
644   }
645 
646   bool isMem() const override {
647     return false;
648   }
649 
650   bool isExpr() const {
651     return Kind == Expression;
652   }
653 
654   bool isSoppBrTarget() const {
655     return isExpr() || isImm();
656   }
657 
658   bool isSWaitCnt() const;
659   bool isHwreg() const;
660   bool isSendMsg() const;
661   bool isSwizzle() const;
662   bool isSMRDOffset8() const;
663   bool isSMEMOffset() const;
664   bool isSMRDLiteralOffset() const;
665   bool isDPP8() const;
666   bool isDPPCtrl() const;
667   bool isBLGP() const;
668   bool isCBSZ() const;
669   bool isABID() const;
670   bool isGPRIdxMode() const;
671   bool isS16Imm() const;
672   bool isU16Imm() const;
673   bool isEndpgm() const;
674 
675   StringRef getExpressionAsToken() const {
676     assert(isExpr());
677     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
678     return S->getSymbol().getName();
679   }
680 
681   StringRef getToken() const {
682     assert(isToken());
683 
684     if (Kind == Expression)
685       return getExpressionAsToken();
686 
687     return StringRef(Tok.Data, Tok.Length);
688   }
689 
690   int64_t getImm() const {
691     assert(isImm());
692     return Imm.Val;
693   }
694 
695   void setImm(int64_t Val) {
696     assert(isImm());
697     Imm.Val = Val;
698   }
699 
700   ImmTy getImmTy() const {
701     assert(isImm());
702     return Imm.Type;
703   }
704 
705   unsigned getReg() const override {
706     assert(isRegKind());
707     return Reg.RegNo;
708   }
709 
710   SMLoc getStartLoc() const override {
711     return StartLoc;
712   }
713 
714   SMLoc getEndLoc() const override {
715     return EndLoc;
716   }
717 
718   SMRange getLocRange() const {
719     return SMRange(StartLoc, EndLoc);
720   }
721 
722   Modifiers getModifiers() const {
723     assert(isRegKind() || isImmTy(ImmTyNone));
724     return isRegKind() ? Reg.Mods : Imm.Mods;
725   }
726 
727   void setModifiers(Modifiers Mods) {
728     assert(isRegKind() || isImmTy(ImmTyNone));
729     if (isRegKind())
730       Reg.Mods = Mods;
731     else
732       Imm.Mods = Mods;
733   }
734 
735   bool hasModifiers() const {
736     return getModifiers().hasModifiers();
737   }
738 
739   bool hasFPModifiers() const {
740     return getModifiers().hasFPModifiers();
741   }
742 
743   bool hasIntModifiers() const {
744     return getModifiers().hasIntModifiers();
745   }
746 
747   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
748 
749   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
750 
751   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
752 
753   template <unsigned Bitwidth>
754   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
755 
756   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
757     addKImmFPOperands<16>(Inst, N);
758   }
759 
760   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
761     addKImmFPOperands<32>(Inst, N);
762   }
763 
764   void addRegOperands(MCInst &Inst, unsigned N) const;
765 
766   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
767     addRegOperands(Inst, N);
768   }
769 
770   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
771     if (isRegKind())
772       addRegOperands(Inst, N);
773     else if (isExpr())
774       Inst.addOperand(MCOperand::createExpr(Expr));
775     else
776       addImmOperands(Inst, N);
777   }
778 
779   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
780     Modifiers Mods = getModifiers();
781     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
782     if (isRegKind()) {
783       addRegOperands(Inst, N);
784     } else {
785       addImmOperands(Inst, N, false);
786     }
787   }
788 
789   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790     assert(!hasIntModifiers());
791     addRegOrImmWithInputModsOperands(Inst, N);
792   }
793 
794   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795     assert(!hasFPModifiers());
796     addRegOrImmWithInputModsOperands(Inst, N);
797   }
798 
799   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
800     Modifiers Mods = getModifiers();
801     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
802     assert(isRegKind());
803     addRegOperands(Inst, N);
804   }
805 
806   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
807     assert(!hasIntModifiers());
808     addRegWithInputModsOperands(Inst, N);
809   }
810 
811   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
812     assert(!hasFPModifiers());
813     addRegWithInputModsOperands(Inst, N);
814   }
815 
816   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
817     if (isImm())
818       addImmOperands(Inst, N);
819     else {
820       assert(isExpr());
821       Inst.addOperand(MCOperand::createExpr(Expr));
822     }
823   }
824 
825   static void printImmTy(raw_ostream& OS, ImmTy Type) {
826     switch (Type) {
827     case ImmTyNone: OS << "None"; break;
828     case ImmTyGDS: OS << "GDS"; break;
829     case ImmTyLDS: OS << "LDS"; break;
830     case ImmTyOffen: OS << "Offen"; break;
831     case ImmTyIdxen: OS << "Idxen"; break;
832     case ImmTyAddr64: OS << "Addr64"; break;
833     case ImmTyOffset: OS << "Offset"; break;
834     case ImmTyInstOffset: OS << "InstOffset"; break;
835     case ImmTyOffset0: OS << "Offset0"; break;
836     case ImmTyOffset1: OS << "Offset1"; break;
837     case ImmTyDLC: OS << "DLC"; break;
838     case ImmTyGLC: OS << "GLC"; break;
839     case ImmTySLC: OS << "SLC"; break;
840     case ImmTySWZ: OS << "SWZ"; break;
841     case ImmTyTFE: OS << "TFE"; break;
842     case ImmTyD16: OS << "D16"; break;
843     case ImmTyFORMAT: OS << "FORMAT"; break;
844     case ImmTyClampSI: OS << "ClampSI"; break;
845     case ImmTyOModSI: OS << "OModSI"; break;
846     case ImmTyDPP8: OS << "DPP8"; break;
847     case ImmTyDppCtrl: OS << "DppCtrl"; break;
848     case ImmTyDppRowMask: OS << "DppRowMask"; break;
849     case ImmTyDppBankMask: OS << "DppBankMask"; break;
850     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
851     case ImmTyDppFi: OS << "FI"; break;
852     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
853     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
854     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
855     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
856     case ImmTyDMask: OS << "DMask"; break;
857     case ImmTyDim: OS << "Dim"; break;
858     case ImmTyUNorm: OS << "UNorm"; break;
859     case ImmTyDA: OS << "DA"; break;
860     case ImmTyR128A16: OS << "R128A16"; break;
861     case ImmTyA16: OS << "A16"; break;
862     case ImmTyLWE: OS << "LWE"; break;
863     case ImmTyOff: OS << "Off"; break;
864     case ImmTyExpTgt: OS << "ExpTgt"; break;
865     case ImmTyExpCompr: OS << "ExpCompr"; break;
866     case ImmTyExpVM: OS << "ExpVM"; break;
867     case ImmTyHwreg: OS << "Hwreg"; break;
868     case ImmTySendMsg: OS << "SendMsg"; break;
869     case ImmTyInterpSlot: OS << "InterpSlot"; break;
870     case ImmTyInterpAttr: OS << "InterpAttr"; break;
871     case ImmTyAttrChan: OS << "AttrChan"; break;
872     case ImmTyOpSel: OS << "OpSel"; break;
873     case ImmTyOpSelHi: OS << "OpSelHi"; break;
874     case ImmTyNegLo: OS << "NegLo"; break;
875     case ImmTyNegHi: OS << "NegHi"; break;
876     case ImmTySwizzle: OS << "Swizzle"; break;
877     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
878     case ImmTyHigh: OS << "High"; break;
879     case ImmTyBLGP: OS << "BLGP"; break;
880     case ImmTyCBSZ: OS << "CBSZ"; break;
881     case ImmTyABID: OS << "ABID"; break;
882     case ImmTyEndpgm: OS << "Endpgm"; break;
883     }
884   }
885 
886   void print(raw_ostream &OS) const override {
887     switch (Kind) {
888     case Register:
889       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
890       break;
891     case Immediate:
892       OS << '<' << getImm();
893       if (getImmTy() != ImmTyNone) {
894         OS << " type: "; printImmTy(OS, getImmTy());
895       }
896       OS << " mods: " << Imm.Mods << '>';
897       break;
898     case Token:
899       OS << '\'' << getToken() << '\'';
900       break;
901     case Expression:
902       OS << "<expr " << *Expr << '>';
903       break;
904     }
905   }
906 
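  // Factory helpers for constructing operands of each kind.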
907   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
908                                       int64_t Val, SMLoc Loc,
909                                       ImmTy Type = ImmTyNone,
910                                       bool IsFPImm = false) {
911     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
912     Op->Imm.Val = Val;
913     Op->Imm.IsFPImm = IsFPImm;
914     Op->Imm.Type = Type;
915     Op->Imm.Mods = Modifiers();
916     Op->StartLoc = Loc;
917     Op->EndLoc = Loc;
918     return Op;
919   }
920 
921   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
922                                         StringRef Str, SMLoc Loc,
923                                         bool HasExplicitEncodingSize = true) {
924     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
925     Res->Tok.Data = Str.data();
926     Res->Tok.Length = Str.size();
927     Res->StartLoc = Loc;
928     Res->EndLoc = Loc;
929     return Res;
930   }
931 
932   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
933                                       unsigned RegNo, SMLoc S,
934                                       SMLoc E) {
935     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
936     Op->Reg.RegNo = RegNo;
937     Op->Reg.Mods = Modifiers();
938     Op->StartLoc = S;
939     Op->EndLoc = E;
940     return Op;
941   }
942 
943   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
944                                        const class MCExpr *Expr, SMLoc S) {
945     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
946     Op->Expr = Expr;
947     Op->StartLoc = S;
948     Op->EndLoc = S;
949     return Op;
950   }
951 };
952 
953 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
955   return OS;
956 }
957 
958 //===----------------------------------------------------------------------===//
959 // AsmParser
960 //===----------------------------------------------------------------------===//
961 
962 // Holds info related to the current kernel, e.g. count of SGPRs used.
963 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
964 // .amdgpu_hsa_kernel or at EOF.
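// For example, parsing a use of v3 leads to usesRegister(IS_VGPR, 3, 1),
// which raises .kernel.vgpr_count to at least 4.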
965 class KernelScopeInfo {
966   int SgprIndexUnusedMin = -1;
967   int VgprIndexUnusedMin = -1;
968   MCContext *Ctx = nullptr;
969 
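  // Record that SGPR/VGPR index i is in use and keep the corresponding
  // .kernel.sgpr_count / .kernel.vgpr_count symbol up to date.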
970   void usesSgprAt(int i) {
971     if (i >= SgprIndexUnusedMin) {
972       SgprIndexUnusedMin = ++i;
973       if (Ctx) {
974         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
975         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
976       }
977     }
978   }
979 
980   void usesVgprAt(int i) {
981     if (i >= VgprIndexUnusedMin) {
982       VgprIndexUnusedMin = ++i;
983       if (Ctx) {
984         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
985         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
986       }
987     }
988   }
989 
990 public:
991   KernelScopeInfo() = default;
992 
993   void initialize(MCContext &Context) {
994     Ctx = &Context;
995     usesSgprAt(SgprIndexUnusedMin = -1);
996     usesVgprAt(VgprIndexUnusedMin = -1);
997   }
998 
999   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1000     switch (RegKind) {
1001       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1002       case IS_AGPR: // fall through
1003       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1004       default: break;
1005     }
1006   }
1007 };
1008 
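// Target assembly parser for AMDGPU: parses instructions, registers and
// target-specific directives, and emits MCInsts and streamer calls.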
1009 class AMDGPUAsmParser : public MCTargetAsmParser {
1010   MCAsmParser &Parser;
1011 
1012   // Number of extra operands parsed after the first optional operand.
1013   // This may be necessary to skip hardcoded mandatory operands.
1014   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1015 
1016   unsigned ForcedEncodingSize = 0;
1017   bool ForcedDPP = false;
1018   bool ForcedSDWA = false;
1019   KernelScopeInfo KernelScope;
1020 
1021   /// @name Auto-generated Match Functions
1022   /// {
1023 
1024 #define GET_ASSEMBLER_HEADER
1025 #include "AMDGPUGenAsmMatcher.inc"
1026 
1027   /// }
1028 
1029 private:
1030   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1031   bool OutOfRangeError(SMRange Range);
1032   /// Calculate VGPR/SGPR blocks required for given target, reserved
1033   /// registers, and user-specified NextFreeXGPR values.
1034   ///
1035   /// \param Features [in] Target features, used for bug corrections.
1036   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1037   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1038   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1039   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1040   /// descriptor field, if valid.
1041   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1042   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1043   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1044   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1045   /// \param VGPRBlocks [out] Result VGPR block count.
1046   /// \param SGPRBlocks [out] Result SGPR block count.
1047   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1048                           bool FlatScrUsed, bool XNACKUsed,
1049                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1050                           SMRange VGPRRange, unsigned NextFreeSGPR,
1051                           SMRange SGPRRange, unsigned &VGPRBlocks,
1052                           unsigned &SGPRBlocks);
1053   bool ParseDirectiveAMDGCNTarget();
1054   bool ParseDirectiveAMDHSAKernel();
1055   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1056   bool ParseDirectiveHSACodeObjectVersion();
1057   bool ParseDirectiveHSACodeObjectISA();
1058   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1059   bool ParseDirectiveAMDKernelCodeT();
1060   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1061   bool ParseDirectiveAMDGPUHsaKernel();
1062 
1063   bool ParseDirectiveISAVersion();
1064   bool ParseDirectiveHSAMetadata();
1065   bool ParseDirectivePALMetadataBegin();
1066   bool ParseDirectivePALMetadata();
1067   bool ParseDirectiveAMDGPULDS();
1068 
1069   /// Common code to parse out a block of text (typically YAML) between start and
1070   /// end directives.
1071   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1072                            const char *AssemblerDirectiveEnd,
1073                            std::string &CollectString);
1074 
1075   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1076                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            bool RestoreOnFailure = false);
1080   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1081                            unsigned &RegNum, unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1087                            unsigned &RegWidth,
1088                            SmallVectorImpl<AsmToken> &Tokens);
1089   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1090                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1091   bool ParseRegRange(unsigned& Num, unsigned& Width);
1092   unsigned getRegularReg(RegisterKind RegKind,
1093                          unsigned RegNum,
1094                          unsigned RegWidth,
1095                          SMLoc Loc);
1096 
1097   bool isRegister();
1098   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1099   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1100   void initializeGprCountSymbol(RegisterKind RegKind);
1101   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1102                              unsigned RegWidth);
1103   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1104                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1105   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1106                  bool IsGdsHardcoded);
1107 
1108 public:
1109   enum AMDGPUMatchResultTy {
1110     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1111   };
1112   enum OperandMode {
1113     OperandMode_Default,
1114     OperandMode_NSA,
1115   };
1116 
1117   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1118 
1119   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1120                const MCInstrInfo &MII,
1121                const MCTargetOptions &Options)
1122       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1123     MCAsmParserExtension::Initialize(Parser);
1124 
1125     if (getFeatureBits().none()) {
1126       // Set default features.
1127       copySTI().ToggleFeature("southern-islands");
1128     }
1129 
1130     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1131 
1132     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1137       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1138       MCContext &Ctx = getContext();
1139       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1140         MCSymbol *Sym =
1141             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1142         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1143         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1144         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1145         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1147       } else {
1148         MCSymbol *Sym =
1149             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1151         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1152         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1153         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1154         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1155       }
1156       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1157         initializeGprCountSymbol(IS_VGPR);
1158         initializeGprCountSymbol(IS_SGPR);
1159       } else
1160         KernelScope.initialize(getContext());
1161     }
1162   }
1163 
1164   bool hasXNACK() const {
1165     return AMDGPU::hasXNACK(getSTI());
1166   }
1167 
1168   bool hasMIMG_R128() const {
1169     return AMDGPU::hasMIMG_R128(getSTI());
1170   }
1171 
1172   bool hasPackedD16() const {
1173     return AMDGPU::hasPackedD16(getSTI());
1174   }
1175 
1176   bool hasGFX10A16() const {
1177     return AMDGPU::hasGFX10A16(getSTI());
1178   }
1179 
1180   bool isSI() const {
1181     return AMDGPU::isSI(getSTI());
1182   }
1183 
1184   bool isCI() const {
1185     return AMDGPU::isCI(getSTI());
1186   }
1187 
1188   bool isVI() const {
1189     return AMDGPU::isVI(getSTI());
1190   }
1191 
1192   bool isGFX9() const {
1193     return AMDGPU::isGFX9(getSTI());
1194   }
1195 
1196   bool isGFX9Plus() const {
1197     return AMDGPU::isGFX9Plus(getSTI());
1198   }
1199 
1200   bool isGFX10() const {
1201     return AMDGPU::isGFX10(getSTI());
1202   }
1203 
1204   bool isGFX10_BEncoding() const {
1205     return AMDGPU::isGFX10_BEncoding(getSTI());
1206   }
1207 
1208   bool hasInv2PiInlineImm() const {
1209     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1210   }
1211 
1212   bool hasFlatOffsets() const {
1213     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1214   }
1215 
1216   bool hasSGPR102_SGPR103() const {
1217     return !isVI() && !isGFX9();
1218   }
1219 
1220   bool hasSGPR104_SGPR105() const {
1221     return isGFX10();
1222   }
1223 
1224   bool hasIntClamp() const {
1225     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1226   }
1227 
1228   AMDGPUTargetStreamer &getTargetStreamer() {
1229     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1230     return static_cast<AMDGPUTargetStreamer &>(TS);
1231   }
1232 
1233   const MCRegisterInfo *getMRI() const {
1234     // We need this const_cast because for some reason getContext() is not const
1235     // in MCAsmParser.
1236     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1237   }
1238 
1239   const MCInstrInfo *getMII() const {
1240     return &MII;
1241   }
1242 
1243   const FeatureBitset &getFeatureBits() const {
1244     return getSTI().getFeatureBits();
1245   }
1246 
1247   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1248   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1249   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1250 
1251   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1252   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1253   bool isForcedDPP() const { return ForcedDPP; }
1254   bool isForcedSDWA() const { return ForcedSDWA; }
1255   ArrayRef<unsigned> getMatchedVariants() const;
1256   StringRef getMatchedVariantName() const;
1257 
1258   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1259   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1260                      bool RestoreOnFailure);
1261   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1262   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1263                                         SMLoc &EndLoc) override;
1264   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1265   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1266                                       unsigned Kind) override;
1267   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1268                                OperandVector &Operands, MCStreamer &Out,
1269                                uint64_t &ErrorInfo,
1270                                bool MatchingInlineAsm) override;
1271   bool ParseDirective(AsmToken DirectiveID) override;
1272   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1273                                     OperandMode Mode = OperandMode_Default);
1274   StringRef parseMnemonicSuffix(StringRef Name);
1275   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1276                         SMLoc NameLoc, OperandVector &Operands) override;
1277   //bool ProcessInstruction(MCInst &Inst);
1278 
1279   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1280 
1281   OperandMatchResultTy
1282   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1283                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1284                      bool (*ConvertResult)(int64_t &) = nullptr);
1285 
1286   OperandMatchResultTy
1287   parseOperandArrayWithPrefix(const char *Prefix,
1288                               OperandVector &Operands,
1289                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1290                               bool (*ConvertResult)(int64_t&) = nullptr);
1291 
1292   OperandMatchResultTy
1293   parseNamedBit(const char *Name, OperandVector &Operands,
1294                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1295   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1296                                              StringRef &Value);
1297 
1298   bool isModifier();
1299   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1300   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1301   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1302   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1303   bool parseSP3NegModifier();
1304   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1305   OperandMatchResultTy parseReg(OperandVector &Operands);
1306   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1307   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1308   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1309   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1310   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1311   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1312   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1313   OperandMatchResultTy parseUfmt(int64_t &Format);
1314   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1315   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1316   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1317   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1318   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1319   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1320   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1321 
1322   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1323   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1324   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1325   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1326 
1327   bool parseCnt(int64_t &IntVal);
1328   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1329   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1330 
1331 private:
1332   struct OperandInfoTy {
1333     int64_t Id;
1334     bool IsSymbolic = false;
1335     bool IsDefined = false;
1336 
1337     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1338   };
1339 
1340   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1341   bool validateSendMsg(const OperandInfoTy &Msg,
1342                        const OperandInfoTy &Op,
1343                        const OperandInfoTy &Stream,
1344                        const SMLoc Loc);
1345 
1346   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1347   bool validateHwreg(const OperandInfoTy &HwReg,
1348                      const int64_t Offset,
1349                      const int64_t Width,
1350                      const SMLoc Loc);
1351 
1352   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1353   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1354   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1355 
1356   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1357   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1358   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1359   bool validateSOPLiteral(const MCInst &Inst) const;
1360   bool validateConstantBusLimitations(const MCInst &Inst);
1361   bool validateEarlyClobberLimitations(const MCInst &Inst);
1362   bool validateIntClampSupported(const MCInst &Inst);
1363   bool validateMIMGAtomicDMask(const MCInst &Inst);
1364   bool validateMIMGGatherDMask(const MCInst &Inst);
1365   bool validateMovrels(const MCInst &Inst);
1366   bool validateMIMGDataSize(const MCInst &Inst);
1367   bool validateMIMGAddrSize(const MCInst &Inst);
1368   bool validateMIMGD16(const MCInst &Inst);
1369   bool validateMIMGDim(const MCInst &Inst);
1370   bool validateLdsDirect(const MCInst &Inst);
1371   bool validateOpSel(const MCInst &Inst);
1372   bool validateVccOperand(unsigned Reg) const;
1373   bool validateVOP3Literal(const MCInst &Inst) const;
1374   bool validateMAIAccWrite(const MCInst &Inst);
1375   bool validateDivScale(const MCInst &Inst);
1376   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1377                              const SMLoc &IDLoc);
1378   unsigned getConstantBusLimit(unsigned Opcode) const;
1379   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1380   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1381   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1382 
1383   bool isSupportedMnemo(StringRef Mnemo,
1384                         const FeatureBitset &FBS);
1385   bool isSupportedMnemo(StringRef Mnemo,
1386                         const FeatureBitset &FBS,
1387                         ArrayRef<unsigned> Variants);
1388   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1389 
1390   bool isId(const StringRef Id) const;
1391   bool isId(const AsmToken &Token, const StringRef Id) const;
1392   bool isToken(const AsmToken::TokenKind Kind) const;
1393   bool trySkipId(const StringRef Id);
1394   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1395   bool trySkipToken(const AsmToken::TokenKind Kind);
1396   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1397   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1398   bool parseId(StringRef &Val, const StringRef ErrMsg);
1399 
1400   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1401   AsmToken::TokenKind getTokenKind() const;
1402   bool parseExpr(int64_t &Imm);
1403   bool parseExpr(OperandVector &Operands);
1404   StringRef getTokenStr() const;
1405   AsmToken peekToken();
1406   AsmToken getToken() const;
1407   SMLoc getLoc() const;
1408   void lex();
1409 
1410 public:
1411   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1412   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1413 
1414   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1415   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1416   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1417   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1418   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1419   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1420 
1421   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1422                             const unsigned MinVal,
1423                             const unsigned MaxVal,
1424                             const StringRef ErrMsg);
1425   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1426   bool parseSwizzleOffset(int64_t &Imm);
1427   bool parseSwizzleMacro(int64_t &Imm);
1428   bool parseSwizzleQuadPerm(int64_t &Imm);
1429   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1430   bool parseSwizzleBroadcast(int64_t &Imm);
1431   bool parseSwizzleSwap(int64_t &Imm);
1432   bool parseSwizzleReverse(int64_t &Imm);
1433 
1434   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1435   int64_t parseGPRIdxMacro();
1436 
1437   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1438   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1439   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1440   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1441   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1442 
1443   AMDGPUOperand::Ptr defaultDLC() const;
1444   AMDGPUOperand::Ptr defaultGLC() const;
1445   AMDGPUOperand::Ptr defaultGLC_1() const;
1446   AMDGPUOperand::Ptr defaultSLC() const;
1447 
1448   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1449   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1450   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1451   AMDGPUOperand::Ptr defaultFlatOffset() const;
1452 
1453   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1454 
1455   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1456                OptionalImmIndexMap &OptionalIdx);
1457   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1458   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1459   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1460 
1461   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1462 
1463   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1464                bool IsAtomic = false);
1465   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1466   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1467 
1468   OperandMatchResultTy parseDim(OperandVector &Operands);
1469   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1470   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1471   AMDGPUOperand::Ptr defaultRowMask() const;
1472   AMDGPUOperand::Ptr defaultBankMask() const;
1473   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1474   AMDGPUOperand::Ptr defaultFI() const;
1475   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1476   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1477 
1478   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1479                                     AMDGPUOperand::ImmTy Type);
1480   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1481   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1482   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1483   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1484   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1485   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1486   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1487                uint64_t BasicInstType,
1488                bool SkipDstVcc = false,
1489                bool SkipSrcVcc = false);
1490 
1491   AMDGPUOperand::Ptr defaultBLGP() const;
1492   AMDGPUOperand::Ptr defaultCBSZ() const;
1493   AMDGPUOperand::Ptr defaultABID() const;
1494 
1495   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1496   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1497 };
1498 
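// Describes an optional instruction operand: its keyword, the immediate kind
// it produces, whether it is a single-bit flag, and an optional callback to
// validate or convert the parsed value.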
1499 struct OptionalOperand {
1500   const char *Name;
1501   AMDGPUOperand::ImmTy Type;
1502   bool IsBit;
1503   bool (*ConvertResult)(int64_t&);
1504 };
1505 
1506 } // end anonymous namespace
1507 
// May also be called with an integer type of equivalent bitwidth.
1509 static const fltSemantics *getFltSemantics(unsigned Size) {
1510   switch (Size) {
1511   case 4:
1512     return &APFloat::IEEEsingle();
1513   case 8:
1514     return &APFloat::IEEEdouble();
1515   case 2:
1516     return &APFloat::IEEEhalf();
1517   default:
1518     llvm_unreachable("unsupported fp type");
1519   }
1520 }
1521 
1522 static const fltSemantics *getFltSemantics(MVT VT) {
1523   return getFltSemantics(VT.getSizeInBits() / 8);
1524 }
1525 
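// Map an operand type (from SIDefines.h) to the floating-point semantics used
// to interpret its literal values.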
1526 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1527   switch (OperandType) {
1528   case AMDGPU::OPERAND_REG_IMM_INT32:
1529   case AMDGPU::OPERAND_REG_IMM_FP32:
1530   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1531   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1532   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1533   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1534     return &APFloat::IEEEsingle();
1535   case AMDGPU::OPERAND_REG_IMM_INT64:
1536   case AMDGPU::OPERAND_REG_IMM_FP64:
1537   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1538   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1539     return &APFloat::IEEEdouble();
1540   case AMDGPU::OPERAND_REG_IMM_INT16:
1541   case AMDGPU::OPERAND_REG_IMM_FP16:
1542   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1543   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1544   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1545   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1546   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1547   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1548   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1549   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1550   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1551   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1552     return &APFloat::IEEEhalf();
1553   default:
1554     llvm_unreachable("unsupported fp type");
1555   }
1556 }
1557 
1558 //===----------------------------------------------------------------------===//
1559 // Operand
1560 //===----------------------------------------------------------------------===//
1561 
1562 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1563   bool Lost;
1564 
1565   // Convert the literal to the FP semantics of the requested type
1566   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1567                                                APFloat::rmNearestTiesToEven,
1568                                                &Lost);
1569   // We allow precision loss but not overflow or underflow
1570   if (Status != APFloat::opOK &&
1571       Lost &&
1572       ((Status & APFloat::opOverflow)  != 0 ||
1573        (Status & APFloat::opUnderflow) != 0)) {
1574     return false;
1575   }
1576 
1577   return true;
1578 }
1579 
1580 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1581   return isUIntN(Size, Val) || isIntN(Size, Val);
1582 }
1583 
1584 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1585   if (VT.getScalarType() == MVT::i16) {
1586     // FP immediate values are broken for i16 operands; only accept inline integer literals.
1587     return isInlinableIntLiteral(Val);
1588   }
1589 
1590   // f16/v2f16 operands work correctly for all values.
1591   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1592 }
1593 
1594 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1595 
1596   // This is a hack to enable named inline values like
1597   // shared_base with both 32-bit and 64-bit operands.
1598   // Note that these values are defined as
1599   // 32-bit operands only.
1600   if (isInlineValue()) {
1601     return true;
1602   }
1603 
1604   if (!isImmTy(ImmTyNone)) {
1605     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1606     return false;
1607   }
1608   // TODO: We should avoid using host float here. It would be better to
1609   // check the float bit values which is what a few other places do.
1610   // We've had bot failures before due to weird NaN support on mips hosts.
1611 
1612   APInt Literal(64, Imm.Val);
1613 
1614   if (Imm.IsFPImm) { // We got fp literal token
1615     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1616       return AMDGPU::isInlinableLiteral64(Imm.Val,
1617                                           AsmParser->hasInv2PiInlineImm());
1618     }
1619 
1620     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1621     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1622       return false;
1623 
1624     if (type.getScalarSizeInBits() == 16) {
1625       return isInlineableLiteralOp16(
1626         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1627         type, AsmParser->hasInv2PiInlineImm());
1628     }
1629 
1630     // Check if single precision literal is inlinable
1631     return AMDGPU::isInlinableLiteral32(
1632       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1633       AsmParser->hasInv2PiInlineImm());
1634   }
1635 
1636   // We got int literal token.
1637   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1638     return AMDGPU::isInlinableLiteral64(Imm.Val,
1639                                         AsmParser->hasInv2PiInlineImm());
1640   }
1641 
1642   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1643     return false;
1644   }
1645 
1646   if (type.getScalarSizeInBits() == 16) {
1647     return isInlineableLiteralOp16(
1648       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1649       type, AsmParser->hasInv2PiInlineImm());
1650   }
1651 
1652   return AMDGPU::isInlinableLiteral32(
1653     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1654     AsmParser->hasInv2PiInlineImm());
1655 }
1656 
1657 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1658   // Check that this immediate can be added as literal
1659   if (!isImmTy(ImmTyNone)) {
1660     return false;
1661   }
1662 
1663   if (!Imm.IsFPImm) {
1664     // We got int literal token.
1665 
1666     if (type == MVT::f64 && hasFPModifiers()) {
1667       // Cannot apply fp modifiers to int literals while preserving the same
1668       // semantics for VOP1/2/C and VOP3 because of integer truncation.
1669       // To avoid ambiguity, disable these cases.
1670       return false;
1671     }
1672 
1673     unsigned Size = type.getSizeInBits();
1674     if (Size == 64)
1675       Size = 32;
1676 
1677     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1678     // types.
1679     return isSafeTruncation(Imm.Val, Size);
1680   }
1681 
1682   // We got fp literal token
1683   if (type == MVT::f64) { // Expected 64-bit fp operand
1684     // The low 32 bits of the literal would be set to zeroes, but we accept such literals
1685     return true;
1686   }
1687 
1688   if (type == MVT::i64) { // Expected 64-bit int operand
1689     // We don't allow fp literals in 64-bit integer instructions. It is
1690     // unclear how we should encode them.
1691     return false;
1692   }
1693 
1694   // We allow fp literals with f16x2 operands assuming that the specified
1695   // literal goes into the lower half and the upper half is zero. We also
1696   // require that the literal can be losslessly converted to f16.
1697   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1698                      (type == MVT::v2i16)? MVT::i16 : type;
1699 
1700   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1701   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1702 }
1703 
1704 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1705   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1706 }
1707 
1708 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1709   if (AsmParser->isVI())
1710     return isVReg32();
1711   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1712     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1713   else
1714     return false;
1715 }
1716 
1717 bool AMDGPUOperand::isSDWAFP16Operand() const {
1718   return isSDWAOperand(MVT::f16);
1719 }
1720 
1721 bool AMDGPUOperand::isSDWAFP32Operand() const {
1722   return isSDWAOperand(MVT::f32);
1723 }
1724 
1725 bool AMDGPUOperand::isSDWAInt16Operand() const {
1726   return isSDWAOperand(MVT::i16);
1727 }
1728 
1729 bool AMDGPUOperand::isSDWAInt32Operand() const {
1730   return isSDWAOperand(MVT::i32);
1731 }
1732 
1733 bool AMDGPUOperand::isBoolReg() const {
1734   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1735          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1736 }
1737 
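// Clear (abs) or flip (neg) the sign bit of an fp literal of the given
// size in bytes, according to the operand's input modifiers.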
1738 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1739 {
1740   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1741   assert(Size == 2 || Size == 4 || Size == 8);
1742 
1743   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1744 
1745   if (Imm.Mods.Abs) {
1746     Val &= ~FpSignMask;
1747   }
1748   if (Imm.Mods.Neg) {
1749     Val ^= FpSignMask;
1750   }
1751 
1752   return Val;
1753 }
1754 
1755 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1756   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1757                              Inst.getNumOperands())) {
1758     addLiteralImmOperand(Inst, Imm.Val,
1759                          ApplyModifiers &
1760                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1761   } else {
1762     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1763     Inst.addOperand(MCOperand::createImm(Imm.Val));
1764   }
1765 }
1766 
1767 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1768   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1769   auto OpNum = Inst.getNumOperands();
1770   // Check that this operand accepts literals
1771   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1772 
1773   if (ApplyModifiers) {
1774     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1775     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1776     Val = applyInputFPModifiers(Val, Size);
1777   }
1778 
1779   APInt Literal(64, Val);
1780   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1781 
1782   if (Imm.IsFPImm) { // We got fp literal token
1783     switch (OpTy) {
1784     case AMDGPU::OPERAND_REG_IMM_INT64:
1785     case AMDGPU::OPERAND_REG_IMM_FP64:
1786     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1787     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1788       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1789                                        AsmParser->hasInv2PiInlineImm())) {
1790         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1791         return;
1792       }
1793 
1794       // Non-inlineable
1795       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1796         // For fp operands we check whether the low 32 bits are zeros
1797         if (Literal.getLoBits(32) != 0) {
1798           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1799           "Can't encode literal as exact 64-bit floating-point operand. "
1800           "Low 32-bits will be set to zero");
1801         }
1802 
1803         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1804         return;
1805       }
1806 
1807       // We don't allow fp literals in 64-bit integer instructions. It is
1808       // unclear how we should encode them. This case should be checked earlier
1809       // in predicate methods (isLiteralImm())
1810       llvm_unreachable("fp literal in 64-bit integer instruction.");
1811 
1812     case AMDGPU::OPERAND_REG_IMM_INT32:
1813     case AMDGPU::OPERAND_REG_IMM_FP32:
1814     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1815     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1816     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1817     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1818     case AMDGPU::OPERAND_REG_IMM_INT16:
1819     case AMDGPU::OPERAND_REG_IMM_FP16:
1820     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1821     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1822     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1823     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1824     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1825     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1826     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1827     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1828     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1829     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1830       bool lost;
1831       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1832       // Convert literal to the operand's floating-point format
1833       FPLiteral.convert(*getOpFltSemantics(OpTy),
1834                         APFloat::rmNearestTiesToEven, &lost);
1835       // We allow precision loss but not overflow or underflow. This should be
1836       // checked earlier in isLiteralImm()
1837 
1838       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1839       Inst.addOperand(MCOperand::createImm(ImmVal));
1840       return;
1841     }
1842     default:
1843       llvm_unreachable("invalid operand size");
1844     }
1845 
1846     return;
1847   }
1848 
1849   // We got int literal token.
1850   // Only sign extend inline immediates.
1851   switch (OpTy) {
1852   case AMDGPU::OPERAND_REG_IMM_INT32:
1853   case AMDGPU::OPERAND_REG_IMM_FP32:
1854   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1855   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1856   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1857   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1858   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1859   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1860     if (isSafeTruncation(Val, 32) &&
1861         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1862                                      AsmParser->hasInv2PiInlineImm())) {
1863       Inst.addOperand(MCOperand::createImm(Val));
1864       return;
1865     }
1866 
1867     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1868     return;
1869 
1870   case AMDGPU::OPERAND_REG_IMM_INT64:
1871   case AMDGPU::OPERAND_REG_IMM_FP64:
1872   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1873   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1874     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1875       Inst.addOperand(MCOperand::createImm(Val));
1876       return;
1877     }
1878 
1879     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1880     return;
1881 
1882   case AMDGPU::OPERAND_REG_IMM_INT16:
1883   case AMDGPU::OPERAND_REG_IMM_FP16:
1884   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1885   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1886   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1887   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1888     if (isSafeTruncation(Val, 16) &&
1889         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1890                                      AsmParser->hasInv2PiInlineImm())) {
1891       Inst.addOperand(MCOperand::createImm(Val));
1892       return;
1893     }
1894 
1895     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1896     return;
1897 
1898   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1899   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1900   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1901   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1902     assert(isSafeTruncation(Val, 16));
1903     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1904                                         AsmParser->hasInv2PiInlineImm()));
1905 
1906     Inst.addOperand(MCOperand::createImm(Val));
1907     return;
1908   }
1909   default:
1910     llvm_unreachable("invalid operand size");
1911   }
1912 }
1913 
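// Add a KImm floating-point operand of the given bit width. Integer literal
// tokens are truncated to the width; fp tokens are converted to the matching
// fp format.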
1914 template <unsigned Bitwidth>
1915 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1916   APInt Literal(64, Imm.Val);
1917 
1918   if (!Imm.IsFPImm) {
1919     // We got int literal token.
1920     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1921     return;
1922   }
1923 
1924   bool Lost;
1925   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1926   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1927                     APFloat::rmNearestTiesToEven, &Lost);
1928   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1929 }
1930 
1931 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1932   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1933 }
1934 
1935 static bool isInlineValue(unsigned Reg) {
1936   switch (Reg) {
1937   case AMDGPU::SRC_SHARED_BASE:
1938   case AMDGPU::SRC_SHARED_LIMIT:
1939   case AMDGPU::SRC_PRIVATE_BASE:
1940   case AMDGPU::SRC_PRIVATE_LIMIT:
1941   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1942     return true;
1943   case AMDGPU::SRC_VCCZ:
1944   case AMDGPU::SRC_EXECZ:
1945   case AMDGPU::SRC_SCC:
1946     return true;
1947   case AMDGPU::SGPR_NULL:
1948     return true;
1949   default:
1950     return false;
1951   }
1952 }
1953 
1954 bool AMDGPUOperand::isInlineValue() const {
1955   return isRegKind() && ::isInlineValue(getReg());
1956 }
1957 
1958 //===----------------------------------------------------------------------===//
1959 // AsmParser
1960 //===----------------------------------------------------------------------===//
1961 
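// Map a register kind and a width in dwords to a register class ID,
// or -1 if no class of that width exists for the kind.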
1962 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1963   if (Is == IS_VGPR) {
1964     switch (RegWidth) {
1965       default: return -1;
1966       case 1: return AMDGPU::VGPR_32RegClassID;
1967       case 2: return AMDGPU::VReg_64RegClassID;
1968       case 3: return AMDGPU::VReg_96RegClassID;
1969       case 4: return AMDGPU::VReg_128RegClassID;
1970       case 5: return AMDGPU::VReg_160RegClassID;
1971       case 6: return AMDGPU::VReg_192RegClassID;
1972       case 8: return AMDGPU::VReg_256RegClassID;
1973       case 16: return AMDGPU::VReg_512RegClassID;
1974       case 32: return AMDGPU::VReg_1024RegClassID;
1975     }
1976   } else if (Is == IS_TTMP) {
1977     switch (RegWidth) {
1978       default: return -1;
1979       case 1: return AMDGPU::TTMP_32RegClassID;
1980       case 2: return AMDGPU::TTMP_64RegClassID;
1981       case 4: return AMDGPU::TTMP_128RegClassID;
1982       case 8: return AMDGPU::TTMP_256RegClassID;
1983       case 16: return AMDGPU::TTMP_512RegClassID;
1984     }
1985   } else if (Is == IS_SGPR) {
1986     switch (RegWidth) {
1987       default: return -1;
1988       case 1: return AMDGPU::SGPR_32RegClassID;
1989       case 2: return AMDGPU::SGPR_64RegClassID;
1990       case 3: return AMDGPU::SGPR_96RegClassID;
1991       case 4: return AMDGPU::SGPR_128RegClassID;
1992       case 5: return AMDGPU::SGPR_160RegClassID;
1993       case 6: return AMDGPU::SGPR_192RegClassID;
1994       case 8: return AMDGPU::SGPR_256RegClassID;
1995       case 16: return AMDGPU::SGPR_512RegClassID;
1996     }
1997   } else if (Is == IS_AGPR) {
1998     switch (RegWidth) {
1999       default: return -1;
2000       case 1: return AMDGPU::AGPR_32RegClassID;
2001       case 2: return AMDGPU::AReg_64RegClassID;
2002       case 3: return AMDGPU::AReg_96RegClassID;
2003       case 4: return AMDGPU::AReg_128RegClassID;
2004       case 5: return AMDGPU::AReg_160RegClassID;
2005       case 6: return AMDGPU::AReg_192RegClassID;
2006       case 8: return AMDGPU::AReg_256RegClassID;
2007       case 16: return AMDGPU::AReg_512RegClassID;
2008       case 32: return AMDGPU::AReg_1024RegClassID;
2009     }
2010   }
2011   return -1;
2012 }
2013 
2014 static unsigned getSpecialRegForName(StringRef RegName) {
2015   return StringSwitch<unsigned>(RegName)
2016     .Case("exec", AMDGPU::EXEC)
2017     .Case("vcc", AMDGPU::VCC)
2018     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2019     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2020     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2021     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2022     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2023     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2024     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2025     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2026     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2027     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2028     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2029     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2030     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2031     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2032     .Case("m0", AMDGPU::M0)
2033     .Case("vccz", AMDGPU::SRC_VCCZ)
2034     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2035     .Case("execz", AMDGPU::SRC_EXECZ)
2036     .Case("src_execz", AMDGPU::SRC_EXECZ)
2037     .Case("scc", AMDGPU::SRC_SCC)
2038     .Case("src_scc", AMDGPU::SRC_SCC)
2039     .Case("tba", AMDGPU::TBA)
2040     .Case("tma", AMDGPU::TMA)
2041     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2042     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2043     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2044     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2045     .Case("vcc_lo", AMDGPU::VCC_LO)
2046     .Case("vcc_hi", AMDGPU::VCC_HI)
2047     .Case("exec_lo", AMDGPU::EXEC_LO)
2048     .Case("exec_hi", AMDGPU::EXEC_HI)
2049     .Case("tma_lo", AMDGPU::TMA_LO)
2050     .Case("tma_hi", AMDGPU::TMA_HI)
2051     .Case("tba_lo", AMDGPU::TBA_LO)
2052     .Case("tba_hi", AMDGPU::TBA_HI)
2053     .Case("pc", AMDGPU::PC_REG)
2054     .Case("null", AMDGPU::SGPR_NULL)
2055     .Default(AMDGPU::NoRegister);
2056 }
2057 
2058 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2059                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2060   auto R = parseRegister();
2061   if (!R) return true;
2062   assert(R->isReg());
2063   RegNo = R->getReg();
2064   StartLoc = R->getStartLoc();
2065   EndLoc = R->getEndLoc();
2066   return false;
2067 }
2068 
2069 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2070                                     SMLoc &EndLoc) {
2071   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2072 }
2073 
2074 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2075                                                        SMLoc &StartLoc,
2076                                                        SMLoc &EndLoc) {
2077   bool Result =
2078       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2079   bool PendingErrors = getParser().hasPendingError();
2080   getParser().clearPendingErrors();
2081   if (PendingErrors)
2082     return MatchOperand_ParseFail;
2083   if (Result)
2084     return MatchOperand_NoMatch;
2085   return MatchOperand_Success;
2086 }
2087 
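// Extend a register list with the next parsed register. Lo/hi pairs of
// special registers (e.g. vcc_lo, vcc_hi) merge into their 64-bit register;
// regular registers must have consecutive indices.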
2088 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2089                                             RegisterKind RegKind, unsigned Reg1,
2090                                             SMLoc Loc) {
2091   switch (RegKind) {
2092   case IS_SPECIAL:
2093     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2094       Reg = AMDGPU::EXEC;
2095       RegWidth = 2;
2096       return true;
2097     }
2098     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2099       Reg = AMDGPU::FLAT_SCR;
2100       RegWidth = 2;
2101       return true;
2102     }
2103     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2104       Reg = AMDGPU::XNACK_MASK;
2105       RegWidth = 2;
2106       return true;
2107     }
2108     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2109       Reg = AMDGPU::VCC;
2110       RegWidth = 2;
2111       return true;
2112     }
2113     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2114       Reg = AMDGPU::TBA;
2115       RegWidth = 2;
2116       return true;
2117     }
2118     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2119       Reg = AMDGPU::TMA;
2120       RegWidth = 2;
2121       return true;
2122     }
2123     Error(Loc, "register does not fit in the list");
2124     return false;
2125   case IS_VGPR:
2126   case IS_SGPR:
2127   case IS_AGPR:
2128   case IS_TTMP:
2129     if (Reg1 != Reg + RegWidth) {
2130       Error(Loc, "registers in a list must have consecutive indices");
2131       return false;
2132     }
2133     RegWidth++;
2134     return true;
2135   default:
2136     llvm_unreachable("unexpected register kind");
2137   }
2138 }
2139 
2140 struct RegInfo {
2141   StringLiteral Name;
2142   RegisterKind Kind;
2143 };
2144 
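// Name prefixes of 'regular' (indexed) registers and their kinds,
// e.g. "v0", "s[0:1]", "ttmp3", "a[0:3]".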
2145 static constexpr RegInfo RegularRegisters[] = {
2146   {{"v"},    IS_VGPR},
2147   {{"s"},    IS_SGPR},
2148   {{"ttmp"}, IS_TTMP},
2149   {{"acc"},  IS_AGPR},
2150   {{"a"},    IS_AGPR},
2151 };
2152 
2153 static bool isRegularReg(RegisterKind Kind) {
2154   return Kind == IS_VGPR ||
2155          Kind == IS_SGPR ||
2156          Kind == IS_TTMP ||
2157          Kind == IS_AGPR;
2158 }
2159 
2160 static const RegInfo* getRegularRegInfo(StringRef Str) {
2161   for (const RegInfo &Reg : RegularRegisters)
2162     if (Str.startswith(Reg.Name))
2163       return &Reg;
2164   return nullptr;
2165 }
2166 
2167 static bool getRegNum(StringRef Str, unsigned& Num) {
2168   return !Str.getAsInteger(10, Num);
2169 }
2170 
2171 bool
2172 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2173                             const AsmToken &NextToken) const {
2174 
2175   // A list of consecutive registers: [s0,s1,s2,s3]
2176   if (Token.is(AsmToken::LBrac))
2177     return true;
2178 
2179   if (!Token.is(AsmToken::Identifier))
2180     return false;
2181 
2182   // A single register like s0 or a range of registers like s[0:1]
2183 
2184   StringRef Str = Token.getString();
2185   const RegInfo *Reg = getRegularRegInfo(Str);
2186   if (Reg) {
2187     StringRef RegName = Reg->Name;
2188     StringRef RegSuffix = Str.substr(RegName.size());
2189     if (!RegSuffix.empty()) {
2190       unsigned Num;
2191       // A single register with an index: rXX
2192       if (getRegNum(RegSuffix, Num))
2193         return true;
2194     } else {
2195       // A range of registers: r[XX:YY].
2196       if (NextToken.is(AsmToken::LBrac))
2197         return true;
2198     }
2199   }
2200 
2201   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2202 }
2203 
2204 bool
2205 AMDGPUAsmParser::isRegister()
2206 {
2207   return isRegister(getToken(), peekToken());
2208 }
2209 
2210 unsigned
2211 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2212                                unsigned RegNum,
2213                                unsigned RegWidth,
2214                                SMLoc Loc) {
2215 
2216   assert(isRegularReg(RegKind));
2217 
2218   unsigned AlignSize = 1;
2219   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2220     // SGPR and TTMP registers must be aligned.
2221     // Max required alignment is 4 dwords.
2222     AlignSize = std::min(RegWidth, 4u);
2223   }
2224 
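  // E.g. a 4-dword SGPR tuple must start at an index that is a multiple
  // of 4: s[4:7] is valid while s[2:5] is not.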
2225   if (RegNum % AlignSize != 0) {
2226     Error(Loc, "invalid register alignment");
2227     return AMDGPU::NoRegister;
2228   }
2229 
2230   unsigned RegIdx = RegNum / AlignSize;
2231   int RCID = getRegClass(RegKind, RegWidth);
2232   if (RCID == -1) {
2233     Error(Loc, "invalid or unsupported register size");
2234     return AMDGPU::NoRegister;
2235   }
2236 
2237   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2238   const MCRegisterClass RC = TRI->getRegClass(RCID);
2239   if (RegIdx >= RC.getNumRegs()) {
2240     Error(Loc, "register index is out of range");
2241     return AMDGPU::NoRegister;
2242   }
2243 
2244   return RC.getRegister(RegIdx);
2245 }
2246 
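// Parse a register index or a range of indices in square brackets,
// e.g. "[0]" or "[0:3]", returning the first index and the width in dwords.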
2247 bool
2248 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2249   int64_t RegLo, RegHi;
2250   if (!skipToken(AsmToken::LBrac, "missing register index"))
2251     return false;
2252 
2253   SMLoc FirstIdxLoc = getLoc();
2254   SMLoc SecondIdxLoc;
2255 
2256   if (!parseExpr(RegLo))
2257     return false;
2258 
2259   if (trySkipToken(AsmToken::Colon)) {
2260     SecondIdxLoc = getLoc();
2261     if (!parseExpr(RegHi))
2262       return false;
2263   } else {
2264     RegHi = RegLo;
2265   }
2266 
2267   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2268     return false;
2269 
2270   if (!isUInt<32>(RegLo)) {
2271     Error(FirstIdxLoc, "invalid register index");
2272     return false;
2273   }
2274 
2275   if (!isUInt<32>(RegHi)) {
2276     Error(SecondIdxLoc, "invalid register index");
2277     return false;
2278   }
2279 
2280   if (RegLo > RegHi) {
2281     Error(FirstIdxLoc, "first register index should not exceed second index");
2282     return false;
2283   }
2284 
2285   Num = static_cast<unsigned>(RegLo);
2286   Width = (RegHi - RegLo) + 1;
2287   return true;
2288 }
2289 
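// Try to parse a special register name such as vcc, exec or m0.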
2290 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2291                                           unsigned &RegNum, unsigned &RegWidth,
2292                                           SmallVectorImpl<AsmToken> &Tokens) {
2293   assert(isToken(AsmToken::Identifier));
2294   unsigned Reg = getSpecialRegForName(getTokenStr());
2295   if (Reg) {
2296     RegNum = 0;
2297     RegWidth = 1;
2298     RegKind = IS_SPECIAL;
2299     Tokens.push_back(getToken());
2300     lex(); // skip register name
2301   }
2302   return Reg;
2303 }
2304 
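// Parse a regular register: either a single register like v5
// or a range like v[4:7].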
2305 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2306                                           unsigned &RegNum, unsigned &RegWidth,
2307                                           SmallVectorImpl<AsmToken> &Tokens) {
2308   assert(isToken(AsmToken::Identifier));
2309   StringRef RegName = getTokenStr();
2310   auto Loc = getLoc();
2311 
2312   const RegInfo *RI = getRegularRegInfo(RegName);
2313   if (!RI) {
2314     Error(Loc, "invalid register name");
2315     return AMDGPU::NoRegister;
2316   }
2317 
2318   Tokens.push_back(getToken());
2319   lex(); // skip register name
2320 
2321   RegKind = RI->Kind;
2322   StringRef RegSuffix = RegName.substr(RI->Name.size());
2323   if (!RegSuffix.empty()) {
2324     // Single 32-bit register: vXX.
2325     if (!getRegNum(RegSuffix, RegNum)) {
2326       Error(Loc, "invalid register index");
2327       return AMDGPU::NoRegister;
2328     }
2329     RegWidth = 1;
2330   } else {
2331     // Range of registers: v[XX:YY]. ":YY" is optional.
2332     if (!ParseRegRange(RegNum, RegWidth))
2333       return AMDGPU::NoRegister;
2334   }
2335 
2336   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2337 }
2338 
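// Parse a bracketed list of consecutive 32-bit registers, e.g. [s0,s1,s2,s3],
// which is equivalent to the range s[0:3].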
2339 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2340                                        unsigned &RegWidth,
2341                                        SmallVectorImpl<AsmToken> &Tokens) {
2342   unsigned Reg = AMDGPU::NoRegister;
2343   auto ListLoc = getLoc();
2344 
2345   if (!skipToken(AsmToken::LBrac,
2346                  "expected a register or a list of registers")) {
2347     return AMDGPU::NoRegister;
2348   }
2349 
2350   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2351 
2352   auto Loc = getLoc();
2353   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2354     return AMDGPU::NoRegister;
2355   if (RegWidth != 1) {
2356     Error(Loc, "expected a single 32-bit register");
2357     return AMDGPU::NoRegister;
2358   }
2359 
2360   for (; trySkipToken(AsmToken::Comma); ) {
2361     RegisterKind NextRegKind;
2362     unsigned NextReg, NextRegNum, NextRegWidth;
2363     Loc = getLoc();
2364 
2365     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2366                              NextRegNum, NextRegWidth,
2367                              Tokens)) {
2368       return AMDGPU::NoRegister;
2369     }
2370     if (NextRegWidth != 1) {
2371       Error(Loc, "expected a single 32-bit register");
2372       return AMDGPU::NoRegister;
2373     }
2374     if (NextRegKind != RegKind) {
2375       Error(Loc, "registers in a list must be of the same kind");
2376       return AMDGPU::NoRegister;
2377     }
2378     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2379       return AMDGPU::NoRegister;
2380   }
2381 
2382   if (!skipToken(AsmToken::RBrac,
2383                  "expected a comma or a closing square bracket")) {
2384     return AMDGPU::NoRegister;
2385   }
2386 
2387   if (isRegularReg(RegKind))
2388     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2389 
2390   return Reg;
2391 }
2392 
2393 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2394                                           unsigned &RegNum, unsigned &RegWidth,
2395                                           SmallVectorImpl<AsmToken> &Tokens) {
2396   auto Loc = getLoc();
2397   Reg = AMDGPU::NoRegister;
2398 
2399   if (isToken(AsmToken::Identifier)) {
2400     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2401     if (Reg == AMDGPU::NoRegister)
2402       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2403   } else {
2404     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2405   }
2406 
2407   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2408   if (Reg == AMDGPU::NoRegister) {
2409     assert(Parser.hasPendingError());
2410     return false;
2411   }
2412 
2413   if (!subtargetHasRegister(*TRI, Reg)) {
2414     if (Reg == AMDGPU::SGPR_NULL) {
2415       Error(Loc, "'null' operand is not supported on this GPU");
2416     } else {
2417       Error(Loc, "register not available on this GPU");
2418     }
2419     return false;
2420   }
2421 
2422   return true;
2423 }
2424 
2425 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2426                                           unsigned &RegNum, unsigned &RegWidth,
2427                                           bool RestoreOnFailure /*=false*/) {
2428   Reg = AMDGPU::NoRegister;
2429 
2430   SmallVector<AsmToken, 1> Tokens;
2431   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2432     if (RestoreOnFailure) {
2433       while (!Tokens.empty()) {
2434         getLexer().UnLex(Tokens.pop_back_val());
2435       }
2436     }
2437     return true;
2438   }
2439   return false;
2440 }
2441 
2442 Optional<StringRef>
2443 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2444   switch (RegKind) {
2445   case IS_VGPR:
2446     return StringRef(".amdgcn.next_free_vgpr");
2447   case IS_SGPR:
2448     return StringRef(".amdgcn.next_free_sgpr");
2449   default:
2450     return None;
2451   }
2452 }
2453 
2454 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2455   auto SymbolName = getGprCountSymbolName(RegKind);
2456   assert(SymbolName && "initializing invalid register kind");
2457   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2458   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2459 }
2460 
2461 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2462                                             unsigned DwordRegIndex,
2463                                             unsigned RegWidth) {
2464   // Symbols are only defined for GCN targets
2465   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2466     return true;
2467 
2468   auto SymbolName = getGprCountSymbolName(RegKind);
2469   if (!SymbolName)
2470     return true;
2471   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2472 
2473   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2474   int64_t OldCount;
2475 
2476   if (!Sym->isVariable())
2477     return !Error(getParser().getTok().getLoc(),
2478                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2479   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2480     return !Error(
2481         getParser().getTok().getLoc(),
2482         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2483 
2484   if (OldCount <= NewMax)
2485     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2486 
2487   return true;
2488 }
2489 
2490 std::unique_ptr<AMDGPUOperand>
2491 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2492   const auto &Tok = Parser.getTok();
2493   SMLoc StartLoc = Tok.getLoc();
2494   SMLoc EndLoc = Tok.getEndLoc();
2495   RegisterKind RegKind;
2496   unsigned Reg, RegNum, RegWidth;
2497 
2498   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2499     return nullptr;
2500   }
2501   if (isHsaAbiVersion3(&getSTI())) {
2502     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2503       return nullptr;
2504   } else
2505     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2506   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2507 }
2508 
2509 OperandMatchResultTy
2510 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2511   // TODO: add syntactic sugar for 1/(2*PI)
2512 
2513   assert(!isRegister());
2514   assert(!isModifier());
2515 
2516   const auto& Tok = getToken();
2517   const auto& NextTok = peekToken();
2518   bool IsReal = Tok.is(AsmToken::Real);
2519   SMLoc S = getLoc();
2520   bool Negate = false;
2521 
2522   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2523     lex();
2524     IsReal = true;
2525     Negate = true;
2526   }
2527 
2528   if (IsReal) {
2529     // Floating-point expressions are not supported.
2530     // Can only allow floating-point literals with an
2531     // optional sign.
2532 
2533     StringRef Num = getTokenStr();
2534     lex();
2535 
2536     APFloat RealVal(APFloat::IEEEdouble());
2537     auto roundMode = APFloat::rmNearestTiesToEven;
2538     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2539       return MatchOperand_ParseFail;
2540     }
2541     if (Negate)
2542       RealVal.changeSign();
2543 
2544     Operands.push_back(
2545       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2546                                AMDGPUOperand::ImmTyNone, true));
2547 
2548     return MatchOperand_Success;
2549 
2550   } else {
2551     int64_t IntVal;
2552     const MCExpr *Expr;
2553     SMLoc S = getLoc();
2554 
2555     if (HasSP3AbsModifier) {
2556       // This is a workaround for handling expressions
2557       // as arguments of SP3 'abs' modifier, for example:
2558       //     |1.0|
2559       //     |-1|
2560       //     |1+x|
2561       // This syntax is not compatible with syntax of standard
2562       // MC expressions (due to the trailing '|').
2563       SMLoc EndLoc;
2564       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2565         return MatchOperand_ParseFail;
2566     } else {
2567       if (Parser.parseExpression(Expr))
2568         return MatchOperand_ParseFail;
2569     }
2570 
2571     if (Expr->evaluateAsAbsolute(IntVal)) {
2572       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2573     } else {
2574       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2575     }
2576 
2577     return MatchOperand_Success;
2578   }
2579 
2580   return MatchOperand_NoMatch;
2581 }
2582 
2583 OperandMatchResultTy
2584 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2585   if (!isRegister())
2586     return MatchOperand_NoMatch;
2587 
2588   if (auto R = parseRegister()) {
2589     assert(R->isReg());
2590     Operands.push_back(std::move(R));
2591     return MatchOperand_Success;
2592   }
2593   return MatchOperand_ParseFail;
2594 }
2595 
2596 OperandMatchResultTy
2597 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2598   auto res = parseReg(Operands);
2599   if (res != MatchOperand_NoMatch) {
2600     return res;
2601   } else if (isModifier()) {
2602     return MatchOperand_NoMatch;
2603   } else {
2604     return parseImm(Operands, HasSP3AbsMod);
2605   }
2606 }
2607 
2608 bool
2609 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2610   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2611     const auto &str = Token.getString();
2612     return str == "abs" || str == "neg" || str == "sext";
2613   }
2614   return false;
2615 }
2616 
2617 bool
2618 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2619   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2620 }
2621 
2622 bool
2623 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2624   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2625 }
2626 
2627 bool
2628 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2629   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2630 }
2631 
2632 // Check if this is an operand modifier or an opcode modifier
2633 // which may look like an expression but is not. We should
2634 // avoid parsing these modifiers as expressions. Currently
2635 // recognized sequences are:
2636 //   |...|
2637 //   abs(...)
2638 //   neg(...)
2639 //   sext(...)
2640 //   -reg
2641 //   -|...|
2642 //   -abs(...)
2643 //   name:...
2644 // Note that simple opcode modifiers like 'gds' may be parsed as
2645 // expressions; this is a special case. See getExpressionAsToken.
2646 //
2647 bool
2648 AMDGPUAsmParser::isModifier() {
2649 
2650   AsmToken Tok = getToken();
2651   AsmToken NextToken[2];
2652   peekTokens(NextToken);
2653 
2654   return isOperandModifier(Tok, NextToken[0]) ||
2655          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2656          isOpcodeModifierWithVal(Tok, NextToken[0]);
2657 }
2658 
2659 // Check if the current token is an SP3 'neg' modifier.
2660 // Currently this modifier is allowed in the following context:
2661 //
2662 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2663 // 2. Before an 'abs' modifier: -abs(...)
2664 // 3. Before an SP3 'abs' modifier: -|...|
2665 //
2666 // In all other cases "-" is handled as a part
2667 // of an expression that follows the sign.
2668 //
2669 // Note: When "-" is followed by an integer literal,
2670 // this is interpreted as integer negation rather
2671 // than a floating-point NEG modifier applied to N.
2672 // Besides being counter-intuitive, such use of a floating-point
2673 // NEG modifier would have resulted in a different meaning
2674 // of integer literals used with VOP1/2/C and VOP3,
2675 // for example:
2676 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2677 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2678 // Negative fp literals with a preceding "-" are
2679 // handled likewise, for uniformity.
2680 //
2681 bool
2682 AMDGPUAsmParser::parseSP3NegModifier() {
2683 
2684   AsmToken NextToken[2];
2685   peekTokens(NextToken);
2686 
2687   if (isToken(AsmToken::Minus) &&
2688       (isRegister(NextToken[0], NextToken[1]) ||
2689        NextToken[0].is(AsmToken::Pipe) ||
2690        isId(NextToken[0], "abs"))) {
2691     lex();
2692     return true;
2693   }
2694 
2695   return false;
2696 }
2697 
2698 OperandMatchResultTy
2699 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2700                                               bool AllowImm) {
2701   bool Neg, SP3Neg;
2702   bool Abs, SP3Abs;
2703   SMLoc Loc;
2704 
2705   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2706   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2707     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2708     return MatchOperand_ParseFail;
2709   }
2710 
2711   SP3Neg = parseSP3NegModifier();
2712 
2713   Loc = getLoc();
2714   Neg = trySkipId("neg");
2715   if (Neg && SP3Neg) {
2716     Error(Loc, "expected register or immediate");
2717     return MatchOperand_ParseFail;
2718   }
2719   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2720     return MatchOperand_ParseFail;
2721 
2722   Abs = trySkipId("abs");
2723   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2724     return MatchOperand_ParseFail;
2725 
2726   Loc = getLoc();
2727   SP3Abs = trySkipToken(AsmToken::Pipe);
2728   if (Abs && SP3Abs) {
2729     Error(Loc, "expected register or immediate");
2730     return MatchOperand_ParseFail;
2731   }
2732 
2733   OperandMatchResultTy Res;
2734   if (AllowImm) {
2735     Res = parseRegOrImm(Operands, SP3Abs);
2736   } else {
2737     Res = parseReg(Operands);
2738   }
2739   if (Res != MatchOperand_Success) {
2740     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2741   }
2742 
2743   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2744     return MatchOperand_ParseFail;
2745   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2746     return MatchOperand_ParseFail;
2747   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2748     return MatchOperand_ParseFail;
2749 
2750   AMDGPUOperand::Modifiers Mods;
2751   Mods.Abs = Abs || SP3Abs;
2752   Mods.Neg = Neg || SP3Neg;
2753 
2754   if (Mods.hasFPModifiers()) {
2755     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2756     if (Op.isExpr()) {
2757       Error(Op.getStartLoc(), "expected an absolute expression");
2758       return MatchOperand_ParseFail;
2759     }
2760     Op.setModifiers(Mods);
2761   }
2762   return MatchOperand_Success;
2763 }
2764 
2765 OperandMatchResultTy
2766 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2767                                                bool AllowImm) {
2768   bool Sext = trySkipId("sext");
2769   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2770     return MatchOperand_ParseFail;
2771 
2772   OperandMatchResultTy Res;
2773   if (AllowImm) {
2774     Res = parseRegOrImm(Operands);
2775   } else {
2776     Res = parseReg(Operands);
2777   }
2778   if (Res != MatchOperand_Success) {
2779     return Sext? MatchOperand_ParseFail : Res;
2780   }
2781 
2782   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2783     return MatchOperand_ParseFail;
2784 
2785   AMDGPUOperand::Modifiers Mods;
2786   Mods.Sext = Sext;
2787 
2788   if (Mods.hasIntModifiers()) {
2789     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2790     if (Op.isExpr()) {
2791       Error(Op.getStartLoc(), "expected an absolute expression");
2792       return MatchOperand_ParseFail;
2793     }
2794     Op.setModifiers(Mods);
2795   }
2796 
2797   return MatchOperand_Success;
2798 }
2799 
2800 OperandMatchResultTy
2801 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2802   return parseRegOrImmWithFPInputMods(Operands, false);
2803 }
2804 
2805 OperandMatchResultTy
2806 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2807   return parseRegOrImmWithIntInputMods(Operands, false);
2808 }
2809 
2810 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2811   auto Loc = getLoc();
2812   if (trySkipId("off")) {
2813     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2814                                                 AMDGPUOperand::ImmTyOff, false));
2815     return MatchOperand_Success;
2816   }
2817 
2818   if (!isRegister())
2819     return MatchOperand_NoMatch;
2820 
2821   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2822   if (Reg) {
2823     Operands.push_back(std::move(Reg));
2824     return MatchOperand_Success;
2825   }
2826 
2827   return MatchOperand_ParseFail;
2828 
2829 }
2830 
2831 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2832   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2833 
2834   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2835       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2836       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2837       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2838     return Match_InvalidOperand;
2839 
2840   if ((TSFlags & SIInstrFlags::VOP3) &&
2841       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2842       getForcedEncodingSize() != 64)
2843     return Match_PreferE32;
2844 
2845   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2846       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2847     // v_mac_f32/16 allow only dst_sel == DWORD.
2848     auto OpNum =
2849         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2850     const auto &Op = Inst.getOperand(OpNum);
2851     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2852       return Match_InvalidOperand;
2853     }
2854   }
2855 
2856   return Match_Success;
2857 }
2858 
2859 static ArrayRef<unsigned> getAllVariants() {
2860   static const unsigned Variants[] = {
2861     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2862     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2863   };
2864 
2865   return makeArrayRef(Variants);
2866 }
2867 
2868 // What asm variants we should check
2869 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2870   if (getForcedEncodingSize() == 32) {
2871     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2872     return makeArrayRef(Variants);
2873   }
2874 
2875   if (isForcedVOP3()) {
2876     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2877     return makeArrayRef(Variants);
2878   }
2879 
2880   if (isForcedSDWA()) {
2881     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2882                                         AMDGPUAsmVariants::SDWA9};
2883     return makeArrayRef(Variants);
2884   }
2885 
2886   if (isForcedDPP()) {
2887     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2888     return makeArrayRef(Variants);
2889   }
2890 
2891   return getAllVariants();
2892 }
2893 
2894 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2895   if (getForcedEncodingSize() == 32)
2896     return "e32";
2897 
2898   if (isForcedVOP3())
2899     return "e64";
2900 
2901   if (isForcedSDWA())
2902     return "sdwa";
2903 
2904   if (isForcedDPP())
2905     return "dpp";
2906 
2907   return "";
2908 }
2909 
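// Return the first SGPR implicitly read by this VOP instruction
// (FLAT_SCR, VCC, VCC_LO/HI or M0), or NoRegister if there is none.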
2910 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2911   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2912   const unsigned Num = Desc.getNumImplicitUses();
2913   for (unsigned i = 0; i < Num; ++i) {
2914     unsigned Reg = Desc.ImplicitUses[i];
2915     switch (Reg) {
2916     case AMDGPU::FLAT_SCR:
2917     case AMDGPU::VCC:
2918     case AMDGPU::VCC_LO:
2919     case AMDGPU::VCC_HI:
2920     case AMDGPU::M0:
2921       return Reg;
2922     default:
2923       break;
2924     }
2925   }
2926   return AMDGPU::NoRegister;
2927 }
2928 
2929 // NB: This code is correct only when used to check constant
2930 // bus limitations because GFX7 supports no f16 inline constants.
2931 // Note that there are no cases when a GFX7 opcode violates
2932 // constant bus limitations due to the use of an f16 constant.
2933 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2934                                        unsigned OpIdx) const {
2935   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2936 
2937   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2938     return false;
2939   }
2940 
2941   const MCOperand &MO = Inst.getOperand(OpIdx);
2942 
2943   int64_t Val = MO.getImm();
2944   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2945 
2946   switch (OpSize) { // expected operand size
2947   case 8:
2948     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2949   case 4:
2950     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2951   case 2: {
2952     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2953     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2954         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2955         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2956       return AMDGPU::isInlinableIntLiteral(Val);
2957 
2958     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2959         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2960         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2961       return AMDGPU::isInlinableIntLiteralV216(Val);
2962 
2963     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2964         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2965         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2966       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2967 
2968     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2969   }
2970   default:
2971     llvm_unreachable("invalid operand size");
2972   }
2973 }
2974 
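// Number of scalar operands (SGPRs and literals) an instruction may read
// over the constant bus: 1 before GFX10; on GFX10, 2 for most opcodes but
// only 1 for 64-bit shifts.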
2975 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2976   if (!isGFX10())
2977     return 1;
2978 
2979   switch (Opcode) {
2980   // 64-bit shift instructions can use only one scalar value input
2981   case AMDGPU::V_LSHLREV_B64:
2982   case AMDGPU::V_LSHLREV_B64_gfx10:
2983   case AMDGPU::V_LSHL_B64:
2984   case AMDGPU::V_LSHRREV_B64:
2985   case AMDGPU::V_LSHRREV_B64_gfx10:
2986   case AMDGPU::V_LSHR_B64:
2987   case AMDGPU::V_ASHRREV_I64:
2988   case AMDGPU::V_ASHRREV_I64_gfx10:
2989   case AMDGPU::V_ASHR_I64:
2990     return 1;
2991   default:
2992     return 2;
2993   }
2994 }
2995 
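// An operand uses the constant bus if it is a non-inlinable immediate,
// an SGPR other than null, or an unevaluated expression.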
2996 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2997   const MCOperand &MO = Inst.getOperand(OpIdx);
2998   if (MO.isImm()) {
2999     return !isInlineConstant(Inst, OpIdx);
3000   } else if (MO.isReg()) {
3001     auto Reg = MO.getReg();
3002     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3003     auto PReg = mc2PseudoReg(Reg);
3004     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3005   } else {
3006     return true;
3007   }
3008 }
3009 
3010 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
3011   const unsigned Opcode = Inst.getOpcode();
3012   const MCInstrDesc &Desc = MII.get(Opcode);
3013   unsigned ConstantBusUseCount = 0;
3014   unsigned NumLiterals = 0;
3015   unsigned LiteralSize;
3016 
3017   if (Desc.TSFlags &
3018       (SIInstrFlags::VOPC |
3019        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3020        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3021        SIInstrFlags::SDWA)) {
3022     // Check special imm operands (used by madmk, etc)
3023     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3024       ++ConstantBusUseCount;
3025     }
3026 
3027     SmallDenseSet<unsigned> SGPRsUsed;
3028     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3029     if (SGPRUsed != AMDGPU::NoRegister) {
3030       SGPRsUsed.insert(SGPRUsed);
3031       ++ConstantBusUseCount;
3032     }
3033 
3034     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3035     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3036     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3037 
3038     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3039 
3040     for (int OpIdx : OpIndices) {
3041       if (OpIdx == -1) break;
3042 
3043       const MCOperand &MO = Inst.getOperand(OpIdx);
3044       if (usesConstantBus(Inst, OpIdx)) {
3045         if (MO.isReg()) {
3046           const unsigned Reg = mc2PseudoReg(MO.getReg());
3047           // Pairs of registers with a partial intersection like these
3048           //   s0, s[0:1]
3049           //   flat_scratch_lo, flat_scratch
3050           //   flat_scratch_lo, flat_scratch_hi
3051           // are theoretically valid but they are disabled anyway.
3052           // Note that this code mimics SIInstrInfo::verifyInstruction
3053           if (!SGPRsUsed.count(Reg)) {
3054             SGPRsUsed.insert(Reg);
3055             ++ConstantBusUseCount;
3056           }
3057         } else { // Expression or a literal
3058 
3059           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3060             continue; // special operand like VINTERP attr_chan
3061 
3062           // An instruction may use only one literal.
3063           // This has been validated on the previous step.
3064           // See validateVOP3Literal.
3065           // This literal may be used as more than one operand.
3066           // If all these operands are of the same size,
3067           // this literal counts as one scalar value.
3068           // Otherwise it counts as 2 scalar values.
3069           // See "GFX10 Shader Programming", section 3.6.2.3.
3070 
3071           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3072           if (Size < 4) Size = 4;
3073 
3074           if (NumLiterals == 0) {
3075             NumLiterals = 1;
3076             LiteralSize = Size;
3077           } else if (LiteralSize != Size) {
3078             NumLiterals = 2;
3079           }
3080         }
3081       }
3082     }
3083   }
3084   ConstantBusUseCount += NumLiterals;
3085 
3086   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3087 }
3088 
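// For instructions with an earlyclobber destination, check that the
// destination register does not overlap any source register.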
3089 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3090   const unsigned Opcode = Inst.getOpcode();
3091   const MCInstrDesc &Desc = MII.get(Opcode);
3092 
3093   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3094   if (DstIdx == -1 ||
3095       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3096     return true;
3097   }
3098 
3099   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3100 
3101   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3102   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3103   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3104 
3105   assert(DstIdx != -1);
3106   const MCOperand &Dst = Inst.getOperand(DstIdx);
3107   assert(Dst.isReg());
3108   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3109 
3110   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
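  // Informal example: for an early-clobber opcode such as v_mqsad_u32_u8,
  // "v_mqsad_u32_u8 v[0:3], v[0:1], v2, v[4:7]" is rejected below because
  // vdst v[0:3] overlaps src0 v[0:1].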
3111 
3112   for (int SrcIdx : SrcIndices) {
3113     if (SrcIdx == -1) break;
3114     const MCOperand &Src = Inst.getOperand(SrcIdx);
3115     if (Src.isReg()) {
3116       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3117       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3118         return false;
3119       }
3120     }
3121   }
3122 
3123   return true;
3124 }
3125 
3126 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3127 
3128   const unsigned Opc = Inst.getOpcode();
3129   const MCInstrDesc &Desc = MII.get(Opc);
3130 
3131   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3132     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3133     assert(ClampIdx != -1);
3134     return Inst.getOperand(ClampIdx).getImm() == 0;
3135   }
3136 
3137   return true;
3138 }
3139 
3140 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3141 
3142   const unsigned Opc = Inst.getOpcode();
3143   const MCInstrDesc &Desc = MII.get(Opc);
3144 
3145   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3146     return true;
3147 
3148   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3149   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3150   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3151 
3152   assert(VDataIdx != -1);
3153 
3154   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3155     return true;
3156 
3157   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3159   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3160   if (DMask == 0)
3161     DMask = 1;
3162 
3163   unsigned DataSize =
3164     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3165   if (hasPackedD16()) {
3166     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3167     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3168       DataSize = (DataSize + 1) / 2;
3169   }
3170 
3171   return (VDataSize / 4) == DataSize + TFESize;
3172 }
3173 
3174 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3175   const unsigned Opc = Inst.getOpcode();
3176   const MCInstrDesc &Desc = MII.get(Opc);
3177 
3178   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3179     return true;
3180 
3181   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3182 
3183   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3184       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3185   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3186   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3187   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3188 
3189   assert(VAddr0Idx != -1);
3190   assert(SrsrcIdx != -1);
3191   assert(SrsrcIdx > VAddr0Idx);
3192 
3193   if (DimIdx == -1)
3194     return true; // intersect_ray
3195 
3196   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3197   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3198   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3199   unsigned VAddrSize =
3200       IsNSA ? SrsrcIdx - VAddr0Idx
3201             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3202 
3203   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3204                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3205                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3206                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3207   if (!IsNSA) {
3208     if (AddrSize > 8)
3209       AddrSize = 16;
3210     else if (AddrSize > 4)
3211       AddrSize = 8;
3212   }
3213 
3214   return VAddrSize == AddrSize;
3215 }
3216 
3217 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3218 
3219   const unsigned Opc = Inst.getOpcode();
3220   const MCInstrDesc &Desc = MII.get(Opc);
3221 
3222   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3223     return true;
3224   if (!Desc.mayLoad() || !Desc.mayStore())
3225     return true; // Not atomic
3226 
3227   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3228   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3229 
3230   // This is an incomplete check because image_atomic_cmpswap
3231   // may only use 0x3 and 0xf while other atomic operations
3232   // may use 0x1 and 0x3. However these limitations are
3233   // verified when we check that dmask matches dst size.
3234   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3235 }
3236 
3237 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3238 
3239   const unsigned Opc = Inst.getOpcode();
3240   const MCInstrDesc &Desc = MII.get(Opc);
3241 
3242   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3243     return true;
3244 
3245   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3246   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3247 
3248   // GATHER4 instructions use dmask in a different fashion compared to
3249   // other MIMG instructions. The only useful DMASK values are
3250   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3251   // (red,red,red,red) etc.) The ISA document doesn't mention
3252   // this.
3253   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3254 }
3255 
static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3258   switch (Opcode) {
3259   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3260   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3261   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3262     return true;
3263   default:
3264     return false;
3265   }
3266 }
3267 
// movrels* opcodes should only allow VGPRs as src0.
3269 // This is specified in .td description for vop1/vop3,
3270 // but sdwa is handled differently. See isSDWAOperand.
3271 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3272 
3273   const unsigned Opc = Inst.getOpcode();
3274   const MCInstrDesc &Desc = MII.get(Opc);
3275 
3276   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3277     return true;
3278 
3279   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3280   assert(Src0Idx != -1);
3281 
3282   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3283   if (!Src0.isReg())
3284     return false;
3285 
3286   auto Reg = Src0.getReg();
3287   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3288   return !isSGPR(mc2PseudoReg(Reg), TRI);
3289 }
3290 
3291 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3292 
3293   const unsigned Opc = Inst.getOpcode();
3294 
3295   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3296     return true;
3297 
3298   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3299   assert(Src0Idx != -1);
3300 
3301   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3302   if (!Src0.isReg())
3303     return true;
3304 
3305   auto Reg = Src0.getReg();
3306   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3307   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3308     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3309     return false;
3310   }
3311 
3312   return true;
3313 }
3314 
3315 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3316   switch (Inst.getOpcode()) {
3317   default:
3318     return true;
3319   case V_DIV_SCALE_F32_gfx6_gfx7:
3320   case V_DIV_SCALE_F32_vi:
3321   case V_DIV_SCALE_F32_gfx10:
3322   case V_DIV_SCALE_F64_gfx6_gfx7:
3323   case V_DIV_SCALE_F64_vi:
3324   case V_DIV_SCALE_F64_gfx10:
3325     break;
3326   }
3327 
3328   // TODO: Check that src0 = src1 or src2.
3329 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3333     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3334             .getImm() &
3335         SISrcMods::ABS) {
3336       Error(getLoc(), "ABS not allowed in VOP3B instructions");
3337       return false;
3338     }
3339   }
3340 
3341   return true;
3342 }
3343 
3344 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3345 
3346   const unsigned Opc = Inst.getOpcode();
3347   const MCInstrDesc &Desc = MII.get(Opc);
3348 
3349   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3350     return true;
3351 
3352   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3353   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3354     if (isCI() || isSI())
3355       return false;
3356   }
3357 
3358   return true;
3359 }
3360 
3361 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3362   const unsigned Opc = Inst.getOpcode();
3363   const MCInstrDesc &Desc = MII.get(Opc);
3364 
3365   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3366     return true;
3367 
3368   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3369   if (DimIdx < 0)
3370     return true;
3371 
3372   long Imm = Inst.getOperand(DimIdx).getImm();
3373   if (Imm < 0 || Imm >= 8)
3374     return false;
3375 
3376   return true;
3377 }
3378 
static bool IsRevOpcode(const unsigned Opcode) {
3381   switch (Opcode) {
3382   case AMDGPU::V_SUBREV_F32_e32:
3383   case AMDGPU::V_SUBREV_F32_e64:
3384   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3385   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3386   case AMDGPU::V_SUBREV_F32_e32_vi:
3387   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3388   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3389   case AMDGPU::V_SUBREV_F32_e64_vi:
3390 
3391   case AMDGPU::V_SUBREV_CO_U32_e32:
3392   case AMDGPU::V_SUBREV_CO_U32_e64:
3393   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3394   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3395 
3396   case AMDGPU::V_SUBBREV_U32_e32:
3397   case AMDGPU::V_SUBBREV_U32_e64:
3398   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3399   case AMDGPU::V_SUBBREV_U32_e32_vi:
3400   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3401   case AMDGPU::V_SUBBREV_U32_e64_vi:
3402 
3403   case AMDGPU::V_SUBREV_U32_e32:
3404   case AMDGPU::V_SUBREV_U32_e64:
3405   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3406   case AMDGPU::V_SUBREV_U32_e32_vi:
3407   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3408   case AMDGPU::V_SUBREV_U32_e64_vi:
3409 
3410   case AMDGPU::V_SUBREV_F16_e32:
3411   case AMDGPU::V_SUBREV_F16_e64:
3412   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3413   case AMDGPU::V_SUBREV_F16_e32_vi:
3414   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3415   case AMDGPU::V_SUBREV_F16_e64_vi:
3416 
3417   case AMDGPU::V_SUBREV_U16_e32:
3418   case AMDGPU::V_SUBREV_U16_e64:
3419   case AMDGPU::V_SUBREV_U16_e32_vi:
3420   case AMDGPU::V_SUBREV_U16_e64_vi:
3421 
3422   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3423   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3424   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3425 
3426   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3427   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3428 
3429   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3430   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3431 
3432   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3433   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3434 
3435   case AMDGPU::V_LSHRREV_B32_e32:
3436   case AMDGPU::V_LSHRREV_B32_e64:
3437   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3438   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3439   case AMDGPU::V_LSHRREV_B32_e32_vi:
3440   case AMDGPU::V_LSHRREV_B32_e64_vi:
3441   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3442   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3443 
3444   case AMDGPU::V_ASHRREV_I32_e32:
3445   case AMDGPU::V_ASHRREV_I32_e64:
3446   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3447   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3448   case AMDGPU::V_ASHRREV_I32_e32_vi:
3449   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3450   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3451   case AMDGPU::V_ASHRREV_I32_e64_vi:
3452 
3453   case AMDGPU::V_LSHLREV_B32_e32:
3454   case AMDGPU::V_LSHLREV_B32_e64:
3455   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3456   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3457   case AMDGPU::V_LSHLREV_B32_e32_vi:
3458   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3459   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3460   case AMDGPU::V_LSHLREV_B32_e64_vi:
3461 
3462   case AMDGPU::V_LSHLREV_B16_e32:
3463   case AMDGPU::V_LSHLREV_B16_e64:
3464   case AMDGPU::V_LSHLREV_B16_e32_vi:
3465   case AMDGPU::V_LSHLREV_B16_e64_vi:
3466   case AMDGPU::V_LSHLREV_B16_gfx10:
3467 
3468   case AMDGPU::V_LSHRREV_B16_e32:
3469   case AMDGPU::V_LSHRREV_B16_e64:
3470   case AMDGPU::V_LSHRREV_B16_e32_vi:
3471   case AMDGPU::V_LSHRREV_B16_e64_vi:
3472   case AMDGPU::V_LSHRREV_B16_gfx10:
3473 
3474   case AMDGPU::V_ASHRREV_I16_e32:
3475   case AMDGPU::V_ASHRREV_I16_e64:
3476   case AMDGPU::V_ASHRREV_I16_e32_vi:
3477   case AMDGPU::V_ASHRREV_I16_e64_vi:
3478   case AMDGPU::V_ASHRREV_I16_gfx10:
3479 
3480   case AMDGPU::V_LSHLREV_B64:
3481   case AMDGPU::V_LSHLREV_B64_gfx10:
3482   case AMDGPU::V_LSHLREV_B64_vi:
3483 
3484   case AMDGPU::V_LSHRREV_B64:
3485   case AMDGPU::V_LSHRREV_B64_gfx10:
3486   case AMDGPU::V_LSHRREV_B64_vi:
3487 
3488   case AMDGPU::V_ASHRREV_I64:
3489   case AMDGPU::V_ASHRREV_I64_gfx10:
3490   case AMDGPU::V_ASHRREV_I64_vi:
3491 
3492   case AMDGPU::V_PK_LSHLREV_B16:
3493   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3494   case AMDGPU::V_PK_LSHLREV_B16_vi:
3495 
3496   case AMDGPU::V_PK_LSHRREV_B16:
3497   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3498   case AMDGPU::V_PK_LSHRREV_B16_vi:
3499   case AMDGPU::V_PK_ASHRREV_I16:
3500   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3501   case AMDGPU::V_PK_ASHRREV_I16_vi:
3502     return true;
3503   default:
3504     return false;
3505   }
3506 }
3507 
3508 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3509 
3510   using namespace SIInstrFlags;
3511   const unsigned Opcode = Inst.getOpcode();
3512   const MCInstrDesc &Desc = MII.get(Opcode);
3513 
  // The lds_direct register is defined so that it can only be used
  // with 9-bit src operands. Ignore encodings which do not accept them.
  if ((Desc.TSFlags &
       (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3517     return true;
3518 
3519   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3520   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3521   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3522 
3523   const int SrcIndices[] = { Src1Idx, Src2Idx };
3524 
3525   // lds_direct cannot be specified as either src1 or src2.
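  // Informal example: "v_add_f32 v0, v1, lds_direct" is rejected here, while
  // lds_direct used as src0 is subject only to the checks further below.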
3526   for (int SrcIdx : SrcIndices) {
3527     if (SrcIdx == -1) break;
3528     const MCOperand &Src = Inst.getOperand(SrcIdx);
3529     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3530       return false;
3531     }
3532   }
3533 
3534   if (Src0Idx == -1)
3535     return true;
3536 
3537   const MCOperand &Src = Inst.getOperand(Src0Idx);
3538   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3539     return true;
3540 
3541   // lds_direct is specified as src0. Check additional limitations.
3542   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3543 }
3544 
3545 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3546   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3547     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3548     if (Op.isFlatOffset())
3549       return Op.getStartLoc();
3550   }
3551   return getLoc();
3552 }
3553 
3554 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3555                                          const OperandVector &Operands) {
3556   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3557   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3558     return true;
3559 
3560   auto Opcode = Inst.getOpcode();
3561   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3562   assert(OpNum != -1);
3563 
3564   const auto &Op = Inst.getOperand(OpNum);
3565   if (!hasFlatOffsets() && Op.getImm() != 0) {
3566     Error(getFlatOffsetLoc(Operands),
3567           "flat offset modifier is not supported on this GPU");
3568     return false;
3569   }
3570 
3571   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3572   // For FLAT segment the offset must be positive;
3573   // MSB is ignored and forced to zero.
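  // Informal example: on GFX10 a global/scratch access may use offsets in
  // [-2048, 2047], while a plain FLAT access accepts only [0, 2047].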
3574   unsigned OffsetSize = isGFX9() ? 13 : 12;
3575   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3576     if (!isIntN(OffsetSize, Op.getImm())) {
3577       Error(getFlatOffsetLoc(Operands),
3578             isGFX9() ? "expected a 13-bit signed offset" :
3579                        "expected a 12-bit signed offset");
3580       return false;
3581     }
3582   } else {
3583     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3584       Error(getFlatOffsetLoc(Operands),
3585             isGFX9() ? "expected a 12-bit unsigned offset" :
3586                        "expected an 11-bit unsigned offset");
3587       return false;
3588     }
3589   }
3590 
3591   return true;
3592 }
3593 
3594 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3595   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3596     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3597     if (Op.isSMEMOffset())
3598       return Op.getStartLoc();
3599   }
3600   return getLoc();
3601 }
3602 
3603 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3604                                          const OperandVector &Operands) {
3605   if (isCI() || isSI())
3606     return true;
3607 
3608   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3609   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3610     return true;
3611 
3612   auto Opcode = Inst.getOpcode();
3613   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3614   if (OpNum == -1)
3615     return true;
3616 
3617   const auto &Op = Inst.getOperand(OpNum);
3618   if (!Op.isImm())
3619     return true;
3620 
3621   uint64_t Offset = Op.getImm();
3622   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
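  // Rule of thumb (matching the error text below): VI and buffer forms take a
  // 20-bit unsigned offset; other GFX9+ forms take a 21-bit signed offset.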
3623   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3624       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3625     return true;
3626 
3627   Error(getSMEMOffsetLoc(Operands),
3628         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3629                                "expected a 21-bit signed offset");
3630 
3631   return false;
3632 }
3633 
3634 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3635   unsigned Opcode = Inst.getOpcode();
3636   const MCInstrDesc &Desc = MII.get(Opcode);
3637   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3638     return true;
3639 
3640   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3641   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3642 
3643   const int OpIndices[] = { Src0Idx, Src1Idx };
3644 
3645   unsigned NumExprs = 0;
3646   unsigned NumLiterals = 0;
3647   uint32_t LiteralValue;
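  // Informal example: "s_add_u32 s0, 0x11111111, 0x11111111" reuses a single
  // literal value and is accepted; two distinct 32-bit literals are not.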
3648 
3649   for (int OpIdx : OpIndices) {
3650     if (OpIdx == -1) break;
3651 
3652     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like those used by s_set_gpr_idx_on)
3654     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3655       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3656         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3657         if (NumLiterals == 0 || LiteralValue != Value) {
3658           LiteralValue = Value;
3659           ++NumLiterals;
3660         }
3661       } else if (MO.isExpr()) {
3662         ++NumExprs;
3663       }
3664     }
3665   }
3666 
3667   return NumLiterals + NumExprs <= 1;
3668 }
3669 
3670 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3671   const unsigned Opc = Inst.getOpcode();
3672   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3673       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
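    // Informally, op_sel carries the FI and BOUND_CTRL bits for permlane,
    // so only the two low bits may be set.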
3674     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3675     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3676 
3677     if (OpSel & ~3)
3678       return false;
3679   }
3680   return true;
3681 }
3682 
3683 // Check if VCC register matches wavefront size
3684 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3685   auto FB = getFeatureBits();
3686   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3687     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3688 }
3689 
3690 // VOP3 literal is only allowed in GFX10+ and only one can be used
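// (On targets without FeatureVOP3Literal, any non-inline constant in a
// VOP3/VOP3P source operand is rejected by the check below.)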
3691 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3692   unsigned Opcode = Inst.getOpcode();
3693   const MCInstrDesc &Desc = MII.get(Opcode);
3694   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3695     return true;
3696 
3697   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3698   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3699   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3700 
3701   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3702 
3703   unsigned NumExprs = 0;
3704   unsigned NumLiterals = 0;
3705   uint32_t LiteralValue;
3706 
3707   for (int OpIdx : OpIndices) {
3708     if (OpIdx == -1) break;
3709 
3710     const MCOperand &MO = Inst.getOperand(OpIdx);
3711     if (!MO.isImm() && !MO.isExpr())
3712       continue;
3713     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3714       continue;
3715 
3716     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3717         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3718       return false;
3719 
3720     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3721       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3722       if (NumLiterals == 0 || LiteralValue != Value) {
3723         LiteralValue = Value;
3724         ++NumLiterals;
3725       }
3726     } else if (MO.isExpr()) {
3727       ++NumExprs;
3728     }
3729   }
3730   NumLiterals += NumExprs;
3731 
3732   return !NumLiterals ||
3733          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3734 }
3735 
3736 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3737                                             const OperandVector &Operands,
3738                                             const SMLoc &IDLoc) {
3739   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3740                                           AMDGPU::OpName::glc1);
3741   if (GLCPos != -1) {
3742     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
3743     // in the asm string, and the default value means it is not present.
3744     if (Inst.getOperand(GLCPos).getImm() == -1) {
3745       Error(IDLoc, "instruction must use glc");
3746       return false;
3747     }
3748   }
3749 
3750   return true;
3751 }
3752 
3753 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3754                                           const SMLoc &IDLoc,
3755                                           const OperandVector &Operands) {
3756   if (!validateLdsDirect(Inst)) {
3757     Error(IDLoc,
3758       "invalid use of lds_direct");
3759     return false;
3760   }
3761   if (!validateSOPLiteral(Inst)) {
3762     Error(IDLoc,
3763       "only one literal operand is allowed");
3764     return false;
3765   }
3766   if (!validateVOP3Literal(Inst)) {
3767     Error(IDLoc,
3768       "invalid literal operand");
3769     return false;
3770   }
3771   if (!validateConstantBusLimitations(Inst)) {
3772     Error(IDLoc,
3773       "invalid operand (violates constant bus restrictions)");
3774     return false;
3775   }
3776   if (!validateEarlyClobberLimitations(Inst)) {
3777     Error(IDLoc,
3778       "destination must be different than all sources");
3779     return false;
3780   }
3781   if (!validateIntClampSupported(Inst)) {
3782     Error(IDLoc,
3783       "integer clamping is not supported on this GPU");
3784     return false;
3785   }
3786   if (!validateOpSel(Inst)) {
3787     Error(IDLoc,
3788       "invalid op_sel operand");
3789     return false;
3790   }
3791   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3792   if (!validateMIMGD16(Inst)) {
3793     Error(IDLoc,
3794       "d16 modifier is not supported on this GPU");
3795     return false;
3796   }
3797   if (!validateMIMGDim(Inst)) {
3798     Error(IDLoc, "dim modifier is required on this GPU");
3799     return false;
3800   }
3801   if (!validateMIMGDataSize(Inst)) {
3802     Error(IDLoc,
3803       "image data size does not match dmask and tfe");
3804     return false;
3805   }
3806   if (!validateMIMGAddrSize(Inst)) {
3807     Error(IDLoc,
3808       "image address size does not match dim and a16");
3809     return false;
3810   }
3811   if (!validateMIMGAtomicDMask(Inst)) {
3812     Error(IDLoc,
3813       "invalid atomic image dmask");
3814     return false;
3815   }
3816   if (!validateMIMGGatherDMask(Inst)) {
3817     Error(IDLoc,
3818       "invalid image_gather dmask: only one bit must be set");
3819     return false;
3820   }
3821   if (!validateMovrels(Inst)) {
3822     Error(IDLoc, "source operand must be a VGPR");
3823     return false;
3824   }
3825   if (!validateFlatOffset(Inst, Operands)) {
3826     return false;
3827   }
3828   if (!validateSMEMOffset(Inst, Operands)) {
3829     return false;
3830   }
3831   if (!validateMAIAccWrite(Inst)) {
3832     return false;
3833   }
3834   if (!validateDivScale(Inst)) {
3835     return false;
3836   }
3837   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3838     return false;
3839   }
3840 
3841   return true;
3842 }
3843 
3844 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3845                                             const FeatureBitset &FBS,
3846                                             unsigned VariantID = 0);
3847 
3848 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3849                                 const FeatureBitset &AvailableFeatures,
3850                                 unsigned VariantID);
3851 
3852 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3853                                        const FeatureBitset &FBS) {
3854   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3855 }
3856 
3857 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3858                                        const FeatureBitset &FBS,
3859                                        ArrayRef<unsigned> Variants) {
3860   for (auto Variant : Variants) {
3861     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3862       return true;
3863   }
3864 
3865   return false;
3866 }
3867 
3868 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3869                                                   const SMLoc &IDLoc) {
3870   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3871 
3872   // Check if requested instruction variant is supported.
3873   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3874     return false;
3875 
3876   // This instruction is not supported.
3877   // Clear any other pending errors because they are no longer relevant.
3878   getParser().clearPendingErrors();
3879 
3880   // Requested instruction variant is not supported.
3881   // Check if any other variants are supported.
3882   StringRef VariantName = getMatchedVariantName();
3883   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3884     return Error(IDLoc,
3885                  Twine(VariantName,
3886                        " variant of this instruction is not supported"));
3887   }
3888 
3889   // Finally check if this instruction is supported on any other GPU.
3890   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3891     return Error(IDLoc, "instruction not supported on this GPU");
3892   }
3893 
3894   // Instruction not supported on any GPU. Probably a typo.
3895   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3896   return Error(IDLoc, "invalid instruction" + Suggestion);
3897 }
3898 
3899 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3900                                               OperandVector &Operands,
3901                                               MCStreamer &Out,
3902                                               uint64_t &ErrorInfo,
3903                                               bool MatchingInlineAsm) {
3904   MCInst Inst;
3905   unsigned Result = Match_Success;
3906   for (auto Variant : getMatchedVariants()) {
3907     uint64_t EI;
3908     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3909                                   Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3913     if ((R == Match_Success) ||
3914         (R == Match_PreferE32) ||
3915         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3916         (R == Match_InvalidOperand && Result != Match_MissingFeature
3917                                    && Result != Match_PreferE32) ||
3918         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3919                                    && Result != Match_MissingFeature
3920                                    && Result != Match_PreferE32)) {
3921       Result = R;
3922       ErrorInfo = EI;
3923     }
3924     if (R == Match_Success)
3925       break;
3926   }
3927 
3928   if (Result == Match_Success) {
3929     if (!validateInstruction(Inst, IDLoc, Operands)) {
3930       return true;
3931     }
3932     Inst.setLoc(IDLoc);
3933     Out.emitInstruction(Inst, getSTI());
3934     return false;
3935   }
3936 
3937   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3938   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3939     return true;
3940   }
3941 
3942   switch (Result) {
3943   default: break;
3944   case Match_MissingFeature:
3945     // It has been verified that the specified instruction
3946     // mnemonic is valid. A match was found but it requires
3947     // features which are not supported on this GPU.
3948     return Error(IDLoc, "operands are not valid for this GPU or mode");
3949 
3950   case Match_InvalidOperand: {
3951     SMLoc ErrorLoc = IDLoc;
3952     if (ErrorInfo != ~0ULL) {
3953       if (ErrorInfo >= Operands.size()) {
3954         return Error(IDLoc, "too few operands for instruction");
3955       }
3956       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3957       if (ErrorLoc == SMLoc())
3958         ErrorLoc = IDLoc;
3959     }
3960     return Error(ErrorLoc, "invalid operand for instruction");
3961   }
3962 
3963   case Match_PreferE32:
3964     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3965                         "should be encoded as e32");
3966   case Match_MnemonicFail:
3967     llvm_unreachable("Invalid instructions should have been handled already");
3968   }
3969   llvm_unreachable("Implement any new match types added!");
3970 }
3971 
3972 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3973   int64_t Tmp = -1;
  if (getLexer().isNot(AsmToken::Integer) &&
      getLexer().isNot(AsmToken::Identifier)) {
3975     return true;
3976   }
3977   if (getParser().parseAbsoluteExpression(Tmp)) {
3978     return true;
3979   }
3980   Ret = static_cast<uint32_t>(Tmp);
3981   return false;
3982 }
3983 
3984 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3985                                                uint32_t &Minor) {
3986   if (ParseAsAbsoluteExpression(Major))
3987     return TokError("invalid major version");
3988 
3989   if (getLexer().isNot(AsmToken::Comma))
3990     return TokError("minor version number required, comma expected");
3991   Lex();
3992 
3993   if (ParseAsAbsoluteExpression(Minor))
3994     return TokError("invalid minor version");
3995 
3996   return false;
3997 }
3998 
3999 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4000   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4001     return TokError("directive only supported for amdgcn architecture");
4002 
4003   std::string Target;
4004 
4005   SMLoc TargetStart = getTok().getLoc();
4006   if (getParser().parseEscapedString(Target))
4007     return true;
4008   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4009 
4010   std::string ExpectedTarget;
4011   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4012   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4013 
4014   if (Target != ExpectedTargetOS.str())
4015     return getParser().Error(TargetRange.Start, "target must match options",
4016                              TargetRange);
4017 
4018   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4019   return false;
4020 }
4021 
4022 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4023   return getParser().Error(Range.Start, "value out of range", Range);
4024 }
4025 
4026 bool AMDGPUAsmParser::calculateGPRBlocks(
4027     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4028     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4029     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4030     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4031   // TODO(scott.linder): These calculations are duplicated from
4032   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
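  // Informally: the raw register counts are rounded up to hardware allocation
  // granules ("blocks") below; e.g. with a granule of 4 VGPRs, 13 VGPRs
  // occupy 4 blocks. The exact granule size is target-dependent.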
4033   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4034 
4035   unsigned NumVGPRs = NextFreeVGPR;
4036   unsigned NumSGPRs = NextFreeSGPR;
4037 
4038   if (Version.Major >= 10)
4039     NumSGPRs = 0;
4040   else {
4041     unsigned MaxAddressableNumSGPRs =
4042         IsaInfo::getAddressableNumSGPRs(&getSTI());
4043 
4044     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4045         NumSGPRs > MaxAddressableNumSGPRs)
4046       return OutOfRangeError(SGPRRange);
4047 
4048     NumSGPRs +=
4049         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4050 
4051     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4052         NumSGPRs > MaxAddressableNumSGPRs)
4053       return OutOfRangeError(SGPRRange);
4054 
4055     if (Features.test(FeatureSGPRInitBug))
4056       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4057   }
4058 
4059   VGPRBlocks =
4060       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4061   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4062 
4063   return false;
4064 }
4065 
4066 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4067   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4068     return TokError("directive only supported for amdgcn architecture");
4069 
4070   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4071     return TokError("directive only supported for amdhsa OS");
4072 
4073   StringRef KernelName;
4074   if (getParser().parseIdentifier(KernelName))
4075     return true;
4076 
4077   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4078 
4079   StringSet<> Seen;
4080 
4081   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4082 
4083   SMRange VGPRRange;
4084   uint64_t NextFreeVGPR = 0;
4085   SMRange SGPRRange;
4086   uint64_t NextFreeSGPR = 0;
4087   unsigned UserSGPRCount = 0;
4088   bool ReserveVCC = true;
4089   bool ReserveFlatScr = true;
4090   bool ReserveXNACK = hasXNACK();
4091   Optional<bool> EnableWavefrontSize32;
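  // The loop below consumes directives of the form (informal sketch):
  //   .amdhsa_next_free_vgpr 8
  //   .amdhsa_next_free_sgpr 16
  //   ...
  // until .end_amdhsa_kernel, each entry filling a field of KD.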
4092 
4093   while (true) {
4094     while (getLexer().is(AsmToken::EndOfStatement))
4095       Lex();
4096 
4097     if (getLexer().isNot(AsmToken::Identifier))
4098       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4099 
4100     StringRef ID = getTok().getIdentifier();
4101     SMRange IDRange = getTok().getLocRange();
4102     Lex();
4103 
4104     if (ID == ".end_amdhsa_kernel")
4105       break;
4106 
4107     if (Seen.find(ID) != Seen.end())
4108       return TokError(".amdhsa_ directives cannot be repeated");
4109     Seen.insert(ID);
4110 
4111     SMLoc ValStart = getTok().getLoc();
4112     int64_t IVal;
4113     if (getParser().parseAbsoluteExpression(IVal))
4114       return true;
4115     SMLoc ValEnd = getTok().getLoc();
4116     SMRange ValRange = SMRange(ValStart, ValEnd);
4117 
4118     if (IVal < 0)
4119       return OutOfRangeError(ValRange);
4120 
4121     uint64_t Val = IVal;
4122 
4123 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4124   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4125     return OutOfRangeError(RANGE);                                             \
4126   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
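// PARSE_BITS_ENTRY (above) range-checks VALUE against ENTRY's declared bit
// width and, if it fits, packs it into the corresponding bitfield of FIELD.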
4127 
4128     if (ID == ".amdhsa_group_segment_fixed_size") {
4129       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4130         return OutOfRangeError(ValRange);
4131       KD.group_segment_fixed_size = Val;
4132     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4133       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4134         return OutOfRangeError(ValRange);
4135       KD.private_segment_fixed_size = Val;
4136     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4137       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4138                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4139                        Val, ValRange);
4140       if (Val)
4141         UserSGPRCount += 4;
4142     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4143       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4144                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4145                        ValRange);
4146       if (Val)
4147         UserSGPRCount += 2;
4148     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4149       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4150                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4151                        ValRange);
4152       if (Val)
4153         UserSGPRCount += 2;
4154     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4155       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4156                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4157                        Val, ValRange);
4158       if (Val)
4159         UserSGPRCount += 2;
4160     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4161       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4162                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4163                        ValRange);
4164       if (Val)
4165         UserSGPRCount += 2;
4166     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4167       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4168                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4169                        ValRange);
4170       if (Val)
4171         UserSGPRCount += 2;
4172     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4173       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4174                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4175                        Val, ValRange);
4176       if (Val)
4177         UserSGPRCount += 1;
4178     } else if (ID == ".amdhsa_wavefront_size32") {
4179       if (IVersion.Major < 10)
4180         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4181                                  IDRange);
4182       EnableWavefrontSize32 = Val;
4183       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4184                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4185                        Val, ValRange);
4186     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4187       PARSE_BITS_ENTRY(
4188           KD.compute_pgm_rsrc2,
4189           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4190           ValRange);
4191     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4192       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4193                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4194                        ValRange);
4195     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4196       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4197                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4198                        ValRange);
4199     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4200       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4201                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4202                        ValRange);
4203     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4204       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4205                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4206                        ValRange);
4207     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4208       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4209                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4210                        ValRange);
4211     } else if (ID == ".amdhsa_next_free_vgpr") {
4212       VGPRRange = ValRange;
4213       NextFreeVGPR = Val;
4214     } else if (ID == ".amdhsa_next_free_sgpr") {
4215       SGPRRange = ValRange;
4216       NextFreeSGPR = Val;
4217     } else if (ID == ".amdhsa_reserve_vcc") {
4218       if (!isUInt<1>(Val))
4219         return OutOfRangeError(ValRange);
4220       ReserveVCC = Val;
4221     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4222       if (IVersion.Major < 7)
4223         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4224                                  IDRange);
4225       if (!isUInt<1>(Val))
4226         return OutOfRangeError(ValRange);
4227       ReserveFlatScr = Val;
4228     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4229       if (IVersion.Major < 8)
4230         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4231                                  IDRange);
4232       if (!isUInt<1>(Val))
4233         return OutOfRangeError(ValRange);
4234       ReserveXNACK = Val;
4235     } else if (ID == ".amdhsa_float_round_mode_32") {
4236       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4237                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4238     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4239       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4240                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4241     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4242       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4243                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4244     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4245       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4246                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4247                        ValRange);
4248     } else if (ID == ".amdhsa_dx10_clamp") {
4249       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4250                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4251     } else if (ID == ".amdhsa_ieee_mode") {
4252       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4253                        Val, ValRange);
4254     } else if (ID == ".amdhsa_fp16_overflow") {
4255       if (IVersion.Major < 9)
4256         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4257                                  IDRange);
4258       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4259                        ValRange);
4260     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4261       if (IVersion.Major < 10)
4262         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4263                                  IDRange);
4264       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4265                        ValRange);
4266     } else if (ID == ".amdhsa_memory_ordered") {
4267       if (IVersion.Major < 10)
4268         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4269                                  IDRange);
4270       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4271                        ValRange);
4272     } else if (ID == ".amdhsa_forward_progress") {
4273       if (IVersion.Major < 10)
4274         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4275                                  IDRange);
4276       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4277                        ValRange);
4278     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4279       PARSE_BITS_ENTRY(
4280           KD.compute_pgm_rsrc2,
4281           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4282           ValRange);
4283     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4284       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4285                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4286                        Val, ValRange);
4287     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4288       PARSE_BITS_ENTRY(
4289           KD.compute_pgm_rsrc2,
4290           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4291           ValRange);
4292     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4293       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4294                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4295                        Val, ValRange);
4296     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4297       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4298                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4299                        Val, ValRange);
4300     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4301       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4302                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4303                        Val, ValRange);
4304     } else if (ID == ".amdhsa_exception_int_div_zero") {
4305       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4306                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4307                        Val, ValRange);
4308     } else {
4309       return getParser().Error(IDRange.Start,
4310                                "unknown .amdhsa_kernel directive", IDRange);
4311     }
4312 
4313 #undef PARSE_BITS_ENTRY
4314   }
4315 
4316   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4317     return TokError(".amdhsa_next_free_vgpr directive is required");
4318 
4319   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4320     return TokError(".amdhsa_next_free_sgpr directive is required");
4321 
4322   unsigned VGPRBlocks;
4323   unsigned SGPRBlocks;
4324   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4325                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4326                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4327                          SGPRBlocks))
4328     return true;
4329 
4330   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4331           VGPRBlocks))
4332     return OutOfRangeError(VGPRRange);
4333   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4334                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4335 
4336   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4337           SGPRBlocks))
4338     return OutOfRangeError(SGPRRange);
4339   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4340                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4341                   SGPRBlocks);
4342 
4343   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4344     return TokError("too many user SGPRs enabled");
4345   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4346                   UserSGPRCount);
4347 
4348   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4349       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4350       ReserveFlatScr, ReserveXNACK);
4351   return false;
4352 }
4353 
4354 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4355   uint32_t Major;
4356   uint32_t Minor;
4357 
4358   if (ParseDirectiveMajorMinor(Major, Minor))
4359     return true;
4360 
4361   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4362   return false;
4363 }
4364 
4365 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4366   uint32_t Major;
4367   uint32_t Minor;
4368   uint32_t Stepping;
4369   StringRef VendorName;
4370   StringRef ArchName;
4371 
4372   // If this directive has no arguments, then use the ISA version for the
4373   // targeted GPU.
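  // Informal example of the explicit form:
  //   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
  // i.e. major, minor, stepping, vendor name, arch name.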
4374   if (getLexer().is(AsmToken::EndOfStatement)) {
4375     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4376     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4377                                                       ISA.Stepping,
4378                                                       "AMD", "AMDGPU");
4379     return false;
4380   }
4381 
4382   if (ParseDirectiveMajorMinor(Major, Minor))
4383     return true;
4384 
4385   if (getLexer().isNot(AsmToken::Comma))
4386     return TokError("stepping version number required, comma expected");
4387   Lex();
4388 
4389   if (ParseAsAbsoluteExpression(Stepping))
4390     return TokError("invalid stepping version");
4391 
4392   if (getLexer().isNot(AsmToken::Comma))
4393     return TokError("vendor name required, comma expected");
4394   Lex();
4395 
4396   if (getLexer().isNot(AsmToken::String))
4397     return TokError("invalid vendor name");
4398 
4399   VendorName = getLexer().getTok().getStringContents();
4400   Lex();
4401 
4402   if (getLexer().isNot(AsmToken::Comma))
4403     return TokError("arch name required, comma expected");
4404   Lex();
4405 
4406   if (getLexer().isNot(AsmToken::String))
4407     return TokError("invalid arch name");
4408 
4409   ArchName = getLexer().getTok().getStringContents();
4410   Lex();
4411 
4412   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4413                                                     VendorName, ArchName);
4414   return false;
4415 }
4416 
4417 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4418                                                amd_kernel_code_t &Header) {
4419   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4420   // assembly for backwards compatibility.
4421   if (ID == "max_scratch_backing_memory_byte_size") {
4422     Parser.eatToEndOfStatement();
4423     return false;
4424   }
4425 
4426   SmallString<40> ErrStr;
4427   raw_svector_ostream Err(ErrStr);
4428   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4429     return TokError(Err.str());
4430   }
4431   Lex();
4432 
4433   if (ID == "enable_wavefront_size32") {
4434     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4435       if (!isGFX10())
4436         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4437       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4438         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4439     } else {
4440       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4441         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4442     }
4443   }
4444 
4445   if (ID == "wavefront_size") {
4446     if (Header.wavefront_size == 5) {
4447       if (!isGFX10())
4448         return TokError("wavefront_size=5 is only allowed on GFX10+");
4449       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4450         return TokError("wavefront_size=5 requires +WavefrontSize32");
4451     } else if (Header.wavefront_size == 6) {
4452       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4453         return TokError("wavefront_size=6 requires +WavefrontSize64");
4454     }
4455   }
4456 
4457   if (ID == "enable_wgp_mode") {
4458     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4459       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4460   }
4461 
4462   if (ID == "enable_mem_ordered") {
4463     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4464       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4465   }
4466 
4467   if (ID == "enable_fwd_progress") {
4468     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4469       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4470   }
4471 
4472   return false;
4473 }
4474 
4475 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4476   amd_kernel_code_t Header;
4477   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
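  // The loop below informally consumes "<key> = <value>" records, updating the
  // matching amd_kernel_code_t field, until .end_amd_kernel_code_t is seen.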
4478 
4479   while (true) {
4480     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4481     // will set the current token to EndOfStatement.
4482     while(getLexer().is(AsmToken::EndOfStatement))
4483       Lex();
4484 
4485     if (getLexer().isNot(AsmToken::Identifier))
4486       return TokError("expected value identifier or .end_amd_kernel_code_t");
4487 
4488     StringRef ID = getLexer().getTok().getIdentifier();
4489     Lex();
4490 
4491     if (ID == ".end_amd_kernel_code_t")
4492       break;
4493 
4494     if (ParseAMDKernelCodeTValue(ID, Header))
4495       return true;
4496   }
4497 
4498   getTargetStreamer().EmitAMDKernelCodeT(Header);
4499 
4500   return false;
4501 }
4502 
4503 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4504   if (getLexer().isNot(AsmToken::Identifier))
4505     return TokError("expected symbol name");
4506 
4507   StringRef KernelName = Parser.getTok().getString();
4508 
4509   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4510                                            ELF::STT_AMDGPU_HSA_KERNEL);
4511   Lex();
4512 
4513   KernelScope.initialize(getContext());
4514   return false;
4515 }
4516 
4517 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4518   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4519     return Error(getParser().getTok().getLoc(),
4520                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4521                  "architectures");
4522   }
4523 
4524   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4525 
4526   std::string ISAVersionStringFromSTI;
4527   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4528   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4529 
4530   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4531     return Error(getParser().getTok().getLoc(),
4532                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4533                  "arguments specified through the command line");
4534   }
4535 
4536   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4537   Lex();
4538 
4539   return false;
4540 }
4541 
4542 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4543   const char *AssemblerDirectiveBegin;
4544   const char *AssemblerDirectiveEnd;
4545   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4546       isHsaAbiVersion3(&getSTI())
4547           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4548                             HSAMD::V3::AssemblerDirectiveEnd)
4549           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4550                             HSAMD::AssemblerDirectiveEnd);
4551 
4552   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4553     return Error(getParser().getTok().getLoc(),
4554                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4555                  "not available on non-amdhsa OSes")).str());
4556   }
4557 
4558   std::string HSAMetadataString;
4559   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4560                           HSAMetadataString))
4561     return true;
4562 
4563   if (isHsaAbiVersion3(&getSTI())) {
4564     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4565       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4566   } else {
4567     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4568       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4569   }
4570 
4571   return false;
4572 }
4573 
4574 /// Common code to parse out a block of text (typically YAML) between start and
4575 /// end directives.
4576 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4577                                           const char *AssemblerDirectiveEnd,
4578                                           std::string &CollectString) {
4579 
4580   raw_string_ostream CollectStream(CollectString);
4581 
4582   getLexer().setSkipSpace(false);
4583 
4584   bool FoundEnd = false;
4585   while (!getLexer().is(AsmToken::Eof)) {
4586     while (getLexer().is(AsmToken::Space)) {
4587       CollectStream << getLexer().getTok().getString();
4588       Lex();
4589     }
4590 
4591     if (getLexer().is(AsmToken::Identifier)) {
4592       StringRef ID = getLexer().getTok().getIdentifier();
4593       if (ID == AssemblerDirectiveEnd) {
4594         Lex();
4595         FoundEnd = true;
4596         break;
4597       }
4598     }
4599 
4600     CollectStream << Parser.parseStringToEndOfStatement()
4601                   << getContext().getAsmInfo()->getSeparatorString();
4602 
4603     Parser.eatToEndOfStatement();
4604   }
4605 
4606   getLexer().setSkipSpace(true);
4607 
4608   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4609     return TokError(Twine("expected directive ") +
4610                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4611   }
4612 
4613   CollectStream.flush();
4614   return false;
4615 }
4616 
4617 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4618 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4619   std::string String;
4620   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4621                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4622     return true;
4623 
4624   auto PALMetadata = getTargetStreamer().getPALMetadata();
4625   if (!PALMetadata->setFromString(String))
4626     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4627   return false;
4628 }
4629 
4630 /// Parse the assembler directive for old linear-format PAL metadata.
4631 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4632   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4633     return Error(getParser().getTok().getLoc(),
4634                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4635                  "not available on non-amdpal OSes")).str());
4636   }
4637 
4638   auto PALMetadata = getTargetStreamer().getPALMetadata();
4639   PALMetadata->setLegacy();
4640   for (;;) {
4641     uint32_t Key, Value;
4642     if (ParseAsAbsoluteExpression(Key)) {
4643       return TokError(Twine("invalid value in ") +
4644                       Twine(PALMD::AssemblerDirective));
4645     }
4646     if (getLexer().isNot(AsmToken::Comma)) {
4647       return TokError(Twine("expected an even number of values in ") +
4648                       Twine(PALMD::AssemblerDirective));
4649     }
4650     Lex();
4651     if (ParseAsAbsoluteExpression(Value)) {
4652       return TokError(Twine("invalid value in ") +
4653                       Twine(PALMD::AssemblerDirective));
4654     }
4655     PALMetadata->setRegister(Key, Value);
4656     if (getLexer().isNot(AsmToken::Comma))
4657       break;
4658     Lex();
4659   }
4660   return false;
4661 }
4662 
4663 /// ParseDirectiveAMDGPULDS
4664 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
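///
/// A hypothetical example: ".amdgpu_lds lds_data, 4096, 16" reserves a
/// 4096-byte LDS symbol aligned to 16 bytes; the alignment operand defaults
/// to 4 when omitted.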
4665 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4666   if (getParser().checkForValidSection())
4667     return true;
4668 
4669   StringRef Name;
4670   SMLoc NameLoc = getLexer().getLoc();
4671   if (getParser().parseIdentifier(Name))
4672     return TokError("expected identifier in directive");
4673 
4674   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4675   if (parseToken(AsmToken::Comma, "expected ','"))
4676     return true;
4677 
4678   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4679 
4680   int64_t Size;
4681   SMLoc SizeLoc = getLexer().getLoc();
4682   if (getParser().parseAbsoluteExpression(Size))
4683     return true;
4684   if (Size < 0)
4685     return Error(SizeLoc, "size must be non-negative");
4686   if (Size > LocalMemorySize)
4687     return Error(SizeLoc, "size is too large");
4688 
4689   int64_t Alignment = 4;
4690   if (getLexer().is(AsmToken::Comma)) {
4691     Lex();
4692     SMLoc AlignLoc = getLexer().getLoc();
4693     if (getParser().parseAbsoluteExpression(Alignment))
4694       return true;
4695     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4696       return Error(AlignLoc, "alignment must be a power of two");
4697 
4698     // Alignment larger than the size of LDS is possible in theory, as long
4699     // as the linker manages to place the symbol at address 0, but we do want
4700     // to make sure the alignment fits nicely into a 32-bit integer.
4701     if (Alignment >= 1u << 31)
4702       return Error(AlignLoc, "alignment is too large");
4703   }
4704 
4705   if (parseToken(AsmToken::EndOfStatement,
4706                  "unexpected token in '.amdgpu_lds' directive"))
4707     return true;
4708 
4709   Symbol->redefineIfPossible();
4710   if (!Symbol->isUndefined())
4711     return Error(NameLoc, "invalid symbol redefinition");
4712 
4713   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4714   return false;
4715 }
4716 
4717 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4718   StringRef IDVal = DirectiveID.getString();
4719 
4720   if (isHsaAbiVersion3(&getSTI())) {
4721     if (IDVal == ".amdgcn_target")
4722       return ParseDirectiveAMDGCNTarget();
4723 
4724     if (IDVal == ".amdhsa_kernel")
4725       return ParseDirectiveAMDHSAKernel();
4726 
4727     // TODO: Restructure/combine with PAL metadata directive.
4728     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4729       return ParseDirectiveHSAMetadata();
4730   } else {
4731     if (IDVal == ".hsa_code_object_version")
4732       return ParseDirectiveHSACodeObjectVersion();
4733 
4734     if (IDVal == ".hsa_code_object_isa")
4735       return ParseDirectiveHSACodeObjectISA();
4736 
4737     if (IDVal == ".amd_kernel_code_t")
4738       return ParseDirectiveAMDKernelCodeT();
4739 
4740     if (IDVal == ".amdgpu_hsa_kernel")
4741       return ParseDirectiveAMDGPUHsaKernel();
4742 
4743     if (IDVal == ".amd_amdgpu_isa")
4744       return ParseDirectiveISAVersion();
4745 
4746     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4747       return ParseDirectiveHSAMetadata();
4748   }
4749 
4750   if (IDVal == ".amdgpu_lds")
4751     return ParseDirectiveAMDGPULDS();
4752 
4753   if (IDVal == PALMD::AssemblerDirectiveBegin)
4754     return ParseDirectivePALMetadataBegin();
4755 
4756   if (IDVal == PALMD::AssemblerDirective)
4757     return ParseDirectivePALMetadata();
4758 
4759   return true;
4760 }
4761 
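// Return true if the physical register RegNo is available on the current
// subtarget.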
4762 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4763                                            unsigned RegNo) const {
4764 
4765   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4766        R.isValid(); ++R) {
4767     if (*R == RegNo)
4768       return isGFX9Plus();
4769   }
4770 
4771   // GFX10 has 2 more SGPRs 104 and 105.
4772   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4773        R.isValid(); ++R) {
4774     if (*R == RegNo)
4775       return hasSGPR104_SGPR105();
4776   }
4777 
4778   switch (RegNo) {
4779   case AMDGPU::SRC_SHARED_BASE:
4780   case AMDGPU::SRC_SHARED_LIMIT:
4781   case AMDGPU::SRC_PRIVATE_BASE:
4782   case AMDGPU::SRC_PRIVATE_LIMIT:
4783   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4784     return !isCI() && !isSI() && !isVI();
4785   case AMDGPU::TBA:
4786   case AMDGPU::TBA_LO:
4787   case AMDGPU::TBA_HI:
4788   case AMDGPU::TMA:
4789   case AMDGPU::TMA_LO:
4790   case AMDGPU::TMA_HI:
4791     return !isGFX9() && !isGFX10();
4792   case AMDGPU::XNACK_MASK:
4793   case AMDGPU::XNACK_MASK_LO:
4794   case AMDGPU::XNACK_MASK_HI:
4795     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4796   case AMDGPU::SGPR_NULL:
4797     return isGFX10();
4798   default:
4799     break;
4800   }
4801 
4802   if (isCI())
4803     return true;
4804 
4805   if (isSI() || isGFX10()) {
4806     // No flat_scr on SI.
4807     // On GFX10 flat scratch is not a valid register operand and can only be
4808     // accessed with s_setreg/s_getreg.
4809     switch (RegNo) {
4810     case AMDGPU::FLAT_SCR:
4811     case AMDGPU::FLAT_SCR_LO:
4812     case AMDGPU::FLAT_SCR_HI:
4813       return false;
4814     default:
4815       return true;
4816     }
4817   }
4818 
4819   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4820   // SI/CI have.
4821   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4822        R.isValid(); ++R) {
4823     if (*R == RegNo)
4824       return hasSGPR102_SGPR103();
4825   }
4826 
4827   return true;
4828 }
4829 
4830 OperandMatchResultTy
4831 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4832                               OperandMode Mode) {
4833   // Try to parse with a custom parser
4834   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4835 
4836   // If we successfully parsed the operand or if there was an error parsing,
4837   // we are done.
4838   //
4839   // If we are parsing after we reach EndOfStatement then this means we
4840   // are appending default values to the Operands list.  This is only done
4841   // by custom parsers, so we shouldn't continue on to the generic parsing.
4842   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4843       getLexer().is(AsmToken::EndOfStatement))
4844     return ResTy;
4845 
4846   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4847     unsigned Prefix = Operands.size();
4848     SMLoc LBraceLoc = getTok().getLoc();
4849     Parser.Lex(); // eat the '['
4850 
4851     for (;;) {
4852       ResTy = parseReg(Operands);
4853       if (ResTy != MatchOperand_Success)
4854         return ResTy;
4855 
4856       if (getLexer().is(AsmToken::RBrac))
4857         break;
4858 
4859       if (getLexer().isNot(AsmToken::Comma))
4860         return MatchOperand_ParseFail;
4861       Parser.Lex();
4862     }
4863 
4864     if (Operands.size() - Prefix > 1) {
4865       Operands.insert(Operands.begin() + Prefix,
4866                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4867       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4868                                                     getTok().getLoc()));
4869     }
4870 
4871     Parser.Lex(); // eat the ']'
4872     return MatchOperand_Success;
4873   }
4874 
4875   return parseRegOrImm(Operands);
4876 }
4877 
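// Strip a forced-encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic, remember the requested encoding for matching, and return the
// bare instruction name.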
4878 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4879   // Clear any forced encodings from the previous instruction.
4880   setForcedEncodingSize(0);
4881   setForcedDPP(false);
4882   setForcedSDWA(false);
4883 
4884   if (Name.endswith("_e64")) {
4885     setForcedEncodingSize(64);
4886     return Name.substr(0, Name.size() - 4);
4887   } else if (Name.endswith("_e32")) {
4888     setForcedEncodingSize(32);
4889     return Name.substr(0, Name.size() - 4);
4890   } else if (Name.endswith("_dpp")) {
4891     setForcedDPP(true);
4892     return Name.substr(0, Name.size() - 4);
4893   } else if (Name.endswith("_sdwa")) {
4894     setForcedSDWA(true);
4895     return Name.substr(0, Name.size() - 5);
4896   }
4897   return Name;
4898 }
4899 
4900 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4901                                        StringRef Name,
4902                                        SMLoc NameLoc, OperandVector &Operands) {
4903   // Add the instruction mnemonic
4904   Name = parseMnemonicSuffix(Name);
4905   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4906 
4907   bool IsMIMG = Name.startswith("image_");
4908 
4909   while (!getLexer().is(AsmToken::EndOfStatement)) {
4910     OperandMode Mode = OperandMode_Default;
4911     if (IsMIMG && isGFX10() && Operands.size() == 2)
4912       Mode = OperandMode_NSA;
4913     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4914 
4915     // Eat the comma or space if there is one.
4916     if (getLexer().is(AsmToken::Comma))
4917       Parser.Lex();
4918 
4919     if (Res != MatchOperand_Success) {
4920       checkUnsupportedInstruction(Name, NameLoc);
4921       if (!Parser.hasPendingError()) {
4922         // FIXME: use real operand location rather than the current location.
4923         StringRef Msg =
4924           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4925                                             "not a valid operand.";
4926         Error(getLexer().getLoc(), Msg);
4927       }
4928       while (!getLexer().is(AsmToken::EndOfStatement)) {
4929         Parser.Lex();
4930       }
4931       return true;
4932     }
4933   }
4934 
4935   return false;
4936 }
4937 
4938 //===----------------------------------------------------------------------===//
4939 // Utility functions
4940 //===----------------------------------------------------------------------===//
4941 
4942 OperandMatchResultTy
4943 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4944 
4945   if (!trySkipId(Prefix, AsmToken::Colon))
4946     return MatchOperand_NoMatch;
4947 
4948   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4949 }
4950 
4951 OperandMatchResultTy
4952 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4953                                     AMDGPUOperand::ImmTy ImmTy,
4954                                     bool (*ConvertResult)(int64_t&)) {
4955   SMLoc S = getLoc();
4956   int64_t Value = 0;
4957 
4958   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4959   if (Res != MatchOperand_Success)
4960     return Res;
4961 
4962   if (ConvertResult && !ConvertResult(Value)) {
4963     Error(S, "invalid " + StringRef(Prefix) + " value.");
4964   }
4965 
4966   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4967   return MatchOperand_Success;
4968 }
4969 
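// Parse "<Prefix>:[v, v, ...]" where each element must be 0 or 1 (at most
// four elements) and pack the bits, lowest index first, into a single
// immediate operand.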
4970 OperandMatchResultTy
4971 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4972                                              OperandVector &Operands,
4973                                              AMDGPUOperand::ImmTy ImmTy,
4974                                              bool (*ConvertResult)(int64_t&)) {
4975   SMLoc S = getLoc();
4976   if (!trySkipId(Prefix, AsmToken::Colon))
4977     return MatchOperand_NoMatch;
4978 
4979   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4980     return MatchOperand_ParseFail;
4981 
4982   unsigned Val = 0;
4983   const unsigned MaxSize = 4;
4984 
4985   // FIXME: How to verify the number of elements matches the number of src
4986   // operands?
4987   for (int I = 0; ; ++I) {
4988     int64_t Op;
4989     SMLoc Loc = getLoc();
4990     if (!parseExpr(Op))
4991       return MatchOperand_ParseFail;
4992 
4993     if (Op != 0 && Op != 1) {
4994       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4995       return MatchOperand_ParseFail;
4996     }
4997 
4998     Val |= (Op << I);
4999 
5000     if (trySkipToken(AsmToken::RBrac))
5001       break;
5002 
5003     if (I + 1 == MaxSize) {
5004       Error(getLoc(), "expected a closing square bracket");
5005       return MatchOperand_ParseFail;
5006     }
5007 
5008     if (!skipToken(AsmToken::Comma, "expected a comma"))
5009       return MatchOperand_ParseFail;
5010   }
5011 
5012   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5013   return MatchOperand_Success;
5014 }
5015 
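// Parse a named bit operand: the bare name sets the bit to 1, the "no"-prefixed
// form clears it, and an omitted operand (end of statement) defaults to 0.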
5016 OperandMatchResultTy
5017 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5018                                AMDGPUOperand::ImmTy ImmTy) {
5019   int64_t Bit = 0;
5020   SMLoc S = Parser.getTok().getLoc();
5021 
5022   // If we are at the end of the statement, this is a default argument, so
5023   // keep the default value.
5024   if (getLexer().isNot(AsmToken::EndOfStatement)) {
5025     switch(getLexer().getKind()) {
5026       case AsmToken::Identifier: {
5027         StringRef Tok = Parser.getTok().getString();
5028         if (Tok == Name) {
5029           if (Tok == "r128" && !hasMIMG_R128())
5030             Error(S, "r128 modifier is not supported on this GPU");
5031           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5032             Error(S, "a16 modifier is not supported on this GPU");
5033           Bit = 1;
5034           Parser.Lex();
5035         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5036           Bit = 0;
5037           Parser.Lex();
5038         } else {
5039           return MatchOperand_NoMatch;
5040         }
5041         break;
5042       }
5043       default:
5044         return MatchOperand_NoMatch;
5045     }
5046   }
5047 
5048   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
5049     return MatchOperand_ParseFail;
5050 
5051   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5052     ImmTy = AMDGPUOperand::ImmTyR128A16;
5053 
5054   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5055   return MatchOperand_Success;
5056 }
5057 
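// Append an optional immediate operand to Inst: use the value recorded in
// OptionalIdx if the operand was present in the source, otherwise use Default.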
5058 static void addOptionalImmOperand(
5059   MCInst& Inst, const OperandVector& Operands,
5060   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5061   AMDGPUOperand::ImmTy ImmT,
5062   int64_t Default = 0) {
5063   auto i = OptionalIdx.find(ImmT);
5064   if (i != OptionalIdx.end()) {
5065     unsigned Idx = i->second;
5066     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5067   } else {
5068     Inst.addOperand(MCOperand::createImm(Default));
5069   }
5070 }
5071 
5072 OperandMatchResultTy
5073 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5074   if (getLexer().isNot(AsmToken::Identifier)) {
5075     return MatchOperand_NoMatch;
5076   }
5077   StringRef Tok = Parser.getTok().getString();
5078   if (Tok != Prefix) {
5079     return MatchOperand_NoMatch;
5080   }
5081 
5082   Parser.Lex();
5083   if (getLexer().isNot(AsmToken::Colon)) {
5084     return MatchOperand_ParseFail;
5085   }
5086 
5087   Parser.Lex();
5088   if (getLexer().isNot(AsmToken::Identifier)) {
5089     return MatchOperand_ParseFail;
5090   }
5091 
5092   Value = Parser.getTok().getString();
5093   return MatchOperand_Success;
5094 }
5095 
5096 //===----------------------------------------------------------------------===//
5097 // MTBUF format
5098 //===----------------------------------------------------------------------===//
5099 
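// Parse "<Pref>:<value>" and range-check the value against MaxVal. Returns
// false on a parse error or an out-of-range value; if the prefix is absent,
// Fmt is left unchanged and true is returned.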
5100 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5101                                   int64_t MaxVal,
5102                                   int64_t &Fmt) {
5103   int64_t Val;
5104   SMLoc Loc = getLoc();
5105 
5106   auto Res = parseIntWithPrefix(Pref, Val);
5107   if (Res == MatchOperand_ParseFail)
5108     return false;
5109   if (Res == MatchOperand_NoMatch)
5110     return true;
5111 
5112   if (Val < 0 || Val > MaxVal) {
5113     Error(Loc, Twine("out of range ", StringRef(Pref)));
5114     return false;
5115   }
5116 
5117   Fmt = Val;
5118   return true;
5119 }
5120 
5121 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5122 // values to live in a joint format operand in the MCInst encoding.
5123 OperandMatchResultTy
5124 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5125   using namespace llvm::AMDGPU::MTBUFFormat;
5126 
5127   int64_t Dfmt = DFMT_UNDEF;
5128   int64_t Nfmt = NFMT_UNDEF;
5129 
5130   // dfmt and nfmt can appear in either order, and each is optional.
5131   for (int I = 0; I < 2; ++I) {
5132     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5133       return MatchOperand_ParseFail;
5134 
5135     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5136       return MatchOperand_ParseFail;
5137     }
5138     // Skip optional comma between dfmt/nfmt
5139     // but guard against 2 commas following each other.
5140     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5141         !peekToken().is(AsmToken::Comma)) {
5142       trySkipToken(AsmToken::Comma);
5143     }
5144   }
5145 
5146   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5147     return MatchOperand_NoMatch;
5148 
5149   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5150   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5151 
5152   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5153   return MatchOperand_Success;
5154 }
5155 
5156 OperandMatchResultTy
5157 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5158   using namespace llvm::AMDGPU::MTBUFFormat;
5159 
5160   int64_t Fmt = UFMT_UNDEF;
5161 
5162   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5163     return MatchOperand_ParseFail;
5164 
5165   if (Fmt == UFMT_UNDEF)
5166     return MatchOperand_NoMatch;
5167 
5168   Format = Fmt;
5169   return MatchOperand_Success;
5170 }
5171 
5172 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5173                                     int64_t &Nfmt,
5174                                     StringRef FormatStr,
5175                                     SMLoc Loc) {
5176   using namespace llvm::AMDGPU::MTBUFFormat;
5177   int64_t Format;
5178 
5179   Format = getDfmt(FormatStr);
5180   if (Format != DFMT_UNDEF) {
5181     Dfmt = Format;
5182     return true;
5183   }
5184 
5185   Format = getNfmt(FormatStr, getSTI());
5186   if (Format != NFMT_UNDEF) {
5187     Nfmt = Format;
5188     return true;
5189   }
5190 
5191   Error(Loc, "unsupported format");
5192   return false;
5193 }
5194 
5195 OperandMatchResultTy
5196 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5197                                           SMLoc FormatLoc,
5198                                           int64_t &Format) {
5199   using namespace llvm::AMDGPU::MTBUFFormat;
5200 
5201   int64_t Dfmt = DFMT_UNDEF;
5202   int64_t Nfmt = NFMT_UNDEF;
5203   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5204     return MatchOperand_ParseFail;
5205 
5206   if (trySkipToken(AsmToken::Comma)) {
5207     StringRef Str;
5208     SMLoc Loc = getLoc();
5209     if (!parseId(Str, "expected a format string") ||
5210         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5211       return MatchOperand_ParseFail;
5212     }
5213     if (Dfmt == DFMT_UNDEF) {
5214       Error(Loc, "duplicate numeric format");
5215       return MatchOperand_ParseFail;
5216     } else if (Nfmt == NFMT_UNDEF) {
5217       Error(Loc, "duplicate data format");
5218       return MatchOperand_ParseFail;
5219     }
5220   }
5221 
5222   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5223   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5224 
5225   if (isGFX10()) {
5226     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5227     if (Ufmt == UFMT_UNDEF) {
5228       Error(FormatLoc, "unsupported format");
5229       return MatchOperand_ParseFail;
5230     }
5231     Format = Ufmt;
5232   } else {
5233     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5234   }
5235 
5236   return MatchOperand_Success;
5237 }
5238 
5239 OperandMatchResultTy
5240 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5241                                             SMLoc Loc,
5242                                             int64_t &Format) {
5243   using namespace llvm::AMDGPU::MTBUFFormat;
5244 
5245   auto Id = getUnifiedFormat(FormatStr);
5246   if (Id == UFMT_UNDEF)
5247     return MatchOperand_NoMatch;
5248 
5249   if (!isGFX10()) {
5250     Error(Loc, "unified format is not supported on this GPU");
5251     return MatchOperand_ParseFail;
5252   }
5253 
5254   Format = Id;
5255   return MatchOperand_Success;
5256 }
5257 
5258 OperandMatchResultTy
5259 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5260   using namespace llvm::AMDGPU::MTBUFFormat;
5261   SMLoc Loc = getLoc();
5262 
5263   if (!parseExpr(Format))
5264     return MatchOperand_ParseFail;
5265   if (!isValidFormatEncoding(Format, getSTI())) {
5266     Error(Loc, "out of range format");
5267     return MatchOperand_ParseFail;
5268   }
5269 
5270   return MatchOperand_Success;
5271 }
5272 
5273 OperandMatchResultTy
5274 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5275   using namespace llvm::AMDGPU::MTBUFFormat;
5276 
5277   if (!trySkipId("format", AsmToken::Colon))
5278     return MatchOperand_NoMatch;
5279 
5280   if (trySkipToken(AsmToken::LBrac)) {
5281     StringRef FormatStr;
5282     SMLoc Loc = getLoc();
5283     if (!parseId(FormatStr, "expected a format string"))
5284       return MatchOperand_ParseFail;
5285 
5286     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5287     if (Res == MatchOperand_NoMatch)
5288       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5289     if (Res != MatchOperand_Success)
5290       return Res;
5291 
5292     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5293       return MatchOperand_ParseFail;
5294 
5295     return MatchOperand_Success;
5296   }
5297 
5298   return parseNumericFormat(Format);
5299 }
5300 
5301 OperandMatchResultTy
5302 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5303   using namespace llvm::AMDGPU::MTBUFFormat;
5304 
5305   int64_t Format = getDefaultFormatEncoding(getSTI());
5306   OperandMatchResultTy Res;
5307   SMLoc Loc = getLoc();
5308 
5309   // Parse legacy format syntax.
5310   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5311   if (Res == MatchOperand_ParseFail)
5312     return Res;
5313 
5314   bool FormatFound = (Res == MatchOperand_Success);
5315 
5316   Operands.push_back(
5317     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5318 
5319   if (FormatFound)
5320     trySkipToken(AsmToken::Comma);
5321 
5322   if (isToken(AsmToken::EndOfStatement)) {
5323     // We are expecting an soffset operand,
5324     // but let the matcher handle the error.
5325     return MatchOperand_Success;
5326   }
5327 
5328   // Parse soffset.
5329   Res = parseRegOrImm(Operands);
5330   if (Res != MatchOperand_Success)
5331     return Res;
5332 
5333   trySkipToken(AsmToken::Comma);
5334 
5335   if (!FormatFound) {
5336     Res = parseSymbolicOrNumericFormat(Format);
5337     if (Res == MatchOperand_ParseFail)
5338       return Res;
5339     if (Res == MatchOperand_Success) {
5340       auto Size = Operands.size();
5341       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5342       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5343       Op.setImm(Format);
5344     }
5345     return MatchOperand_Success;
5346   }
5347 
5348   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5349     Error(getLoc(), "duplicate format");
5350     return MatchOperand_ParseFail;
5351   }
5352   return MatchOperand_Success;
5353 }
5354 
5355 //===----------------------------------------------------------------------===//
5356 // ds
5357 //===----------------------------------------------------------------------===//
5358 
5359 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5360                                     const OperandVector &Operands) {
5361   OptionalImmIndexMap OptionalIdx;
5362 
5363   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5364     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5365 
5366     // Add the register arguments
5367     if (Op.isReg()) {
5368       Op.addRegOperands(Inst, 1);
5369       continue;
5370     }
5371 
5372     // Handle optional arguments
5373     OptionalIdx[Op.getImmTy()] = i;
5374   }
5375 
5376   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5377   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5378   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5379 
5380   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5381 }
5382 
5383 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5384                                 bool IsGdsHardcoded) {
5385   OptionalImmIndexMap OptionalIdx;
5386 
5387   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5388     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5389 
5390     // Add the register arguments
5391     if (Op.isReg()) {
5392       Op.addRegOperands(Inst, 1);
5393       continue;
5394     }
5395 
5396     if (Op.isToken() && Op.getToken() == "gds") {
5397       IsGdsHardcoded = true;
5398       continue;
5399     }
5400 
5401     // Handle optional arguments
5402     OptionalIdx[Op.getImmTy()] = i;
5403   }
5404 
5405   AMDGPUOperand::ImmTy OffsetType =
5406     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5407      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5408      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5409                                                       AMDGPUOperand::ImmTyOffset;
5410 
5411   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5412 
5413   if (!IsGdsHardcoded) {
5414     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5415   }
5416   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5417 }
5418 
5419 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5420   OptionalImmIndexMap OptionalIdx;
5421 
5422   unsigned OperandIdx[4];
5423   unsigned EnMask = 0;
5424   int SrcIdx = 0;
5425 
5426   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5427     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5428 
5429     // Add the register arguments
5430     if (Op.isReg()) {
5431       assert(SrcIdx < 4);
5432       OperandIdx[SrcIdx] = Inst.size();
5433       Op.addRegOperands(Inst, 1);
5434       ++SrcIdx;
5435       continue;
5436     }
5437 
5438     if (Op.isOff()) {
5439       assert(SrcIdx < 4);
5440       OperandIdx[SrcIdx] = Inst.size();
5441       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5442       ++SrcIdx;
5443       continue;
5444     }
5445 
5446     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5447       Op.addImmOperands(Inst, 1);
5448       continue;
5449     }
5450 
5451     if (Op.isToken() && Op.getToken() == "done")
5452       continue;
5453 
5454     // Handle optional arguments
5455     OptionalIdx[Op.getImmTy()] = i;
5456   }
5457 
5458   assert(SrcIdx == 4);
5459 
5460   bool Compr = false;
5461   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5462     Compr = true;
5463     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5464     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5465     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5466   }
5467 
5468   for (auto i = 0; i < SrcIdx; ++i) {
5469     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5470       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5471     }
5472   }
5473 
5474   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5475   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5476 
5477   Inst.addOperand(MCOperand::createImm(EnMask));
5478 }
5479 
5480 //===----------------------------------------------------------------------===//
5481 // s_waitcnt
5482 //===----------------------------------------------------------------------===//
5483 
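// Insert CntVal into the corresponding counter field of IntVal. Returns true
// on failure, i.e. when the value does not fit and saturation was not
// requested.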
5484 static bool
5485 encodeCnt(
5486   const AMDGPU::IsaVersion ISA,
5487   int64_t &IntVal,
5488   int64_t CntVal,
5489   bool Saturate,
5490   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5491   unsigned (*decode)(const IsaVersion &Version, unsigned))
5492 {
5493   bool Failed = false;
5494 
5495   IntVal = encode(ISA, IntVal, CntVal);
5496   if (CntVal != decode(ISA, IntVal)) {
5497     if (Saturate) {
5498       IntVal = encode(ISA, IntVal, -1);
5499     } else {
5500       Failed = true;
5501     }
5502   }
5503   return Failed;
5504 }
5505 
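// Parse one counter term of the form <name>(<value>) -- e.g. vmcnt(0) -- and
// merge it into the s_waitcnt mask. A "_sat" suffix clamps an out-of-range
// value instead of reporting an error. Returns false on error.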
5506 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5507 
5508   SMLoc CntLoc = getLoc();
5509   StringRef CntName = getTokenStr();
5510 
5511   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5512       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5513     return false;
5514 
5515   int64_t CntVal;
5516   SMLoc ValLoc = getLoc();
5517   if (!parseExpr(CntVal))
5518     return false;
5519 
5520   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5521 
5522   bool Failed = true;
5523   bool Sat = CntName.endswith("_sat");
5524 
5525   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5526     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5527   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5528     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5529   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5530     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5531   } else {
5532     Error(CntLoc, "invalid counter name " + CntName);
5533     return false;
5534   }
5535 
5536   if (Failed) {
5537     Error(ValLoc, "too large value for " + CntName);
5538     return false;
5539   }
5540 
5541   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5542     return false;
5543 
5544   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5545     if (isToken(AsmToken::EndOfStatement)) {
5546       Error(getLoc(), "expected a counter name");
5547       return false;
5548     }
5549   }
5550 
5551   return true;
5552 }
5553 
5554 OperandMatchResultTy
5555 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5556   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5557   int64_t Waitcnt = getWaitcntBitMask(ISA);
5558   SMLoc S = getLoc();
5559 
5560   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5561     while (!isToken(AsmToken::EndOfStatement)) {
5562       if (!parseCnt(Waitcnt))
5563         return MatchOperand_ParseFail;
5564     }
5565   } else {
5566     if (!parseExpr(Waitcnt))
5567       return MatchOperand_ParseFail;
5568   }
5569 
5570   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5571   return MatchOperand_Success;
5572 }
5573 
5574 bool
5575 AMDGPUOperand::isSWaitCnt() const {
5576   return isImm();
5577 }
5578 
5579 //===----------------------------------------------------------------------===//
5580 // hwreg
5581 //===----------------------------------------------------------------------===//
5582 
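// Accepted forms: hwreg(<register name or code>[, <bit offset>, <bit width>])
// or a plain immediate that must fit into 16 bits.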
5583 bool
5584 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5585                                 int64_t &Offset,
5586                                 int64_t &Width) {
5587   using namespace llvm::AMDGPU::Hwreg;
5588 
5589   // The register may be specified by name or using a numeric code
5590   if (isToken(AsmToken::Identifier) &&
5591       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5592     HwReg.IsSymbolic = true;
5593     lex(); // skip register name
5594   } else if (!parseExpr(HwReg.Id)) {
5595     return false;
5596   }
5597 
5598   if (trySkipToken(AsmToken::RParen))
5599     return true;
5600 
5601   // parse optional params
5602   return
5603     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5604     parseExpr(Offset) &&
5605     skipToken(AsmToken::Comma, "expected a comma") &&
5606     parseExpr(Width) &&
5607     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5608 }
5609 
5610 bool
5611 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5612                                const int64_t Offset,
5613                                const int64_t Width,
5614                                const SMLoc Loc) {
5615 
5616   using namespace llvm::AMDGPU::Hwreg;
5617 
5618   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5619     Error(Loc, "specified hardware register is not supported on this GPU");
5620     return false;
5621   } else if (!isValidHwreg(HwReg.Id)) {
5622     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5623     return false;
5624   } else if (!isValidHwregOffset(Offset)) {
5625     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5626     return false;
5627   } else if (!isValidHwregWidth(Width)) {
5628     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5629     return false;
5630   }
5631   return true;
5632 }
5633 
5634 OperandMatchResultTy
5635 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5636   using namespace llvm::AMDGPU::Hwreg;
5637 
5638   int64_t ImmVal = 0;
5639   SMLoc Loc = getLoc();
5640 
5641   if (trySkipId("hwreg", AsmToken::LParen)) {
5642     OperandInfoTy HwReg(ID_UNKNOWN_);
5643     int64_t Offset = OFFSET_DEFAULT_;
5644     int64_t Width = WIDTH_DEFAULT_;
5645     if (parseHwregBody(HwReg, Offset, Width) &&
5646         validateHwreg(HwReg, Offset, Width, Loc)) {
5647       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5648     } else {
5649       return MatchOperand_ParseFail;
5650     }
5651   } else if (parseExpr(ImmVal)) {
5652     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5653       Error(Loc, "invalid immediate: only 16-bit values are legal");
5654       return MatchOperand_ParseFail;
5655     }
5656   } else {
5657     return MatchOperand_ParseFail;
5658   }
5659 
5660   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5661   return MatchOperand_Success;
5662 }
5663 
5664 bool AMDGPUOperand::isHwreg() const {
5665   return isImmTy(ImmTyHwreg);
5666 }
5667 
5668 //===----------------------------------------------------------------------===//
5669 // sendmsg
5670 //===----------------------------------------------------------------------===//
5671 
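// Accepted forms: sendmsg(<message name or id>[, <operation>[, <stream id>]])
// or a plain immediate that must fit into 16 bits.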
5672 bool
5673 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5674                                   OperandInfoTy &Op,
5675                                   OperandInfoTy &Stream) {
5676   using namespace llvm::AMDGPU::SendMsg;
5677 
5678   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5679     Msg.IsSymbolic = true;
5680     lex(); // skip message name
5681   } else if (!parseExpr(Msg.Id)) {
5682     return false;
5683   }
5684 
5685   if (trySkipToken(AsmToken::Comma)) {
5686     Op.IsDefined = true;
5687     if (isToken(AsmToken::Identifier) &&
5688         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5689       lex(); // skip operation name
5690     } else if (!parseExpr(Op.Id)) {
5691       return false;
5692     }
5693 
5694     if (trySkipToken(AsmToken::Comma)) {
5695       Stream.IsDefined = true;
5696       if (!parseExpr(Stream.Id))
5697         return false;
5698     }
5699   }
5700 
5701   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5702 }
5703 
5704 bool
5705 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5706                                  const OperandInfoTy &Op,
5707                                  const OperandInfoTy &Stream,
5708                                  const SMLoc S) {
5709   using namespace llvm::AMDGPU::SendMsg;
5710 
5711   // Validation strictness depends on whether the message is specified
5712   // in a symbolic or in a numeric form. In the latter case,
5713   // only the possibility of encoding is checked.
5714   bool Strict = Msg.IsSymbolic;
5715 
5716   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5717     Error(S, "invalid message id");
5718     return false;
5719   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5720     Error(S, Op.IsDefined ?
5721              "message does not support operations" :
5722              "missing message operation");
5723     return false;
5724   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5725     Error(S, "invalid operation id");
5726     return false;
5727   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5728     Error(S, "message operation does not support streams");
5729     return false;
5730   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5731     Error(S, "invalid message stream id");
5732     return false;
5733   }
5734   return true;
5735 }
5736 
5737 OperandMatchResultTy
5738 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5739   using namespace llvm::AMDGPU::SendMsg;
5740 
5741   int64_t ImmVal = 0;
5742   SMLoc Loc = getLoc();
5743 
5744   if (trySkipId("sendmsg", AsmToken::LParen)) {
5745     OperandInfoTy Msg(ID_UNKNOWN_);
5746     OperandInfoTy Op(OP_NONE_);
5747     OperandInfoTy Stream(STREAM_ID_NONE_);
5748     if (parseSendMsgBody(Msg, Op, Stream) &&
5749         validateSendMsg(Msg, Op, Stream, Loc)) {
5750       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5751     } else {
5752       return MatchOperand_ParseFail;
5753     }
5754   } else if (parseExpr(ImmVal)) {
5755     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5756       Error(Loc, "invalid immediate: only 16-bit values are legal");
5757       return MatchOperand_ParseFail;
5758     }
5759   } else {
5760     return MatchOperand_ParseFail;
5761   }
5762 
5763   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5764   return MatchOperand_Success;
5765 }
5766 
5767 bool AMDGPUOperand::isSendMsg() const {
5768   return isImmTy(ImmTySendMsg);
5769 }
5770 
5771 //===----------------------------------------------------------------------===//
5772 // v_interp
5773 //===----------------------------------------------------------------------===//
5774 
5775 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5776   if (getLexer().getKind() != AsmToken::Identifier)
5777     return MatchOperand_NoMatch;
5778 
5779   StringRef Str = Parser.getTok().getString();
5780   int Slot = StringSwitch<int>(Str)
5781     .Case("p10", 0)
5782     .Case("p20", 1)
5783     .Case("p0", 2)
5784     .Default(-1);
5785 
5786   SMLoc S = Parser.getTok().getLoc();
5787   if (Slot == -1)
5788     return MatchOperand_ParseFail;
5789 
5790   Parser.Lex();
5791   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5792                                               AMDGPUOperand::ImmTyInterpSlot));
5793   return MatchOperand_Success;
5794 }
5795 
5796 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5797   if (getLexer().getKind() != AsmToken::Identifier)
5798     return MatchOperand_NoMatch;
5799 
5800   StringRef Str = Parser.getTok().getString();
5801   if (!Str.startswith("attr"))
5802     return MatchOperand_NoMatch;
5803 
5804   StringRef Chan = Str.take_back(2);
5805   int AttrChan = StringSwitch<int>(Chan)
5806     .Case(".x", 0)
5807     .Case(".y", 1)
5808     .Case(".z", 2)
5809     .Case(".w", 3)
5810     .Default(-1);
5811   if (AttrChan == -1)
5812     return MatchOperand_ParseFail;
5813 
5814   Str = Str.drop_back(2).drop_front(4);
5815 
5816   uint8_t Attr;
5817   if (Str.getAsInteger(10, Attr))
5818     return MatchOperand_ParseFail;
5819 
5820   SMLoc S = Parser.getTok().getLoc();
5821   Parser.Lex();
5822   if (Attr > 63) {
5823     Error(S, "out of bounds attr");
5824     return MatchOperand_ParseFail;
5825   }
5826 
5827   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5828 
5829   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5830                                               AMDGPUOperand::ImmTyInterpAttr));
5831   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5832                                               AMDGPUOperand::ImmTyAttrChan));
5833   return MatchOperand_Success;
5834 }
5835 
5836 //===----------------------------------------------------------------------===//
5837 // exp
5838 //===----------------------------------------------------------------------===//
5839 
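// Map a symbolic export target (null, mrt<N>, mrtz, pos<N>, prim, param<N>)
// to its numeric encoding; unrecognized strings fall through to NoMatch.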
5840 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5841                                                       uint8_t &Val) {
5842   if (Str == "null") {
5843     Val = 9;
5844     return MatchOperand_Success;
5845   }
5846 
5847   if (Str.startswith("mrt")) {
5848     Str = Str.drop_front(3);
5849     if (Str == "z") { // == mrtz
5850       Val = 8;
5851       return MatchOperand_Success;
5852     }
5853 
5854     if (Str.getAsInteger(10, Val))
5855       return MatchOperand_ParseFail;
5856 
5857     if (Val > 7) {
5858       Error(getLoc(), "invalid exp target");
5859       return MatchOperand_ParseFail;
5860     }
5861 
5862     return MatchOperand_Success;
5863   }
5864 
5865   if (Str.startswith("pos")) {
5866     Str = Str.drop_front(3);
5867     if (Str.getAsInteger(10, Val))
5868       return MatchOperand_ParseFail;
5869 
5870     if (Val > 4 || (Val == 4 && !isGFX10())) {
5871       Error(getLoc(), "invalid exp target");
5872       return MatchOperand_ParseFail;
5873     }
5874 
5875     Val += 12;
5876     return MatchOperand_Success;
5877   }
5878 
5879   if (isGFX10() && Str == "prim") {
5880     Val = 20;
5881     return MatchOperand_Success;
5882   }
5883 
5884   if (Str.startswith("param")) {
5885     Str = Str.drop_front(5);
5886     if (Str.getAsInteger(10, Val))
5887       return MatchOperand_ParseFail;
5888 
5889     if (Val >= 32) {
5890       Error(getLoc(), "invalid exp target");
5891       return MatchOperand_ParseFail;
5892     }
5893 
5894     Val += 32;
5895     return MatchOperand_Success;
5896   }
5897 
5898   if (Str.startswith("invalid_target_")) {
5899     Str = Str.drop_front(15);
5900     if (Str.getAsInteger(10, Val))
5901       return MatchOperand_ParseFail;
5902 
5903     Error(getLoc(), "invalid exp target");
5904     return MatchOperand_ParseFail;
5905   }
5906 
5907   return MatchOperand_NoMatch;
5908 }
5909 
5910 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5911   uint8_t Val;
5912   StringRef Str = Parser.getTok().getString();
5913 
5914   auto Res = parseExpTgtImpl(Str, Val);
5915   if (Res != MatchOperand_Success)
5916     return Res;
5917 
5918   SMLoc S = Parser.getTok().getLoc();
5919   Parser.Lex();
5920 
5921   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5922                                               AMDGPUOperand::ImmTyExpTgt));
5923   return MatchOperand_Success;
5924 }
5925 
5926 //===----------------------------------------------------------------------===//
5927 // parser helpers
5928 //===----------------------------------------------------------------------===//
5929 
5930 bool
5931 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5932   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5933 }
5934 
5935 bool
5936 AMDGPUAsmParser::isId(const StringRef Id) const {
5937   return isId(getToken(), Id);
5938 }
5939 
5940 bool
5941 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5942   return getTokenKind() == Kind;
5943 }
5944 
5945 bool
5946 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5947   if (isId(Id)) {
5948     lex();
5949     return true;
5950   }
5951   return false;
5952 }
5953 
5954 bool
5955 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5956   if (isId(Id) && peekToken().is(Kind)) {
5957     lex();
5958     lex();
5959     return true;
5960   }
5961   return false;
5962 }
5963 
5964 bool
5965 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5966   if (isToken(Kind)) {
5967     lex();
5968     return true;
5969   }
5970   return false;
5971 }
5972 
5973 bool
5974 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5975                            const StringRef ErrMsg) {
5976   if (!trySkipToken(Kind)) {
5977     Error(getLoc(), ErrMsg);
5978     return false;
5979   }
5980   return true;
5981 }
5982 
5983 bool
5984 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5985   return !getParser().parseAbsoluteExpression(Imm);
5986 }
5987 
5988 bool
5989 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5990   SMLoc S = getLoc();
5991 
5992   const MCExpr *Expr;
5993   if (Parser.parseExpression(Expr))
5994     return false;
5995 
5996   int64_t IntVal;
5997   if (Expr->evaluateAsAbsolute(IntVal)) {
5998     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5999   } else {
6000     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6001   }
6002   return true;
6003 }
6004 
6005 bool
6006 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6007   if (isToken(AsmToken::String)) {
6008     Val = getToken().getStringContents();
6009     lex();
6010     return true;
6011   } else {
6012     Error(getLoc(), ErrMsg);
6013     return false;
6014   }
6015 }
6016 
6017 bool
6018 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6019   if (isToken(AsmToken::Identifier)) {
6020     Val = getTokenStr();
6021     lex();
6022     return true;
6023   } else {
6024     Error(getLoc(), ErrMsg);
6025     return false;
6026   }
6027 }
6028 
6029 AsmToken
6030 AMDGPUAsmParser::getToken() const {
6031   return Parser.getTok();
6032 }
6033 
6034 AsmToken
6035 AMDGPUAsmParser::peekToken() {
6036   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6037 }
6038 
6039 void
6040 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6041   auto TokCount = getLexer().peekTokens(Tokens);
6042 
6043   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6044     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6045 }
6046 
6047 AsmToken::TokenKind
6048 AMDGPUAsmParser::getTokenKind() const {
6049   return getLexer().getKind();
6050 }
6051 
6052 SMLoc
6053 AMDGPUAsmParser::getLoc() const {
6054   return getToken().getLoc();
6055 }
6056 
6057 StringRef
6058 AMDGPUAsmParser::getTokenStr() const {
6059   return getToken().getString();
6060 }
6061 
6062 void
6063 AMDGPUAsmParser::lex() {
6064   Parser.Lex();
6065 }
6066 
6067 //===----------------------------------------------------------------------===//
6068 // swizzle
6069 //===----------------------------------------------------------------------===//
6070 
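// Pack the AND, OR and XOR lane masks into the BITMASK_PERM swizzle encoding.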
6071 LLVM_READNONE
6072 static unsigned
6073 encodeBitmaskPerm(const unsigned AndMask,
6074                   const unsigned OrMask,
6075                   const unsigned XorMask) {
6076   using namespace llvm::AMDGPU::Swizzle;
6077 
6078   return BITMASK_PERM_ENC |
6079          (AndMask << BITMASK_AND_SHIFT) |
6080          (OrMask  << BITMASK_OR_SHIFT)  |
6081          (XorMask << BITMASK_XOR_SHIFT);
6082 }
6083 
6084 bool
6085 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6086                                       const unsigned MinVal,
6087                                       const unsigned MaxVal,
6088                                       const StringRef ErrMsg) {
6089   for (unsigned i = 0; i < OpNum; ++i) {
6090     if (!skipToken(AsmToken::Comma, "expected a comma")){
6091       return false;
6092     }
6093     SMLoc ExprLoc = Parser.getTok().getLoc();
6094     if (!parseExpr(Op[i])) {
6095       return false;
6096     }
6097     if (Op[i] < MinVal || Op[i] > MaxVal) {
6098       Error(ExprLoc, ErrMsg);
6099       return false;
6100     }
6101   }
6102 
6103   return true;
6104 }
6105 
6106 bool
6107 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6108   using namespace llvm::AMDGPU::Swizzle;
6109 
6110   int64_t Lane[LANE_NUM];
6111   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6112                            "expected a 2-bit lane id")) {
6113     Imm = QUAD_PERM_ENC;
6114     for (unsigned I = 0; I < LANE_NUM; ++I) {
6115       Imm |= Lane[I] << (LANE_SHIFT * I);
6116     }
6117     return true;
6118   }
6119   return false;
6120 }
6121 
6122 bool
6123 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6124   using namespace llvm::AMDGPU::Swizzle;
6125 
6126   SMLoc S = Parser.getTok().getLoc();
6127   int64_t GroupSize;
6128   int64_t LaneIdx;
6129 
6130   if (!parseSwizzleOperands(1, &GroupSize,
6131                             2, 32,
6132                             "group size must be in the interval [2,32]")) {
6133     return false;
6134   }
6135   if (!isPowerOf2_64(GroupSize)) {
6136     Error(S, "group size must be a power of two");
6137     return false;
6138   }
6139   if (parseSwizzleOperands(1, &LaneIdx,
6140                            0, GroupSize - 1,
6141                            "lane id must be in the interval [0,group size - 1]")) {
6142     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6143     return true;
6144   }
6145   return false;
6146 }
6147 
6148 bool
6149 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6150   using namespace llvm::AMDGPU::Swizzle;
6151 
6152   SMLoc S = Parser.getTok().getLoc();
6153   int64_t GroupSize;
6154 
6155   if (!parseSwizzleOperands(1, &GroupSize,
6156       2, 32, "group size must be in the interval [2,32]")) {
6157     return false;
6158   }
6159   if (!isPowerOf2_64(GroupSize)) {
6160     Error(S, "group size must be a power of two");
6161     return false;
6162   }
6163 
6164   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6165   return true;
6166 }
6167 
6168 bool
6169 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6170   using namespace llvm::AMDGPU::Swizzle;
6171 
6172   SMLoc S = Parser.getTok().getLoc();
6173   int64_t GroupSize;
6174 
6175   if (!parseSwizzleOperands(1, &GroupSize,
6176       1, 16, "group size must be in the interval [1,16]")) {
6177     return false;
6178   }
6179   if (!isPowerOf2_64(GroupSize)) {
6180     Error(S, "group size must be a power of two");
6181     return false;
6182   }
6183 
6184   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6185   return true;
6186 }
6187 
6188 bool
6189 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6190   using namespace llvm::AMDGPU::Swizzle;
6191 
6192   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6193     return false;
6194   }
6195 
6196   StringRef Ctl;
6197   SMLoc StrLoc = Parser.getTok().getLoc();
6198   if (!parseString(Ctl)) {
6199     return false;
6200   }
6201   if (Ctl.size() != BITMASK_WIDTH) {
6202     Error(StrLoc, "expected a 5-character mask");
6203     return false;
6204   }
6205 
6206   unsigned AndMask = 0;
6207   unsigned OrMask = 0;
6208   unsigned XorMask = 0;
6209 
6210   for (size_t i = 0; i < Ctl.size(); ++i) {
6211     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6212     switch(Ctl[i]) {
6213     default:
6214       Error(StrLoc, "invalid mask");
6215       return false;
6216     case '0':
6217       break;
6218     case '1':
6219       OrMask |= Mask;
6220       break;
6221     case 'p':
6222       AndMask |= Mask;
6223       break;
6224     case 'i':
6225       AndMask |= Mask;
6226       XorMask |= Mask;
6227       break;
6228     }
6229   }
6230 
6231   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6232   return true;
6233 }
6234 
6235 bool
6236 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6237 
6238   SMLoc OffsetLoc = Parser.getTok().getLoc();
6239 
6240   if (!parseExpr(Imm)) {
6241     return false;
6242   }
6243   if (!isUInt<16>(Imm)) {
6244     Error(OffsetLoc, "expected a 16-bit offset");
6245     return false;
6246   }
6247   return true;
6248 }
6249 
6250 bool
6251 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6252   using namespace llvm::AMDGPU::Swizzle;
6253 
6254   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6255 
6256     SMLoc ModeLoc = Parser.getTok().getLoc();
6257     bool Ok = false;
6258 
6259     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6260       Ok = parseSwizzleQuadPerm(Imm);
6261     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6262       Ok = parseSwizzleBitmaskPerm(Imm);
6263     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6264       Ok = parseSwizzleBroadcast(Imm);
6265     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6266       Ok = parseSwizzleSwap(Imm);
6267     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6268       Ok = parseSwizzleReverse(Imm);
6269     } else {
6270       Error(ModeLoc, "expected a swizzle mode");
6271     }
6272 
6273     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6274   }
6275 
6276   return false;
6277 }
6278 
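// Parse the swizzle offset operand used by ds_swizzle_b32. It is either a raw
// 16-bit immediate, e.g. offset:0xffff, or a macro, e.g. offset:swizzle(SWAP, 16).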
6279 OperandMatchResultTy
6280 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6281   SMLoc S = Parser.getTok().getLoc();
6282   int64_t Imm = 0;
6283 
6284   if (trySkipId("offset")) {
6285 
6286     bool Ok = false;
6287     if (skipToken(AsmToken::Colon, "expected a colon")) {
6288       if (trySkipId("swizzle")) {
6289         Ok = parseSwizzleMacro(Imm);
6290       } else {
6291         Ok = parseSwizzleOffset(Imm);
6292       }
6293     }
6294 
6295     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6296 
6297     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6298   } else {
6299     // Swizzle "offset" operand is optional.
6300     // If it is omitted, try parsing other optional operands.
6301     return parseOptionalOpr(Operands);
6302   }
6303 }
6304 
6305 bool
6306 AMDGPUOperand::isSwizzle() const {
6307   return isImmTy(ImmTySwizzle);
6308 }
6309 
6310 //===----------------------------------------------------------------------===//
6311 // VGPR Index Mode
6312 //===----------------------------------------------------------------------===//
6313 
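// Parse the operand list of the gpr_idx() macro, e.g. gpr_idx(SRC0,SRC2,DST).
// The result is a bitmask with one bit per VGPR index mode; an empty list,
// gpr_idx(), yields OFF.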
6314 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6315 
6316   using namespace llvm::AMDGPU::VGPRIndexMode;
6317 
6318   if (trySkipToken(AsmToken::RParen)) {
6319     return OFF;
6320   }
6321 
6322   int64_t Imm = 0;
6323 
6324   while (true) {
6325     unsigned Mode = 0;
6326     SMLoc S = Parser.getTok().getLoc();
6327 
6328     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6329       if (trySkipId(IdSymbolic[ModeId])) {
6330         Mode = 1 << ModeId;
6331         break;
6332       }
6333     }
6334 
6335     if (Mode == 0) {
6336       Error(S, (Imm == 0)?
6337                "expected a VGPR index mode or a closing parenthesis" :
6338                "expected a VGPR index mode");
6339       return UNDEF;
6340     }
6341 
6342     if (Imm & Mode) {
6343       Error(S, "duplicate VGPR index mode");
6344       return UNDEF;
6345     }
6346     Imm |= Mode;
6347 
6348     if (trySkipToken(AsmToken::RParen))
6349       break;
6350     if (!skipToken(AsmToken::Comma,
6351                    "expected a comma or a closing parenthesis"))
6352       return UNDEF;
6353   }
6354 
6355   return Imm;
6356 }
6357 
6358 OperandMatchResultTy
6359 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6360 
6361   using namespace llvm::AMDGPU::VGPRIndexMode;
6362 
6363   int64_t Imm = 0;
6364   SMLoc S = Parser.getTok().getLoc();
6365 
6366   if (getLexer().getKind() == AsmToken::Identifier &&
6367       Parser.getTok().getString() == "gpr_idx" &&
6368       getLexer().peekTok().is(AsmToken::LParen)) {
6369 
6370     Parser.Lex();
6371     Parser.Lex();
6372 
6373     Imm = parseGPRIdxMacro();
6374     if (Imm == UNDEF)
6375       return MatchOperand_ParseFail;
6376 
6377   } else {
6378     if (getParser().parseAbsoluteExpression(Imm))
6379       return MatchOperand_ParseFail;
6380     if (Imm < 0 || !isUInt<4>(Imm)) {
6381       Error(S, "invalid immediate: only 4-bit values are legal");
6382       return MatchOperand_ParseFail;
6383     }
6384   }
6385 
6386   Operands.push_back(
6387       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6388   return MatchOperand_Success;
6389 }
6390 
6391 bool AMDGPUOperand::isGPRIdxMode() const {
6392   return isImmTy(ImmTyGprIdxMode);
6393 }
6394 
6395 //===----------------------------------------------------------------------===//
6396 // sopp branch targets
6397 //===----------------------------------------------------------------------===//
6398 
6399 OperandMatchResultTy
6400 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6401 
6402   // Make sure we are not parsing something
6403   // that looks like a label or an expression but is not.
6404   // This will improve error messages.
6405   if (isRegister() || isModifier())
6406     return MatchOperand_NoMatch;
6407 
6408   if (!parseExpr(Operands))
6409     return MatchOperand_ParseFail;
6410 
6411   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6412   assert(Opr.isImm() || Opr.isExpr());
6413   SMLoc Loc = Opr.getStartLoc();
6414 
6415   // Currently we do not support arbitrary expressions as branch targets.
6416   // Only labels and absolute expressions are accepted.
6417   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6418     Error(Loc, "expected an absolute expression or a label");
6419   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6420     Error(Loc, "expected a 16-bit signed jump offset");
6421   }
6422 
6423   return MatchOperand_Success;
6424 }
6425 
6426 //===----------------------------------------------------------------------===//
6427 // Boolean holding registers
6428 //===----------------------------------------------------------------------===//
6429 
6430 OperandMatchResultTy
6431 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6432   return parseReg(Operands);
6433 }
6434 
6435 //===----------------------------------------------------------------------===//
6436 // mubuf
6437 //===----------------------------------------------------------------------===//
6438 
6439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6440   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6441 }
6442 
6443 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6444   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6445 }
6446 
6447 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6448   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6449 }
6450 
6451 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6452   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6453 }
6454 
6455 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6456                                const OperandVector &Operands,
6457                                bool IsAtomic,
6458                                bool IsAtomicReturn,
6459                                bool IsLds) {
6460   bool IsLdsOpcode = IsLds;
6461   bool HasLdsModifier = false;
6462   OptionalImmIndexMap OptionalIdx;
6463   assert(IsAtomicReturn ? IsAtomic : true);
6464   unsigned FirstOperandIdx = 1;
6465 
6466   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6467     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6468 
6469     // Add the register arguments
6470     if (Op.isReg()) {
6471       Op.addRegOperands(Inst, 1);
6472       // Insert a tied src for atomic return dst.
6473       // This cannot be postponed as subsequent calls to
6474       // addImmOperands rely on correct number of MC operands.
6475       if (IsAtomicReturn && i == FirstOperandIdx)
6476         Op.addRegOperands(Inst, 1);
6477       continue;
6478     }
6479 
6480     // Handle the case where soffset is an immediate
6481     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6482       Op.addImmOperands(Inst, 1);
6483       continue;
6484     }
6485 
6486     HasLdsModifier |= Op.isLDS();
6487 
6488     // Handle tokens like 'offen' which are sometimes hard-coded into the
6489     // asm string.  There are no MCInst operands for these.
6490     if (Op.isToken()) {
6491       continue;
6492     }
6493     assert(Op.isImm());
6494 
6495     // Handle optional arguments
6496     OptionalIdx[Op.getImmTy()] = i;
6497   }
6498 
6499   // This is a workaround for an llvm quirk which may result in an
6500   // incorrect instruction selection. The lds and non-lds versions of
6501   // MUBUF instructions are identical except that lds versions
6502   // have a mandatory 'lds' modifier. However, this modifier follows
6503   // the optional modifiers, and the llvm asm matcher regards this 'lds'
6504   // modifier as an optional one. As a result, the lds version
6505   // of an opcode may be selected even if it has no 'lds' modifier.
6506   if (IsLdsOpcode && !HasLdsModifier) {
6507     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6508     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6509       Inst.setOpcode(NoLdsOpcode);
6510       IsLdsOpcode = false;
6511     }
6512   }
6513 
6514   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6515   if (!IsAtomic || IsAtomicReturn) {
6516     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6517   }
6518   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6519 
6520   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6521     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6522   }
6523 
6524   if (isGFX10())
6525     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6526 }
6527 
6528 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6529   OptionalImmIndexMap OptionalIdx;
6530 
6531   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6532     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6533 
6534     // Add the register arguments
6535     if (Op.isReg()) {
6536       Op.addRegOperands(Inst, 1);
6537       continue;
6538     }
6539 
6540     // Handle the case where soffset is an immediate
6541     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6542       Op.addImmOperands(Inst, 1);
6543       continue;
6544     }
6545 
6546     // Handle tokens like 'offen' which are sometimes hard-coded into the
6547     // asm string.  There are no MCInst operands for these.
6548     if (Op.isToken()) {
6549       continue;
6550     }
6551     assert(Op.isImm());
6552 
6553     // Handle optional arguments
6554     OptionalIdx[Op.getImmTy()] = i;
6555   }
6556 
6557   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6558                         AMDGPUOperand::ImmTyOffset);
6559   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6560   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6561   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6562   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6563 
6564   if (isGFX10())
6565     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6566 }
6567 
6568 //===----------------------------------------------------------------------===//
6569 // mimg
6570 //===----------------------------------------------------------------------===//
6571 
6572 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6573                               bool IsAtomic) {
6574   unsigned I = 1;
6575   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6576   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6577     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6578   }
6579 
6580   if (IsAtomic) {
6581     // Add src, same as dst
6582     assert(Desc.getNumDefs() == 1);
6583     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6584   }
6585 
6586   OptionalImmIndexMap OptionalIdx;
6587 
6588   for (unsigned E = Operands.size(); I != E; ++I) {
6589     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6590 
6591     // Add the register arguments
6592     if (Op.isReg()) {
6593       Op.addRegOperands(Inst, 1);
6594     } else if (Op.isImmModifier()) {
6595       OptionalIdx[Op.getImmTy()] = I;
6596     } else if (!Op.isToken()) {
6597       llvm_unreachable("unexpected operand type");
6598     }
6599   }
6600 
6601   bool IsGFX10 = isGFX10();
6602 
6603   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6604   if (IsGFX10)
6605     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6606   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6607   if (IsGFX10)
6608     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6609   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6610   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6611   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6612   if (IsGFX10)
6613     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6614   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6615   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6616   if (!IsGFX10)
6617     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6618   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6619 }
6620 
6621 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6622   cvtMIMG(Inst, Operands, true);
6623 }
6624 
6625 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6626                                       const OperandVector &Operands) {
6627   for (unsigned I = 1; I < Operands.size(); ++I) {
6628     auto &Operand = (AMDGPUOperand &)*Operands[I];
6629     if (Operand.isReg())
6630       Operand.addRegOperands(Inst, 1);
6631   }
6632 
6633   Inst.addOperand(MCOperand::createImm(1)); // a16
6634 }
6635 
6636 //===----------------------------------------------------------------------===//
6637 // smrd
6638 //===----------------------------------------------------------------------===//
6639 
6640 bool AMDGPUOperand::isSMRDOffset8() const {
6641   return isImm() && isUInt<8>(getImm());
6642 }
6643 
6644 bool AMDGPUOperand::isSMEMOffset() const {
6645   return isImm(); // Offset range is checked later by validator.
6646 }
6647 
6648 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6649   // 32-bit literals are only supported on CI, and we only want to use them
6650   // when the offset does not fit in 8 bits.
6651   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6652 }
6653 
6654 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6655   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6656 }
6657 
6658 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6659   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6660 }
6661 
6662 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6663   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6664 }
6665 
6666 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6667   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6668 }
6669 
6670 //===----------------------------------------------------------------------===//
6671 // vop3
6672 //===----------------------------------------------------------------------===//
6673 
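// Convert the omod asm syntax to its encoded value: mul:1, mul:2 and mul:4
// map to 0, 1 and 2 respectively; div:2 maps to 3 and div:1 to 0.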
6674 static bool ConvertOmodMul(int64_t &Mul) {
6675   if (Mul != 1 && Mul != 2 && Mul != 4)
6676     return false;
6677 
6678   Mul >>= 1;
6679   return true;
6680 }
6681 
6682 static bool ConvertOmodDiv(int64_t &Div) {
6683   if (Div == 1) {
6684     Div = 0;
6685     return true;
6686   }
6687 
6688   if (Div == 2) {
6689     Div = 3;
6690     return true;
6691   }
6692 
6693   return false;
6694 }
6695 
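// Note the legacy quirk handled below: the asm syntax "bound_ctrl:0" sets the
// encoded bound_ctrl bit to 1, while an input value of -1 encodes as 0.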
6696 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6697   if (BoundCtrl == 0) {
6698     BoundCtrl = 1;
6699     return true;
6700   }
6701 
6702   if (BoundCtrl == -1) {
6703     BoundCtrl = 0;
6704     return true;
6705   }
6706 
6707   return false;
6708 }
6709 
6710 // Note: the order in this table matches the order of operands in AsmString.
6711 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6712   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6713   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6714   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6715   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6716   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6717   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6718   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6719   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6720   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6721   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6722   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6723   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6724   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6725   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6726   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6727   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6728   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6729   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6730   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6731   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6732   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6733   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6734   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6735   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6736   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6737   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6738   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6739   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6740   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6741   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6742   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6743   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6744   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6745   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6746   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6747   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6748   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6749   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6750   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6751   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6752   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6753   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6754   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6755 };
6756 
6757 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6758 
6759   OperandMatchResultTy res = parseOptionalOpr(Operands);
6760 
6761   // This is a hack to enable hardcoded mandatory operands which follow
6762   // optional operands.
6763   //
6764   // The current design assumes that all operands after the first optional operand
6765   // are also optional. However, the implementation of some instructions violates
6766   // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
6767   //
6768   // To alleviate this problem, we have to (implicitly) parse extra operands
6769   // to make sure the autogenerated parser of custom operands never hits a
6770   // hardcoded mandatory operand.
6771 
6772   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6773     if (res != MatchOperand_Success ||
6774         isToken(AsmToken::EndOfStatement))
6775       break;
6776 
6777     trySkipToken(AsmToken::Comma);
6778     res = parseOptionalOpr(Operands);
6779   }
6780 
6781   return res;
6782 }
6783 
6784 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6785   OperandMatchResultTy res;
6786   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6787     // try to parse any optional operand here
6788     if (Op.IsBit) {
6789       res = parseNamedBit(Op.Name, Operands, Op.Type);
6790     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6791       res = parseOModOperand(Operands);
6792     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6793                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6794                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6795       res = parseSDWASel(Operands, Op.Name, Op.Type);
6796     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6797       res = parseSDWADstUnused(Operands);
6798     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6799                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6800                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6801                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6802       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6803                                         Op.ConvertResult);
6804     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6805       res = parseDim(Operands);
6806     } else {
6807       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6808     }
6809     if (res != MatchOperand_NoMatch) {
6810       return res;
6811     }
6812   }
6813   return MatchOperand_NoMatch;
6814 }
6815 
6816 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6817   StringRef Name = Parser.getTok().getString();
6818   if (Name == "mul") {
6819     return parseIntWithPrefix("mul", Operands,
6820                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6821   }
6822 
6823   if (Name == "div") {
6824     return parseIntWithPrefix("div", Operands,
6825                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6826   }
6827 
6828   return MatchOperand_NoMatch;
6829 }
6830 
6831 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6832   cvtVOP3P(Inst, Operands);
6833 
6834   int Opc = Inst.getOpcode();
6835 
6836   int SrcNum;
6837   const int Ops[] = { AMDGPU::OpName::src0,
6838                       AMDGPU::OpName::src1,
6839                       AMDGPU::OpName::src2 };
6840   for (SrcNum = 0;
6841        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6842        ++SrcNum);
6843   assert(SrcNum > 0);
6844 
6845   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6846   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6847 
6848   if ((OpSel & (1 << SrcNum)) != 0) {
6849     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6850     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6851     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6852   }
6853 }
6854 
6855 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6856       // 1. This operand is an input modifier
6857   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6858       // 2. This is not the last operand
6859       && Desc.NumOperands > (OpNum + 1)
6860       // 3. The next operand is a register class
6861       && Desc.OpInfo[OpNum + 1].RegClass != -1
6862       // 4. The next register is not tied to any other operand
6863       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6864 }
6865 
6866 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6867 {
6868   OptionalImmIndexMap OptionalIdx;
6869   unsigned Opc = Inst.getOpcode();
6870 
6871   unsigned I = 1;
6872   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6873   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6874     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6875   }
6876 
6877   for (unsigned E = Operands.size(); I != E; ++I) {
6878     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6879     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6880       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6881     } else if (Op.isInterpSlot() ||
6882                Op.isInterpAttr() ||
6883                Op.isAttrChan()) {
6884       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6885     } else if (Op.isImmModifier()) {
6886       OptionalIdx[Op.getImmTy()] = I;
6887     } else {
6888       llvm_unreachable("unhandled operand type");
6889     }
6890   }
6891 
6892   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6893     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6894   }
6895 
6896   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6897     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6898   }
6899 
6900   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6901     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6902   }
6903 }
6904 
6905 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6906                               OptionalImmIndexMap &OptionalIdx) {
6907   unsigned Opc = Inst.getOpcode();
6908 
6909   unsigned I = 1;
6910   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6911   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6912     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6913   }
6914 
6915   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6916     // This instruction has src modifiers
6917     for (unsigned E = Operands.size(); I != E; ++I) {
6918       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6919       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6920         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6921       } else if (Op.isImmModifier()) {
6922         OptionalIdx[Op.getImmTy()] = I;
6923       } else if (Op.isRegOrImm()) {
6924         Op.addRegOrImmOperands(Inst, 1);
6925       } else {
6926         llvm_unreachable("unhandled operand type");
6927       }
6928     }
6929   } else {
6930     // No src modifiers
6931     for (unsigned E = Operands.size(); I != E; ++I) {
6932       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6933       if (Op.isMod()) {
6934         OptionalIdx[Op.getImmTy()] = I;
6935       } else {
6936         Op.addRegOrImmOperands(Inst, 1);
6937       }
6938     }
6939   }
6940 
6941   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6942     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6943   }
6944 
6945   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6946     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6947   }
6948 
6949   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6950   // they have a src2 register operand that is tied to the dst operand.
6951   // We don't allow modifiers for this operand in the assembler, so
6952   // src2_modifiers should be 0.
6953   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6954       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6955       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6956       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
6957       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
6958       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6959       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6960       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6961       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
6962       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6963     auto it = Inst.begin();
6964     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6965     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6966     ++it;
6967     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6968   }
6969 }
6970 
6971 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6972   OptionalImmIndexMap OptionalIdx;
6973   cvtVOP3(Inst, Operands, OptionalIdx);
6974 }
6975 
6976 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6977                                const OperandVector &Operands) {
6978   OptionalImmIndexMap OptIdx;
6979   const int Opc = Inst.getOpcode();
6980   const MCInstrDesc &Desc = MII.get(Opc);
6981 
6982   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6983 
6984   cvtVOP3(Inst, Operands, OptIdx);
6985 
6986   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6987     assert(!IsPacked);
6988     Inst.addOperand(Inst.getOperand(0));
6989   }
6990 
6991   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6992   // instruction, and then figure out where to actually put the modifiers
6993 
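  // op_sel, op_sel_hi, neg_lo and neg_hi are each parsed as a single immediate;
  // bit J of each value applies to source operand J and is folded into the
  // corresponding srcJ_modifiers operand in the loop below.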
6994   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6995 
6996   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6997   if (OpSelHiIdx != -1) {
6998     int DefaultVal = IsPacked ? -1 : 0;
6999     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7000                           DefaultVal);
7001   }
7002 
7003   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7004   if (NegLoIdx != -1) {
7005     assert(IsPacked);
7006     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7007     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7008   }
7009 
7010   const int Ops[] = { AMDGPU::OpName::src0,
7011                       AMDGPU::OpName::src1,
7012                       AMDGPU::OpName::src2 };
7013   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7014                          AMDGPU::OpName::src1_modifiers,
7015                          AMDGPU::OpName::src2_modifiers };
7016 
7017   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7018 
7019   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7020   unsigned OpSelHi = 0;
7021   unsigned NegLo = 0;
7022   unsigned NegHi = 0;
7023 
7024   if (OpSelHiIdx != -1) {
7025     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7026   }
7027 
7028   if (NegLoIdx != -1) {
7029     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7030     NegLo = Inst.getOperand(NegLoIdx).getImm();
7031     NegHi = Inst.getOperand(NegHiIdx).getImm();
7032   }
7033 
7034   for (int J = 0; J < 3; ++J) {
7035     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7036     if (OpIdx == -1)
7037       break;
7038 
7039     uint32_t ModVal = 0;
7040 
7041     if ((OpSel & (1 << J)) != 0)
7042       ModVal |= SISrcMods::OP_SEL_0;
7043 
7044     if ((OpSelHi & (1 << J)) != 0)
7045       ModVal |= SISrcMods::OP_SEL_1;
7046 
7047     if ((NegLo & (1 << J)) != 0)
7048       ModVal |= SISrcMods::NEG;
7049 
7050     if ((NegHi & (1 << J)) != 0)
7051       ModVal |= SISrcMods::NEG_HI;
7052 
7053     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7054 
7055     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7056   }
7057 }
7058 
7059 //===----------------------------------------------------------------------===//
7060 // dpp
7061 //===----------------------------------------------------------------------===//
7062 
7063 bool AMDGPUOperand::isDPP8() const {
7064   return isImmTy(ImmTyDPP8);
7065 }
7066 
7067 bool AMDGPUOperand::isDPPCtrl() const {
7068   using namespace AMDGPU::DPP;
7069 
7070   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7071   if (result) {
7072     int64_t Imm = getImm();
7073     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7074            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7075            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7076            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7077            (Imm == DppCtrl::WAVE_SHL1) ||
7078            (Imm == DppCtrl::WAVE_ROL1) ||
7079            (Imm == DppCtrl::WAVE_SHR1) ||
7080            (Imm == DppCtrl::WAVE_ROR1) ||
7081            (Imm == DppCtrl::ROW_MIRROR) ||
7082            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7083            (Imm == DppCtrl::BCAST15) ||
7084            (Imm == DppCtrl::BCAST31) ||
7085            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7086            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7087   }
7088   return false;
7089 }
7090 
7091 //===----------------------------------------------------------------------===//
7092 // mAI
7093 //===----------------------------------------------------------------------===//
7094 
7095 bool AMDGPUOperand::isBLGP() const {
7096   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7097 }
7098 
7099 bool AMDGPUOperand::isCBSZ() const {
7100   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7101 }
7102 
7103 bool AMDGPUOperand::isABID() const {
7104   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7105 }
7106 
7107 bool AMDGPUOperand::isS16Imm() const {
7108   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7109 }
7110 
7111 bool AMDGPUOperand::isU16Imm() const {
7112   return isImm() && isUInt<16>(getImm());
7113 }
7114 
7115 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7116   if (!isGFX10())
7117     return MatchOperand_NoMatch;
7118 
7119   SMLoc S = Parser.getTok().getLoc();
7120 
7121   if (getLexer().isNot(AsmToken::Identifier))
7122     return MatchOperand_NoMatch;
7123   if (getLexer().getTok().getString() != "dim")
7124     return MatchOperand_NoMatch;
7125 
7126   Parser.Lex();
7127   if (getLexer().isNot(AsmToken::Colon))
7128     return MatchOperand_ParseFail;
7129 
7130   Parser.Lex();
7131 
7132   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7133   // integer.
7134   std::string Token;
7135   if (getLexer().is(AsmToken::Integer)) {
7136     SMLoc Loc = getLexer().getTok().getEndLoc();
7137     Token = std::string(getLexer().getTok().getString());
7138     Parser.Lex();
7139     if (getLexer().getTok().getLoc() != Loc)
7140       return MatchOperand_ParseFail;
7141   }
7142   if (getLexer().isNot(AsmToken::Identifier))
7143     return MatchOperand_ParseFail;
7144   Token += getLexer().getTok().getString();
7145 
7146   StringRef DimId = Token;
7147   if (DimId.startswith("SQ_RSRC_IMG_"))
7148     DimId = DimId.substr(12);
7149 
7150   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7151   if (!DimInfo)
7152     return MatchOperand_ParseFail;
7153 
7154   Parser.Lex();
7155 
7156   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7157                                               AMDGPUOperand::ImmTyDim));
7158   return MatchOperand_Success;
7159 }
7160 
7161 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7162   SMLoc S = Parser.getTok().getLoc();
7163   StringRef Prefix;
7164 
7165   if (getLexer().getKind() == AsmToken::Identifier) {
7166     Prefix = Parser.getTok().getString();
7167   } else {
7168     return MatchOperand_NoMatch;
7169   }
7170 
7171   if (Prefix != "dpp8")
7172     return parseDPPCtrl(Operands);
7173   if (!isGFX10())
7174     return MatchOperand_NoMatch;
7175 
7176   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7177 
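  // Each lane select occupies 3 bits; selector i is packed into bits
  // [3*i+2 : 3*i], so the identity dpp8:[0,1,2,3,4,5,6,7] encodes as 0xFAC688.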
7178   int64_t Sels[8];
7179 
7180   Parser.Lex();
7181   if (getLexer().isNot(AsmToken::Colon))
7182     return MatchOperand_ParseFail;
7183 
7184   Parser.Lex();
7185   if (getLexer().isNot(AsmToken::LBrac))
7186     return MatchOperand_ParseFail;
7187 
7188   Parser.Lex();
7189   if (getParser().parseAbsoluteExpression(Sels[0]))
7190     return MatchOperand_ParseFail;
7191   if (0 > Sels[0] || 7 < Sels[0])
7192     return MatchOperand_ParseFail;
7193 
7194   for (size_t i = 1; i < 8; ++i) {
7195     if (getLexer().isNot(AsmToken::Comma))
7196       return MatchOperand_ParseFail;
7197 
7198     Parser.Lex();
7199     if (getParser().parseAbsoluteExpression(Sels[i]))
7200       return MatchOperand_ParseFail;
7201     if (0 > Sels[i] || 7 < Sels[i])
7202       return MatchOperand_ParseFail;
7203   }
7204 
7205   if (getLexer().isNot(AsmToken::RBrac))
7206     return MatchOperand_ParseFail;
7207   Parser.Lex();
7208 
7209   unsigned DPP8 = 0;
7210   for (size_t i = 0; i < 8; ++i)
7211     DPP8 |= (Sels[i] << (i * 3));
7212 
7213   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7214   return MatchOperand_Success;
7215 }
7216 
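// Parse a DPP control operand, e.g. quad_perm:[0,1,2,3], row_shl:1,
// row_mirror, wave_ror:1, row_bcast:15, row_share:0 or row_xmask:15,
// and convert it to the DppCtrl encoding.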
7217 OperandMatchResultTy
7218 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7219   using namespace AMDGPU::DPP;
7220 
7221   SMLoc S = Parser.getTok().getLoc();
7222   StringRef Prefix;
7223   int64_t Int;
7224 
7225   if (getLexer().getKind() == AsmToken::Identifier) {
7226     Prefix = Parser.getTok().getString();
7227   } else {
7228     return MatchOperand_NoMatch;
7229   }
7230 
7231   if (Prefix == "row_mirror") {
7232     Int = DppCtrl::ROW_MIRROR;
7233     Parser.Lex();
7234   } else if (Prefix == "row_half_mirror") {
7235     Int = DppCtrl::ROW_HALF_MIRROR;
7236     Parser.Lex();
7237   } else {
7238     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7239     if (Prefix != "quad_perm"
7240         && Prefix != "row_shl"
7241         && Prefix != "row_shr"
7242         && Prefix != "row_ror"
7243         && Prefix != "wave_shl"
7244         && Prefix != "wave_rol"
7245         && Prefix != "wave_shr"
7246         && Prefix != "wave_ror"
7247         && Prefix != "row_bcast"
7248         && Prefix != "row_share"
7249         && Prefix != "row_xmask") {
7250       return MatchOperand_NoMatch;
7251     }
7252 
7253     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7254       return MatchOperand_NoMatch;
7255 
7256     if (!isVI() && !isGFX9() &&
7257         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7258          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7259          Prefix == "row_bcast"))
7260       return MatchOperand_NoMatch;
7261 
7262     Parser.Lex();
7263     if (getLexer().isNot(AsmToken::Colon))
7264       return MatchOperand_ParseFail;
7265 
7266     if (Prefix == "quad_perm") {
7267       // quad_perm:[%d,%d,%d,%d]
7268       Parser.Lex();
7269       if (getLexer().isNot(AsmToken::LBrac))
7270         return MatchOperand_ParseFail;
7271       Parser.Lex();
7272 
7273       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7274         return MatchOperand_ParseFail;
7275 
7276       for (int i = 0; i < 3; ++i) {
7277         if (getLexer().isNot(AsmToken::Comma))
7278           return MatchOperand_ParseFail;
7279         Parser.Lex();
7280 
7281         int64_t Temp;
7282         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7283           return MatchOperand_ParseFail;
7284         const int shift = i*2 + 2;
7285         Int += (Temp << shift);
7286       }
7287 
7288       if (getLexer().isNot(AsmToken::RBrac))
7289         return MatchOperand_ParseFail;
7290       Parser.Lex();
7291     } else {
7292       // sel:%d
7293       Parser.Lex();
7294       if (getParser().parseAbsoluteExpression(Int))
7295         return MatchOperand_ParseFail;
7296 
7297       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7298         Int |= DppCtrl::ROW_SHL0;
7299       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7300         Int |= DppCtrl::ROW_SHR0;
7301       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7302         Int |= DppCtrl::ROW_ROR0;
7303       } else if (Prefix == "wave_shl" && 1 == Int) {
7304         Int = DppCtrl::WAVE_SHL1;
7305       } else if (Prefix == "wave_rol" && 1 == Int) {
7306         Int = DppCtrl::WAVE_ROL1;
7307       } else if (Prefix == "wave_shr" && 1 == Int) {
7308         Int = DppCtrl::WAVE_SHR1;
7309       } else if (Prefix == "wave_ror" && 1 == Int) {
7310         Int = DppCtrl::WAVE_ROR1;
7311       } else if (Prefix == "row_bcast") {
7312         if (Int == 15) {
7313           Int = DppCtrl::BCAST15;
7314         } else if (Int == 31) {
7315           Int = DppCtrl::BCAST31;
7316         } else {
7317           return MatchOperand_ParseFail;
7318         }
7319       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7320         Int |= DppCtrl::ROW_SHARE_FIRST;
7321       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7322         Int |= DppCtrl::ROW_XMASK_FIRST;
7323       } else {
7324         return MatchOperand_ParseFail;
7325       }
7326     }
7327   }
7328 
7329   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7330   return MatchOperand_Success;
7331 }
7332 
7333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7334   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7335 }
7336 
7337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7338   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7339 }
7340 
7341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7342   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7343 }
7344 
7345 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7346   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7347 }
7348 
7349 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7350   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7351 }
7352 
7353 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7354   OptionalImmIndexMap OptionalIdx;
7355 
7356   unsigned I = 1;
7357   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7358   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7359     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7360   }
7361 
7362   int Fi = 0;
7363   for (unsigned E = Operands.size(); I != E; ++I) {
7364     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7365                                             MCOI::TIED_TO);
7366     if (TiedTo != -1) {
7367       assert((unsigned)TiedTo < Inst.getNumOperands());
7368       // handle tied old or src2 for MAC instructions
7369       Inst.addOperand(Inst.getOperand(TiedTo));
7370     }
7371     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7372     // Add the register arguments
7373     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7374       // The dpp variants of VOP2b (v_add_u32, v_sub_u32, ...) use a "vcc" token.
7375       // Skip it.
7376       continue;
7377     }
7378 
7379     if (IsDPP8) {
7380       if (Op.isDPP8()) {
7381         Op.addImmOperands(Inst, 1);
7382       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7383         Op.addRegWithFPInputModsOperands(Inst, 2);
7384       } else if (Op.isFI()) {
7385         Fi = Op.getImm();
7386       } else if (Op.isReg()) {
7387         Op.addRegOperands(Inst, 1);
7388       } else {
7389         llvm_unreachable("Invalid operand type");
7390       }
7391     } else {
7392       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7393         Op.addRegWithFPInputModsOperands(Inst, 2);
7394       } else if (Op.isDPPCtrl()) {
7395         Op.addImmOperands(Inst, 1);
7396       } else if (Op.isImm()) {
7397         // Handle optional arguments
7398         OptionalIdx[Op.getImmTy()] = I;
7399       } else {
7400         llvm_unreachable("Invalid operand type");
7401       }
7402     }
7403   }
7404 
7405   if (IsDPP8) {
7406     using namespace llvm::AMDGPU::DPP;
7407     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7408   } else {
7409     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7410     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7411     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7412     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7413       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7414     }
7415   }
7416 }
7417 
7418 //===----------------------------------------------------------------------===//
7419 // sdwa
7420 //===----------------------------------------------------------------------===//
7421 
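// Parse an SDWA operand selection such as dst_sel:WORD_1 or src0_sel:BYTE_0.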
7422 OperandMatchResultTy
7423 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7424                               AMDGPUOperand::ImmTy Type) {
7425   using namespace llvm::AMDGPU::SDWA;
7426 
7427   SMLoc S = Parser.getTok().getLoc();
7428   StringRef Value;
7429   OperandMatchResultTy res;
7430 
7431   res = parseStringWithPrefix(Prefix, Value);
7432   if (res != MatchOperand_Success) {
7433     return res;
7434   }
7435 
7436   int64_t Int;
7437   Int = StringSwitch<int64_t>(Value)
7438         .Case("BYTE_0", SdwaSel::BYTE_0)
7439         .Case("BYTE_1", SdwaSel::BYTE_1)
7440         .Case("BYTE_2", SdwaSel::BYTE_2)
7441         .Case("BYTE_3", SdwaSel::BYTE_3)
7442         .Case("WORD_0", SdwaSel::WORD_0)
7443         .Case("WORD_1", SdwaSel::WORD_1)
7444         .Case("DWORD", SdwaSel::DWORD)
7445         .Default(0xffffffff);
7446   Parser.Lex(); // eat last token
7447 
7448   if (Int == 0xffffffff) {
7449     return MatchOperand_ParseFail;
7450   }
7451 
7452   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7453   return MatchOperand_Success;
7454 }
7455 
7456 OperandMatchResultTy
7457 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7458   using namespace llvm::AMDGPU::SDWA;
7459 
7460   SMLoc S = Parser.getTok().getLoc();
7461   StringRef Value;
7462   OperandMatchResultTy res;
7463 
7464   res = parseStringWithPrefix("dst_unused", Value);
7465   if (res != MatchOperand_Success) {
7466     return res;
7467   }
7468 
7469   int64_t Int;
7470   Int = StringSwitch<int64_t>(Value)
7471         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7472         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7473         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7474         .Default(0xffffffff);
7475   Parser.Lex(); // eat last token
7476 
7477   if (Int == 0xffffffff) {
7478     return MatchOperand_ParseFail;
7479   }
7480 
7481   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7482   return MatchOperand_Success;
7483 }
7484 
7485 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7486   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7487 }
7488 
7489 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7490   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7491 }
7492 
7493 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7494   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7495 }
7496 
7497 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7498   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7499 }
7500 
7501 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7502   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7503 }
7504 
7505 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7506                               uint64_t BasicInstType,
7507                               bool SkipDstVcc,
7508                               bool SkipSrcVcc) {
7509   using namespace llvm::AMDGPU::SDWA;
7510 
7511   OptionalImmIndexMap OptionalIdx;
7512   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7513   bool SkippedVcc = false;
7514 
7515   unsigned I = 1;
7516   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7517   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7518     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7519   }
7520 
7521   for (unsigned E = Operands.size(); I != E; ++I) {
7522     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7523     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7524         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7525       // The sdwa variants of VOP2b (v_add_u32, v_sub_u32, ...) use a "vcc" token as dst.
7526       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7527       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7528       // Skip VCC only if we didn't skip it on previous iteration.
7529       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7530       if (BasicInstType == SIInstrFlags::VOP2 &&
7531           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7532            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7533         SkippedVcc = true;
7534         continue;
7535       } else if (BasicInstType == SIInstrFlags::VOPC &&
7536                  Inst.getNumOperands() == 0) {
7537         SkippedVcc = true;
7538         continue;
7539       }
7540     }
7541     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7542       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7543     } else if (Op.isImm()) {
7544       // Handle optional arguments
7545       OptionalIdx[Op.getImmTy()] = I;
7546     } else {
7547       llvm_unreachable("Invalid operand type");
7548     }
7549     SkippedVcc = false;
7550   }
7551 
7552   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7553       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7554       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7555     // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
7556     switch (BasicInstType) {
7557     case SIInstrFlags::VOP1:
7558       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7559       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7560         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7561       }
7562       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7563       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7564       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7565       break;
7566 
7567     case SIInstrFlags::VOP2:
7568       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7569       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7570         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7571       }
7572       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7573       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7574       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7575       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7576       break;
7577 
7578     case SIInstrFlags::VOPC:
7579       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7580         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7581       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7582       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7583       break;
7584 
7585     default:
7586       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7587     }
7588   }
7589 
7590   // Special case v_mac_{f16, f32}:
7591   // they have a src2 register operand that is tied to the dst operand.
7592   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7593       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7594     auto it = Inst.begin();
7595     std::advance(
7596       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7597     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7598   }
7599 }
7600 
7601 //===----------------------------------------------------------------------===//
7602 // mAI
7603 //===----------------------------------------------------------------------===//
7604 
7605 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7606   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7607 }
7608 
7609 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7610   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7611 }
7612 
7613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7614   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7615 }
7616 
7617 /// Force static initialization.
7618 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7619   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7620   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7621 }
7622 
7623 #define GET_REGISTER_MATCHER
7624 #define GET_MATCHER_IMPLEMENTATION
7625 #define GET_MNEMONIC_SPELL_CHECKER
7626 #define GET_MNEMONIC_CHECKER
7627 #include "AMDGPUGenAsmMatcher.inc"
7628 
7629 // This function should be defined after the auto-generated include so that
7630 // the MatchClassKind enum is defined.
7631 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7632                                                      unsigned Kind) {
7633   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7634   // But MatchInstructionImpl() expects to meet a token and fails to validate
7635   // the operand. This method checks if we are given an immediate operand but
7636   // expect to get the corresponding token.
7637   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7638   switch (Kind) {
7639   case MCK_addr64:
7640     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7641   case MCK_gds:
7642     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7643   case MCK_lds:
7644     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7645   case MCK_glc:
7646     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7647   case MCK_idxen:
7648     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7649   case MCK_offen:
7650     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7651   case MCK_SSrcB32:
7652     // When operands have expression values, they will return true for isToken,
7653     // because it is not possible to distinguish between a token and an
7654     // expression at parse time. MatchInstructionImpl() will always try to
7655     // match an operand as a token when isToken returns true; if the
7656     // name of the expression is not a valid token, the match will fail,
7657     // so we need to handle it here.
7658     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7659   case MCK_SSrcF32:
7660     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7661   case MCK_SoppBrTarget:
7662     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7663   case MCK_VReg32OrOff:
7664     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7665   case MCK_InterpSlot:
7666     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7667   case MCK_Attr:
7668     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7669   case MCK_AttrChan:
7670     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7671   case MCK_ImmSMEMOffset:
7672     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7673   case MCK_SReg_64:
7674   case MCK_SReg_64_XEXEC:
7675     // Null is defined as a 32-bit register but
7676     // it should also be enabled with 64-bit operands.
7677     // The following code enables it for SReg_64 operands
7678     // used as source and destination. Remaining source
7679     // operands are handled in isInlinableImm.
7680     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7681   default:
7682     return Match_InvalidOperand;
7683   }
7684 }
7685 
7686 //===----------------------------------------------------------------------===//
7687 // endpgm
7688 //===----------------------------------------------------------------------===//
7689 
7690 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7691   SMLoc S = Parser.getTok().getLoc();
7692   int64_t Imm = 0;
7693 
7694   if (!parseExpr(Imm)) {
7695     // The operand is optional, if not present default to 0
7696     Imm = 0;
7697   }
7698 
7699   if (!isUInt<16>(Imm)) {
7700     Error(S, "expected a 16-bit value");
7701     return MatchOperand_ParseFail;
7702   }
7703 
7704   Operands.push_back(
7705       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7706   return MatchOperand_Success;
7707 }
7708 
7709 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7710