1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
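  // Source operand modifiers: Abs/Neg are floating-point modifiers, Sext is an
  // integer modifier; the two groups are mutually exclusive (see the assert in
  // getModifiersOperand below).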
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
339   // value of the GLC operand.
340   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
341   bool isSLC() const { return isImmTy(ImmTySLC); }
342   bool isSWZ() const { return isImmTy(ImmTySWZ); }
343   bool isTFE() const { return isImmTy(ImmTyTFE); }
344   bool isD16() const { return isImmTy(ImmTyD16); }
345   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
346   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
347   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
348   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
349   bool isFI() const { return isImmTy(ImmTyDppFi); }
350   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
351   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
352   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
353   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
354   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
355   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
356   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
357   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
358   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
359   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
360   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
361   bool isHigh() const { return isImmTy(ImmTyHigh); }
362 
363   bool isMod() const {
364     return isClampSI() || isOModSI();
365   }
366 
367   bool isRegOrImm() const {
368     return isReg() || isImm();
369   }
370 
371   bool isRegClass(unsigned RCID) const;
372 
373   bool isInlineValue() const;
374 
375   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
376     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
377   }
378 
379   bool isSCSrcB16() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
381   }
382 
383   bool isSCSrcV2B16() const {
384     return isSCSrcB16();
385   }
386 
387   bool isSCSrcB32() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
389   }
390 
391   bool isSCSrcB64() const {
392     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
393   }
394 
395   bool isBoolReg() const;
396 
397   bool isSCSrcF16() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
399   }
400 
401   bool isSCSrcV2F16() const {
402     return isSCSrcF16();
403   }
404 
405   bool isSCSrcF32() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
407   }
408 
409   bool isSCSrcF64() const {
410     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
411   }
412 
413   bool isSSrcB32() const {
414     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
415   }
416 
417   bool isSSrcB16() const {
418     return isSCSrcB16() || isLiteralImm(MVT::i16);
419   }
420 
421   bool isSSrcV2B16() const {
422     llvm_unreachable("cannot happen");
423     return isSSrcB16();
424   }
425 
426   bool isSSrcB64() const {
427     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
428     // See isVSrc64().
429     return isSCSrcB64() || isLiteralImm(MVT::i64);
430   }
431 
432   bool isSSrcF32() const {
433     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
434   }
435 
436   bool isSSrcF64() const {
437     return isSCSrcB64() || isLiteralImm(MVT::f64);
438   }
439 
440   bool isSSrcF16() const {
441     return isSCSrcB16() || isLiteralImm(MVT::f16);
442   }
443 
444   bool isSSrcV2F16() const {
445     llvm_unreachable("cannot happen");
446     return isSSrcF16();
447   }
448 
449   bool isSSrcOrLdsB32() const {
450     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
451            isLiteralImm(MVT::i32) || isExpr();
452   }
453 
454   bool isVCSrcB32() const {
455     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
456   }
457 
458   bool isVCSrcB64() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
460   }
461 
462   bool isVCSrcB16() const {
463     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
464   }
465 
466   bool isVCSrcV2B16() const {
467     return isVCSrcB16();
468   }
469 
470   bool isVCSrcF32() const {
471     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
472   }
473 
474   bool isVCSrcF64() const {
475     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
476   }
477 
478   bool isVCSrcF16() const {
479     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
480   }
481 
482   bool isVCSrcV2F16() const {
483     return isVCSrcF16();
484   }
485 
486   bool isVSrcB32() const {
487     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
488   }
489 
490   bool isVSrcB64() const {
491     return isVCSrcF64() || isLiteralImm(MVT::i64);
492   }
493 
494   bool isVSrcB16() const {
495     return isVCSrcB16() || isLiteralImm(MVT::i16);
496   }
497 
498   bool isVSrcV2B16() const {
499     return isVSrcB16() || isLiteralImm(MVT::v2i16);
500   }
501 
502   bool isVSrcF32() const {
503     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
504   }
505 
506   bool isVSrcF64() const {
507     return isVCSrcF64() || isLiteralImm(MVT::f64);
508   }
509 
510   bool isVSrcF16() const {
511     return isVCSrcF16() || isLiteralImm(MVT::f16);
512   }
513 
514   bool isVSrcV2F16() const {
515     return isVSrcF16() || isLiteralImm(MVT::v2f16);
516   }
517 
518   bool isVISrcB32() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
520   }
521 
522   bool isVISrcB16() const {
523     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
524   }
525 
526   bool isVISrcV2B16() const {
527     return isVISrcB16();
528   }
529 
530   bool isVISrcF32() const {
531     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
532   }
533 
534   bool isVISrcF16() const {
535     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
536   }
537 
538   bool isVISrcV2F16() const {
539     return isVISrcF16() || isVISrcB32();
540   }
541 
542   bool isAISrcB32() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
544   }
545 
546   bool isAISrcB16() const {
547     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
548   }
549 
550   bool isAISrcV2B16() const {
551     return isAISrcB16();
552   }
553 
554   bool isAISrcF32() const {
555     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
556   }
557 
558   bool isAISrcF16() const {
559     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
560   }
561 
562   bool isAISrcV2F16() const {
563     return isAISrcF16() || isAISrcB32();
564   }
565 
566   bool isAISrc_128B32() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
568   }
569 
570   bool isAISrc_128B16() const {
571     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
572   }
573 
574   bool isAISrc_128V2B16() const {
575     return isAISrc_128B16();
576   }
577 
578   bool isAISrc_128F32() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
580   }
581 
582   bool isAISrc_128F16() const {
583     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
584   }
585 
586   bool isAISrc_128V2F16() const {
587     return isAISrc_128F16() || isAISrc_128B32();
588   }
589 
590   bool isAISrc_512B32() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
592   }
593 
594   bool isAISrc_512B16() const {
595     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
596   }
597 
598   bool isAISrc_512V2B16() const {
599     return isAISrc_512B16();
600   }
601 
602   bool isAISrc_512F32() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
604   }
605 
606   bool isAISrc_512F16() const {
607     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
608   }
609 
610   bool isAISrc_512V2F16() const {
611     return isAISrc_512F16() || isAISrc_512B32();
612   }
613 
614   bool isAISrc_1024B32() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
616   }
617 
618   bool isAISrc_1024B16() const {
619     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
620   }
621 
622   bool isAISrc_1024V2B16() const {
623     return isAISrc_1024B16();
624   }
625 
626   bool isAISrc_1024F32() const {
627     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
628   }
629 
630   bool isAISrc_1024F16() const {
631     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
632   }
633 
634   bool isAISrc_1024V2F16() const {
635     return isAISrc_1024F16() || isAISrc_1024B32();
636   }
637 
638   bool isKImmFP32() const {
639     return isLiteralImm(MVT::f32);
640   }
641 
642   bool isKImmFP16() const {
643     return isLiteralImm(MVT::f16);
644   }
645 
646   bool isMem() const override {
647     return false;
648   }
649 
650   bool isExpr() const {
651     return Kind == Expression;
652   }
653 
654   bool isSoppBrTarget() const {
655     return isExpr() || isImm();
656   }
657 
658   bool isSWaitCnt() const;
659   bool isHwreg() const;
660   bool isSendMsg() const;
661   bool isSwizzle() const;
662   bool isSMRDOffset8() const;
663   bool isSMEMOffset() const;
664   bool isSMRDLiteralOffset() const;
665   bool isDPP8() const;
666   bool isDPPCtrl() const;
667   bool isBLGP() const;
668   bool isCBSZ() const;
669   bool isABID() const;
670   bool isGPRIdxMode() const;
671   bool isS16Imm() const;
672   bool isU16Imm() const;
673   bool isEndpgm() const;
674 
675   StringRef getExpressionAsToken() const {
676     assert(isExpr());
677     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
678     return S->getSymbol().getName();
679   }
680 
681   StringRef getToken() const {
682     assert(isToken());
683 
684     if (Kind == Expression)
685       return getExpressionAsToken();
686 
687     return StringRef(Tok.Data, Tok.Length);
688   }
689 
690   int64_t getImm() const {
691     assert(isImm());
692     return Imm.Val;
693   }
694 
695   void setImm(int64_t Val) {
696     assert(isImm());
697     Imm.Val = Val;
698   }
699 
700   ImmTy getImmTy() const {
701     assert(isImm());
702     return Imm.Type;
703   }
704 
705   unsigned getReg() const override {
706     assert(isRegKind());
707     return Reg.RegNo;
708   }
709 
710   SMLoc getStartLoc() const override {
711     return StartLoc;
712   }
713 
714   SMLoc getEndLoc() const override {
715     return EndLoc;
716   }
717 
718   SMRange getLocRange() const {
719     return SMRange(StartLoc, EndLoc);
720   }
721 
722   Modifiers getModifiers() const {
723     assert(isRegKind() || isImmTy(ImmTyNone));
724     return isRegKind() ? Reg.Mods : Imm.Mods;
725   }
726 
727   void setModifiers(Modifiers Mods) {
728     assert(isRegKind() || isImmTy(ImmTyNone));
729     if (isRegKind())
730       Reg.Mods = Mods;
731     else
732       Imm.Mods = Mods;
733   }
734 
735   bool hasModifiers() const {
736     return getModifiers().hasModifiers();
737   }
738 
739   bool hasFPModifiers() const {
740     return getModifiers().hasFPModifiers();
741   }
742 
743   bool hasIntModifiers() const {
744     return getModifiers().hasIntModifiers();
745   }
746 
747   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
748 
749   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
750 
751   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
752 
753   template <unsigned Bitwidth>
754   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
755 
756   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
757     addKImmFPOperands<16>(Inst, N);
758   }
759 
760   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
761     addKImmFPOperands<32>(Inst, N);
762   }
763 
764   void addRegOperands(MCInst &Inst, unsigned N) const;
765 
766   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
767     addRegOperands(Inst, N);
768   }
769 
770   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
771     if (isRegKind())
772       addRegOperands(Inst, N);
773     else if (isExpr())
774       Inst.addOperand(MCOperand::createExpr(Expr));
775     else
776       addImmOperands(Inst, N);
777   }
778 
779   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
780     Modifiers Mods = getModifiers();
781     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
782     if (isRegKind()) {
783       addRegOperands(Inst, N);
784     } else {
785       addImmOperands(Inst, N, false);
786     }
787   }
788 
789   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790     assert(!hasIntModifiers());
791     addRegOrImmWithInputModsOperands(Inst, N);
792   }
793 
794   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795     assert(!hasFPModifiers());
796     addRegOrImmWithInputModsOperands(Inst, N);
797   }
798 
799   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
800     Modifiers Mods = getModifiers();
801     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
802     assert(isRegKind());
803     addRegOperands(Inst, N);
804   }
805 
806   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
807     assert(!hasIntModifiers());
808     addRegWithInputModsOperands(Inst, N);
809   }
810 
811   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
812     assert(!hasFPModifiers());
813     addRegWithInputModsOperands(Inst, N);
814   }
815 
816   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
817     if (isImm())
818       addImmOperands(Inst, N);
819     else {
820       assert(isExpr());
821       Inst.addOperand(MCOperand::createExpr(Expr));
822     }
823   }
824 
825   static void printImmTy(raw_ostream& OS, ImmTy Type) {
826     switch (Type) {
827     case ImmTyNone: OS << "None"; break;
828     case ImmTyGDS: OS << "GDS"; break;
829     case ImmTyLDS: OS << "LDS"; break;
830     case ImmTyOffen: OS << "Offen"; break;
831     case ImmTyIdxen: OS << "Idxen"; break;
832     case ImmTyAddr64: OS << "Addr64"; break;
833     case ImmTyOffset: OS << "Offset"; break;
834     case ImmTyInstOffset: OS << "InstOffset"; break;
835     case ImmTyOffset0: OS << "Offset0"; break;
836     case ImmTyOffset1: OS << "Offset1"; break;
837     case ImmTyDLC: OS << "DLC"; break;
838     case ImmTyGLC: OS << "GLC"; break;
839     case ImmTySLC: OS << "SLC"; break;
840     case ImmTySWZ: OS << "SWZ"; break;
841     case ImmTyTFE: OS << "TFE"; break;
842     case ImmTyD16: OS << "D16"; break;
843     case ImmTyFORMAT: OS << "FORMAT"; break;
844     case ImmTyClampSI: OS << "ClampSI"; break;
845     case ImmTyOModSI: OS << "OModSI"; break;
846     case ImmTyDPP8: OS << "DPP8"; break;
847     case ImmTyDppCtrl: OS << "DppCtrl"; break;
848     case ImmTyDppRowMask: OS << "DppRowMask"; break;
849     case ImmTyDppBankMask: OS << "DppBankMask"; break;
850     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
851     case ImmTyDppFi: OS << "FI"; break;
852     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
853     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
854     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
855     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
856     case ImmTyDMask: OS << "DMask"; break;
857     case ImmTyDim: OS << "Dim"; break;
858     case ImmTyUNorm: OS << "UNorm"; break;
859     case ImmTyDA: OS << "DA"; break;
860     case ImmTyR128A16: OS << "R128A16"; break;
861     case ImmTyA16: OS << "A16"; break;
862     case ImmTyLWE: OS << "LWE"; break;
863     case ImmTyOff: OS << "Off"; break;
864     case ImmTyExpTgt: OS << "ExpTgt"; break;
865     case ImmTyExpCompr: OS << "ExpCompr"; break;
866     case ImmTyExpVM: OS << "ExpVM"; break;
867     case ImmTyHwreg: OS << "Hwreg"; break;
868     case ImmTySendMsg: OS << "SendMsg"; break;
869     case ImmTyInterpSlot: OS << "InterpSlot"; break;
870     case ImmTyInterpAttr: OS << "InterpAttr"; break;
871     case ImmTyAttrChan: OS << "AttrChan"; break;
872     case ImmTyOpSel: OS << "OpSel"; break;
873     case ImmTyOpSelHi: OS << "OpSelHi"; break;
874     case ImmTyNegLo: OS << "NegLo"; break;
875     case ImmTyNegHi: OS << "NegHi"; break;
876     case ImmTySwizzle: OS << "Swizzle"; break;
877     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
878     case ImmTyHigh: OS << "High"; break;
879     case ImmTyBLGP: OS << "BLGP"; break;
880     case ImmTyCBSZ: OS << "CBSZ"; break;
881     case ImmTyABID: OS << "ABID"; break;
882     case ImmTyEndpgm: OS << "Endpgm"; break;
883     }
884   }
885 
886   void print(raw_ostream &OS) const override {
887     switch (Kind) {
888     case Register:
889       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
890       break;
891     case Immediate:
892       OS << '<' << getImm();
893       if (getImmTy() != ImmTyNone) {
894         OS << " type: "; printImmTy(OS, getImmTy());
895       }
896       OS << " mods: " << Imm.Mods << '>';
897       break;
898     case Token:
899       OS << '\'' << getToken() << '\'';
900       break;
901     case Expression:
902       OS << "<expr " << *Expr << '>';
903       break;
904     }
905   }
906 
907   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
908                                       int64_t Val, SMLoc Loc,
909                                       ImmTy Type = ImmTyNone,
910                                       bool IsFPImm = false) {
911     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
912     Op->Imm.Val = Val;
913     Op->Imm.IsFPImm = IsFPImm;
914     Op->Imm.Type = Type;
915     Op->Imm.Mods = Modifiers();
916     Op->StartLoc = Loc;
917     Op->EndLoc = Loc;
918     return Op;
919   }
920 
921   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
922                                         StringRef Str, SMLoc Loc,
923                                         bool HasExplicitEncodingSize = true) {
924     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
925     Res->Tok.Data = Str.data();
926     Res->Tok.Length = Str.size();
927     Res->StartLoc = Loc;
928     Res->EndLoc = Loc;
929     return Res;
930   }
931 
932   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
933                                       unsigned RegNo, SMLoc S,
934                                       SMLoc E) {
935     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
936     Op->Reg.RegNo = RegNo;
937     Op->Reg.Mods = Modifiers();
938     Op->StartLoc = S;
939     Op->EndLoc = E;
940     return Op;
941   }
942 
943   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
944                                        const class MCExpr *Expr, SMLoc S) {
945     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
946     Op->Expr = Expr;
947     Op->StartLoc = S;
948     Op->EndLoc = S;
949     return Op;
950   }
951 };
952 
953 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs: " << Mods.Abs << " neg: " << Mods.Neg << " sext: " << Mods.Sext;
955   return OS;
956 }
957 
958 //===----------------------------------------------------------------------===//
959 // AsmParser
960 //===----------------------------------------------------------------------===//
961 
// Holds info related to the current kernel, e.g. the number of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
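// For example (illustrative): after parsing
//   .amdgpu_hsa_kernel foo
//   ...
//   v_mov_b32 v3, v0
// usesRegister(IS_VGPR, 3, 1) is called for v3 and .kernel.vgpr_count becomes
// 4 (v0..v3 are assumed to be in use).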
965 class KernelScopeInfo {
966   int SgprIndexUnusedMin = -1;
967   int VgprIndexUnusedMin = -1;
968   MCContext *Ctx = nullptr;
969 
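  // Record a use of SGPR index 'i' and keep the .kernel.sgpr_count symbol in
  // sync with the running count.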
970   void usesSgprAt(int i) {
971     if (i >= SgprIndexUnusedMin) {
972       SgprIndexUnusedMin = ++i;
973       if (Ctx) {
974         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
975         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
976       }
977     }
978   }
979 
980   void usesVgprAt(int i) {
981     if (i >= VgprIndexUnusedMin) {
982       VgprIndexUnusedMin = ++i;
983       if (Ctx) {
984         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
985         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
986       }
987     }
988   }
989 
990 public:
991   KernelScopeInfo() = default;
992 
993   void initialize(MCContext &Context) {
994     Ctx = &Context;
995     usesSgprAt(SgprIndexUnusedMin = -1);
996     usesVgprAt(VgprIndexUnusedMin = -1);
997   }
998 
999   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1000     switch (RegKind) {
1001       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1002       case IS_AGPR: // fall through
1003       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1004       default: break;
1005     }
1006   }
1007 };
1008 
1009 class AMDGPUAsmParser : public MCTargetAsmParser {
1010   MCAsmParser &Parser;
1011 
1012   // Number of extra operands parsed after the first optional operand.
1013   // This may be necessary to skip hardcoded mandatory operands.
1014   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1015 
1016   unsigned ForcedEncodingSize = 0;
1017   bool ForcedDPP = false;
1018   bool ForcedSDWA = false;
1019   KernelScopeInfo KernelScope;
1020 
1021   /// @name Auto-generated Match Functions
1022   /// {
1023 
1024 #define GET_ASSEMBLER_HEADER
1025 #include "AMDGPUGenAsmMatcher.inc"
1026 
1027   /// }
1028 
1029 private:
1030   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1031   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1034   ///
1035   /// \param Features [in] Target features, used for bug corrections.
1036   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1037   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1038   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1039   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1040   /// descriptor field, if valid.
1041   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1042   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1043   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1044   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1045   /// \param VGPRBlocks [out] Result VGPR block count.
1046   /// \param SGPRBlocks [out] Result SGPR block count.
1047   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1048                           bool FlatScrUsed, bool XNACKUsed,
1049                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1050                           SMRange VGPRRange, unsigned NextFreeSGPR,
1051                           SMRange SGPRRange, unsigned &VGPRBlocks,
1052                           unsigned &SGPRBlocks);
1053   bool ParseDirectiveAMDGCNTarget();
1054   bool ParseDirectiveAMDHSAKernel();
1055   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1056   bool ParseDirectiveHSACodeObjectVersion();
1057   bool ParseDirectiveHSACodeObjectISA();
1058   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1059   bool ParseDirectiveAMDKernelCodeT();
1060   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1061   bool ParseDirectiveAMDGPUHsaKernel();
1062 
1063   bool ParseDirectiveISAVersion();
1064   bool ParseDirectiveHSAMetadata();
1065   bool ParseDirectivePALMetadataBegin();
1066   bool ParseDirectivePALMetadata();
1067   bool ParseDirectiveAMDGPULDS();
1068 
1069   /// Common code to parse out a block of text (typically YAML) between start and
1070   /// end directives.
1071   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1072                            const char *AssemblerDirectiveEnd,
1073                            std::string &CollectString);
1074 
1075   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1076                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            bool RestoreOnFailure = false);
1080   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1081                            unsigned &RegNum, unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1087                            unsigned &RegWidth,
1088                            SmallVectorImpl<AsmToken> &Tokens);
1089   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1090                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1091   bool ParseRegRange(unsigned& Num, unsigned& Width);
1092   unsigned getRegularReg(RegisterKind RegKind,
1093                          unsigned RegNum,
1094                          unsigned RegWidth,
1095                          SMLoc Loc);
1096 
1097   bool isRegister();
1098   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1099   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1100   void initializeGprCountSymbol(RegisterKind RegKind);
1101   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1102                              unsigned RegWidth);
1103   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1104                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1105   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1106                  bool IsGdsHardcoded);
1107 
1108 public:
1109   enum AMDGPUMatchResultTy {
1110     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1111   };
1112   enum OperandMode {
1113     OperandMode_Default,
1114     OperandMode_NSA,
1115   };
1116 
1117   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1118 
1119   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1120                const MCInstrInfo &MII,
1121                const MCTargetOptions &Options)
1122       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1123     MCAsmParserExtension::Initialize(Parser);
1124 
1125     if (getFeatureBits().none()) {
1126       // Set default features.
1127       copySTI().ToggleFeature("southern-islands");
1128     }
1129 
1130     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1131 
1132     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
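      // These symbols can be tested from assembly, e.g. (illustrative):
      //   .if .option.machine_version_major >= 9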
1137       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1138       MCContext &Ctx = getContext();
1139       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1140         MCSymbol *Sym =
1141             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1142         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1143         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1144         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1145         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1147       } else {
1148         MCSymbol *Sym =
1149             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1151         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1152         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1153         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1154         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1155       }
1156       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1157         initializeGprCountSymbol(IS_VGPR);
1158         initializeGprCountSymbol(IS_SGPR);
1159       } else
1160         KernelScope.initialize(getContext());
1161     }
1162   }
1163 
1164   bool hasXNACK() const {
1165     return AMDGPU::hasXNACK(getSTI());
1166   }
1167 
1168   bool hasMIMG_R128() const {
1169     return AMDGPU::hasMIMG_R128(getSTI());
1170   }
1171 
1172   bool hasPackedD16() const {
1173     return AMDGPU::hasPackedD16(getSTI());
1174   }
1175 
1176   bool hasGFX10A16() const {
1177     return AMDGPU::hasGFX10A16(getSTI());
1178   }
1179 
1180   bool isSI() const {
1181     return AMDGPU::isSI(getSTI());
1182   }
1183 
1184   bool isCI() const {
1185     return AMDGPU::isCI(getSTI());
1186   }
1187 
1188   bool isVI() const {
1189     return AMDGPU::isVI(getSTI());
1190   }
1191 
1192   bool isGFX9() const {
1193     return AMDGPU::isGFX9(getSTI());
1194   }
1195 
1196   bool isGFX9Plus() const {
1197     return AMDGPU::isGFX9Plus(getSTI());
1198   }
1199 
1200   bool isGFX10() const {
1201     return AMDGPU::isGFX10(getSTI());
1202   }
1203 
1204   bool isGFX10_BEncoding() const {
1205     return AMDGPU::isGFX10_BEncoding(getSTI());
1206   }
1207 
1208   bool hasInv2PiInlineImm() const {
1209     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1210   }
1211 
1212   bool hasFlatOffsets() const {
1213     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1214   }
1215 
1216   bool hasSGPR102_SGPR103() const {
1217     return !isVI() && !isGFX9();
1218   }
1219 
1220   bool hasSGPR104_SGPR105() const {
1221     return isGFX10();
1222   }
1223 
1224   bool hasIntClamp() const {
1225     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1226   }
1227 
1228   AMDGPUTargetStreamer &getTargetStreamer() {
1229     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1230     return static_cast<AMDGPUTargetStreamer &>(TS);
1231   }
1232 
1233   const MCRegisterInfo *getMRI() const {
1234     // We need this const_cast because for some reason getContext() is not const
1235     // in MCAsmParser.
1236     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1237   }
1238 
1239   const MCInstrInfo *getMII() const {
1240     return &MII;
1241   }
1242 
1243   const FeatureBitset &getFeatureBits() const {
1244     return getSTI().getFeatureBits();
1245   }
1246 
1247   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1248   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1249   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1250 
1251   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1252   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1253   bool isForcedDPP() const { return ForcedDPP; }
1254   bool isForcedSDWA() const { return ForcedSDWA; }
1255   ArrayRef<unsigned> getMatchedVariants() const;
1256   StringRef getMatchedVariantName() const;
1257 
1258   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1259   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1260                      bool RestoreOnFailure);
1261   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1262   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1263                                         SMLoc &EndLoc) override;
1264   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1265   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1266                                       unsigned Kind) override;
1267   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1268                                OperandVector &Operands, MCStreamer &Out,
1269                                uint64_t &ErrorInfo,
1270                                bool MatchingInlineAsm) override;
1271   bool ParseDirective(AsmToken DirectiveID) override;
1272   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1273                                     OperandMode Mode = OperandMode_Default);
1274   StringRef parseMnemonicSuffix(StringRef Name);
1275   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1276                         SMLoc NameLoc, OperandVector &Operands) override;
1277   //bool ProcessInstruction(MCInst &Inst);
1278 
1279   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1280 
1281   OperandMatchResultTy
1282   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1283                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1284                      bool (*ConvertResult)(int64_t &) = nullptr);
1285 
1286   OperandMatchResultTy
1287   parseOperandArrayWithPrefix(const char *Prefix,
1288                               OperandVector &Operands,
1289                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1290                               bool (*ConvertResult)(int64_t&) = nullptr);
1291 
1292   OperandMatchResultTy
1293   parseNamedBit(const char *Name, OperandVector &Operands,
1294                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1295   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1296                                              StringRef &Value);
1297 
1298   bool isModifier();
1299   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1300   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1301   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1302   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1303   bool parseSP3NegModifier();
1304   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1305   OperandMatchResultTy parseReg(OperandVector &Operands);
1306   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1307   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1308   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1309   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1310   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1311   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1312   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1313   OperandMatchResultTy parseUfmt(int64_t &Format);
1314   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1315   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1316   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1317   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1318   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1319   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1320   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1321 
1322   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1323   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1324   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1325   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1326 
1327   bool parseCnt(int64_t &IntVal);
1328   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1329   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1330 
1331 private:
1332   struct OperandInfoTy {
1333     int64_t Id;
1334     bool IsSymbolic = false;
1335     bool IsDefined = false;
1336 
1337     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1338   };
1339 
1340   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1341   bool validateSendMsg(const OperandInfoTy &Msg,
1342                        const OperandInfoTy &Op,
1343                        const OperandInfoTy &Stream,
1344                        const SMLoc Loc);
1345 
1346   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1347   bool validateHwreg(const OperandInfoTy &HwReg,
1348                      const int64_t Offset,
1349                      const int64_t Width,
1350                      const SMLoc Loc);
1351 
1352   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1353   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1354   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1355 
1356   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1357   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1358   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1359   bool validateSOPLiteral(const MCInst &Inst) const;
1360   bool validateConstantBusLimitations(const MCInst &Inst);
1361   bool validateEarlyClobberLimitations(const MCInst &Inst);
1362   bool validateIntClampSupported(const MCInst &Inst);
1363   bool validateMIMGAtomicDMask(const MCInst &Inst);
1364   bool validateMIMGGatherDMask(const MCInst &Inst);
1365   bool validateMovrels(const MCInst &Inst);
1366   bool validateMIMGDataSize(const MCInst &Inst);
1367   bool validateMIMGAddrSize(const MCInst &Inst);
1368   bool validateMIMGD16(const MCInst &Inst);
1369   bool validateMIMGDim(const MCInst &Inst);
1370   bool validateLdsDirect(const MCInst &Inst);
1371   bool validateOpSel(const MCInst &Inst);
1372   bool validateVccOperand(unsigned Reg) const;
1373   bool validateVOP3Literal(const MCInst &Inst) const;
1374   bool validateMAIAccWrite(const MCInst &Inst);
1375   bool validateDivScale(const MCInst &Inst);
1376   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1377                              const SMLoc &IDLoc);
1378   unsigned getConstantBusLimit(unsigned Opcode) const;
1379   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1380   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1381   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1382 
1383   bool isSupportedMnemo(StringRef Mnemo,
1384                         const FeatureBitset &FBS);
1385   bool isSupportedMnemo(StringRef Mnemo,
1386                         const FeatureBitset &FBS,
1387                         ArrayRef<unsigned> Variants);
1388   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1389 
1390   bool isId(const StringRef Id) const;
1391   bool isId(const AsmToken &Token, const StringRef Id) const;
1392   bool isToken(const AsmToken::TokenKind Kind) const;
1393   bool trySkipId(const StringRef Id);
1394   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1395   bool trySkipToken(const AsmToken::TokenKind Kind);
1396   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1397   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1398   bool parseId(StringRef &Val, const StringRef ErrMsg);
1399 
1400   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1401   AsmToken::TokenKind getTokenKind() const;
1402   bool parseExpr(int64_t &Imm);
1403   bool parseExpr(OperandVector &Operands);
1404   StringRef getTokenStr() const;
1405   AsmToken peekToken();
1406   AsmToken getToken() const;
1407   SMLoc getLoc() const;
1408   void lex();
1409 
1410 public:
1411   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1412   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1413 
1414   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1415   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1416   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1417   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1418   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1419   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1420 
1421   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1422                             const unsigned MinVal,
1423                             const unsigned MaxVal,
1424                             const StringRef ErrMsg);
1425   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1426   bool parseSwizzleOffset(int64_t &Imm);
1427   bool parseSwizzleMacro(int64_t &Imm);
1428   bool parseSwizzleQuadPerm(int64_t &Imm);
1429   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1430   bool parseSwizzleBroadcast(int64_t &Imm);
1431   bool parseSwizzleSwap(int64_t &Imm);
1432   bool parseSwizzleReverse(int64_t &Imm);
1433 
1434   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1435   int64_t parseGPRIdxMacro();
1436 
1437   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1438   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1439   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1440   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1441   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1442 
1443   AMDGPUOperand::Ptr defaultDLC() const;
1444   AMDGPUOperand::Ptr defaultGLC() const;
1445   AMDGPUOperand::Ptr defaultGLC_1() const;
1446   AMDGPUOperand::Ptr defaultSLC() const;
1447 
1448   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1449   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1450   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1451   AMDGPUOperand::Ptr defaultFlatOffset() const;
1452 
1453   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1454 
1455   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1456                OptionalImmIndexMap &OptionalIdx);
1457   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1458   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1459   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1460 
1461   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1462 
1463   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1464                bool IsAtomic = false);
1465   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1466   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1467 
1468   OperandMatchResultTy parseDim(OperandVector &Operands);
1469   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1470   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1471   AMDGPUOperand::Ptr defaultRowMask() const;
1472   AMDGPUOperand::Ptr defaultBankMask() const;
1473   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1474   AMDGPUOperand::Ptr defaultFI() const;
1475   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1476   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1477 
1478   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1479                                     AMDGPUOperand::ImmTy Type);
1480   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1481   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1482   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1483   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1484   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1485   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1486   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1487                uint64_t BasicInstType,
1488                bool SkipDstVcc = false,
1489                bool SkipSrcVcc = false);
1490 
1491   AMDGPUOperand::Ptr defaultBLGP() const;
1492   AMDGPUOperand::Ptr defaultCBSZ() const;
1493   AMDGPUOperand::Ptr defaultABID() const;
1494 
1495   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1496   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1497 };
1498 
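// Describes an optional operand (e.g. "glc" or "offset:N"): its name, the
// immediate type it maps to, whether it is a single bit, and an optional
// callback used to convert/validate the parsed value.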
1499 struct OptionalOperand {
1500   const char *Name;
1501   AMDGPUOperand::ImmTy Type;
1502   bool IsBit;
1503   bool (*ConvertResult)(int64_t&);
1504 };
1505 
1506 } // end anonymous namespace
1507 
// May be called with an integer type of equivalent bitwidth.
1509 static const fltSemantics *getFltSemantics(unsigned Size) {
1510   switch (Size) {
1511   case 4:
1512     return &APFloat::IEEEsingle();
1513   case 8:
1514     return &APFloat::IEEEdouble();
1515   case 2:
1516     return &APFloat::IEEEhalf();
1517   default:
1518     llvm_unreachable("unsupported fp type");
1519   }
1520 }
1521 
1522 static const fltSemantics *getFltSemantics(MVT VT) {
1523   return getFltSemantics(VT.getSizeInBits() / 8);
1524 }
1525 
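// Map an operand type to the floating-point semantics used when converting an
// FP literal for that operand.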
1526 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1527   switch (OperandType) {
1528   case AMDGPU::OPERAND_REG_IMM_INT32:
1529   case AMDGPU::OPERAND_REG_IMM_FP32:
1530   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1531   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1532   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1533   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1534     return &APFloat::IEEEsingle();
1535   case AMDGPU::OPERAND_REG_IMM_INT64:
1536   case AMDGPU::OPERAND_REG_IMM_FP64:
1537   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1538   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1539     return &APFloat::IEEEdouble();
1540   case AMDGPU::OPERAND_REG_IMM_INT16:
1541   case AMDGPU::OPERAND_REG_IMM_FP16:
1542   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1543   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1544   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1545   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1546   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1547   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1548   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1549   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1550   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1551   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1552     return &APFloat::IEEEhalf();
1553   default:
1554     llvm_unreachable("unsupported fp type");
1555   }
1556 }
1557 
1558 //===----------------------------------------------------------------------===//
1559 // Operand
1560 //===----------------------------------------------------------------------===//
1561 
1562 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1563   bool Lost;
1564 
1565   // Convert the literal to the floating-point semantics of the target type
1566   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1567                                                APFloat::rmNearestTiesToEven,
1568                                                &Lost);
1569   // We allow precision loss but not overflow or underflow
1570   if (Status != APFloat::opOK &&
1571       Lost &&
1572       ((Status & APFloat::opOverflow)  != 0 ||
1573        (Status & APFloat::opUnderflow) != 0)) {
1574     return false;
1575   }
1576 
1577   return true;
1578 }
1579 
1580 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1581   return isUIntN(Size, Val) || isIntN(Size, Val);
1582 }
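// Illustration of the check above: with Size == 16, both 0xFFFF (valid as an
// unsigned 16-bit value) and -1 (valid as a signed 16-bit value) are safe
// truncations, while 0x1FFFF fits neither form and is rejected.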
1583 
1584 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1585   if (VT.getScalarType() == MVT::i16) {
1586     // FP inline immediates do not work correctly with i16 operands; accept integer inline values only.
1587     return isInlinableIntLiteral(Val);
1588   }
1589 
1590   // f16/v2f16 operands work correctly for all values.
1591   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1592 }
1593 
1594 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1595 
1596   // This is a hack to enable named inline values like
1597   // shared_base with both 32-bit and 64-bit operands.
1598   // Note that these values are defined as
1599   // 32-bit operands only.
1600   if (isInlineValue()) {
1601     return true;
1602   }
1603 
1604   if (!isImmTy(ImmTyNone)) {
1605     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1606     return false;
1607   }
1608   // TODO: We should avoid using host float here. It would be better to
1609   // check the float bit values which is what a few other places do.
1610   // We've had bot failures before due to weird NaN support on mips hosts.
1611 
1612   APInt Literal(64, Imm.Val);
1613 
1614   if (Imm.IsFPImm) { // We got fp literal token
1615     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1616       return AMDGPU::isInlinableLiteral64(Imm.Val,
1617                                           AsmParser->hasInv2PiInlineImm());
1618     }
1619 
1620     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1621     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1622       return false;
1623 
1624     if (type.getScalarSizeInBits() == 16) {
1625       return isInlineableLiteralOp16(
1626         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1627         type, AsmParser->hasInv2PiInlineImm());
1628     }
1629 
1630     // Check if single precision literal is inlinable
1631     return AMDGPU::isInlinableLiteral32(
1632       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1633       AsmParser->hasInv2PiInlineImm());
1634   }
1635 
1636   // We got int literal token.
1637   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1638     return AMDGPU::isInlinableLiteral64(Imm.Val,
1639                                         AsmParser->hasInv2PiInlineImm());
1640   }
1641 
1642   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1643     return false;
1644   }
1645 
1646   if (type.getScalarSizeInBits() == 16) {
1647     return isInlineableLiteralOp16(
1648       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1649       type, AsmParser->hasInv2PiInlineImm());
1650   }
1651 
1652   return AMDGPU::isInlinableLiteral32(
1653     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1654     AsmParser->hasInv2PiInlineImm());
1655 }
1656 
1657 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1658   // Check that this immediate can be added as literal
1659   if (!isImmTy(ImmTyNone)) {
1660     return false;
1661   }
1662 
1663   if (!Imm.IsFPImm) {
1664     // We got int literal token.
1665 
1666     if (type == MVT::f64 && hasFPModifiers()) {
1667       // FP modifiers cannot be applied to int literals while preserving the same
1668       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1669       // ambiguity, these cases are disabled.
1670       return false;
1671     }
1672 
1673     unsigned Size = type.getSizeInBits();
1674     if (Size == 64)
1675       Size = 32;
1676 
1677     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1678     // types.
1679     return isSafeTruncation(Imm.Val, Size);
1680   }
1681 
1682   // We got fp literal token
1683   if (type == MVT::f64) { // Expected 64-bit fp operand
1684     // The low 32 bits of the literal would be set to zero, but such literals are accepted.
1685     return true;
1686   }
1687 
1688   if (type == MVT::i64) { // Expected 64-bit int operand
1689     // We don't allow fp literals in 64-bit integer instructions. It is
1690     // unclear how we should encode them.
1691     return false;
1692   }
1693 
1694   // We allow fp literals with f16x2 operands assuming that the specified
1695   // literal goes into the lower half and the upper half is zero. We also
1696   // require that the literal can be losslessly converted to f16.
1697   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1698                      (type == MVT::v2i16)? MVT::i16 : type;
1699 
1700   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1701   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1702 }
1703 
1704 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1705   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1706 }
1707 
1708 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1709   if (AsmParser->isVI())
1710     return isVReg32();
1711   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1712     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1713   else
1714     return false;
1715 }
1716 
1717 bool AMDGPUOperand::isSDWAFP16Operand() const {
1718   return isSDWAOperand(MVT::f16);
1719 }
1720 
1721 bool AMDGPUOperand::isSDWAFP32Operand() const {
1722   return isSDWAOperand(MVT::f32);
1723 }
1724 
1725 bool AMDGPUOperand::isSDWAInt16Operand() const {
1726   return isSDWAOperand(MVT::i16);
1727 }
1728 
1729 bool AMDGPUOperand::isSDWAInt32Operand() const {
1730   return isSDWAOperand(MVT::i32);
1731 }
1732 
1733 bool AMDGPUOperand::isBoolReg() const {
1734   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1735          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1736 }
1737 
1738 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1739 {
1740   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1741   assert(Size == 2 || Size == 4 || Size == 8);
1742 
1743   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1744 
1745   if (Imm.Mods.Abs) {
1746     Val &= ~FpSignMask;
1747   }
1748   if (Imm.Mods.Neg) {
1749     Val ^= FpSignMask;
1750   }
1751 
1752   return Val;
1753 }
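// Illustration: for a 32-bit operand (Size == 4) FpSignMask is 0x80000000,
// so "abs" maps the bit pattern of -1.0f (0xBF800000) to that of 1.0f
// (0x3F800000), and "neg" toggles between the two.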
1754 
1755 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1756   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1757                              Inst.getNumOperands())) {
1758     addLiteralImmOperand(Inst, Imm.Val,
1759                          ApplyModifiers &
1760                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1761   } else {
1762     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1763     Inst.addOperand(MCOperand::createImm(Imm.Val));
1764   }
1765 }
1766 
1767 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1768   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1769   auto OpNum = Inst.getNumOperands();
1770   // Check that this operand accepts literals
1771   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1772 
1773   if (ApplyModifiers) {
1774     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1775     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1776     Val = applyInputFPModifiers(Val, Size);
1777   }
1778 
1779   APInt Literal(64, Val);
1780   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1781 
1782   if (Imm.IsFPImm) { // We got fp literal token
1783     switch (OpTy) {
1784     case AMDGPU::OPERAND_REG_IMM_INT64:
1785     case AMDGPU::OPERAND_REG_IMM_FP64:
1786     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1787     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1788       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1789                                        AsmParser->hasInv2PiInlineImm())) {
1790         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1791         return;
1792       }
1793 
1794       // Non-inlineable
1795       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1796         // For fp operands we check whether the low 32 bits are zero
1797         if (Literal.getLoBits(32) != 0) {
1798           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1799           "Can't encode literal as exact 64-bit floating-point operand. "
1800           "Low 32-bits will be set to zero");
1801         }
1802 
1803         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1804         return;
1805       }
1806 
1807       // We don't allow fp literals in 64-bit integer instructions. It is
1808       // unclear how we should encode them. This case should be checked earlier
1809       // in predicate methods (isLiteralImm())
1810       llvm_unreachable("fp literal in 64-bit integer instruction.");
1811 
1812     case AMDGPU::OPERAND_REG_IMM_INT32:
1813     case AMDGPU::OPERAND_REG_IMM_FP32:
1814     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1815     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1816     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1817     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1818     case AMDGPU::OPERAND_REG_IMM_INT16:
1819     case AMDGPU::OPERAND_REG_IMM_FP16:
1820     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1821     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1822     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1823     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1824     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1825     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1826     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1827     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1828     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1829     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1830       bool lost;
1831       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1832       // Convert the literal to the operand's floating-point semantics
1833       FPLiteral.convert(*getOpFltSemantics(OpTy),
1834                         APFloat::rmNearestTiesToEven, &lost);
1835       // We allow precision loss but not overflow or underflow. This should have
1836       // been checked earlier in isLiteralImm()
1837 
1838       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1839       Inst.addOperand(MCOperand::createImm(ImmVal));
1840       return;
1841     }
1842     default:
1843       llvm_unreachable("invalid operand size");
1844     }
1845 
1846     return;
1847   }
1848 
1849   // We got int literal token.
1850   // Only sign extend inline immediates.
1851   switch (OpTy) {
1852   case AMDGPU::OPERAND_REG_IMM_INT32:
1853   case AMDGPU::OPERAND_REG_IMM_FP32:
1854   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1855   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1856   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1857   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1858   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1859   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1860     if (isSafeTruncation(Val, 32) &&
1861         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1862                                      AsmParser->hasInv2PiInlineImm())) {
1863       Inst.addOperand(MCOperand::createImm(Val));
1864       return;
1865     }
1866 
1867     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1868     return;
1869 
1870   case AMDGPU::OPERAND_REG_IMM_INT64:
1871   case AMDGPU::OPERAND_REG_IMM_FP64:
1872   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1873   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1874     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1875       Inst.addOperand(MCOperand::createImm(Val));
1876       return;
1877     }
1878 
1879     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1880     return;
1881 
1882   case AMDGPU::OPERAND_REG_IMM_INT16:
1883   case AMDGPU::OPERAND_REG_IMM_FP16:
1884   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1885   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1886   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1887   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1888     if (isSafeTruncation(Val, 16) &&
1889         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1890                                      AsmParser->hasInv2PiInlineImm())) {
1891       Inst.addOperand(MCOperand::createImm(Val));
1892       return;
1893     }
1894 
1895     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1896     return;
1897 
1898   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1899   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1900   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1901   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1902     assert(isSafeTruncation(Val, 16));
1903     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1904                                         AsmParser->hasInv2PiInlineImm()));
1905 
1906     Inst.addOperand(MCOperand::createImm(Val));
1907     return;
1908   }
1909   default:
1910     llvm_unreachable("invalid operand size");
1911   }
1912 }
1913 
1914 template <unsigned Bitwidth>
1915 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1916   APInt Literal(64, Imm.Val);
1917 
1918   if (!Imm.IsFPImm) {
1919     // We got int literal token.
1920     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1921     return;
1922   }
1923 
1924   bool Lost;
1925   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1926   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1927                     APFloat::rmNearestTiesToEven, &Lost);
1928   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1929 }
1930 
1931 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1932   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1933 }
1934 
1935 static bool isInlineValue(unsigned Reg) {
1936   switch (Reg) {
1937   case AMDGPU::SRC_SHARED_BASE:
1938   case AMDGPU::SRC_SHARED_LIMIT:
1939   case AMDGPU::SRC_PRIVATE_BASE:
1940   case AMDGPU::SRC_PRIVATE_LIMIT:
1941   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1942     return true;
1943   case AMDGPU::SRC_VCCZ:
1944   case AMDGPU::SRC_EXECZ:
1945   case AMDGPU::SRC_SCC:
1946     return true;
1947   case AMDGPU::SGPR_NULL:
1948     return true;
1949   default:
1950     return false;
1951   }
1952 }
1953 
1954 bool AMDGPUOperand::isInlineValue() const {
1955   return isRegKind() && ::isInlineValue(getReg());
1956 }
1957 
1958 //===----------------------------------------------------------------------===//
1959 // AsmParser
1960 //===----------------------------------------------------------------------===//
1961 
1962 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1963   if (Is == IS_VGPR) {
1964     switch (RegWidth) {
1965       default: return -1;
1966       case 1: return AMDGPU::VGPR_32RegClassID;
1967       case 2: return AMDGPU::VReg_64RegClassID;
1968       case 3: return AMDGPU::VReg_96RegClassID;
1969       case 4: return AMDGPU::VReg_128RegClassID;
1970       case 5: return AMDGPU::VReg_160RegClassID;
1971       case 6: return AMDGPU::VReg_192RegClassID;
1972       case 8: return AMDGPU::VReg_256RegClassID;
1973       case 16: return AMDGPU::VReg_512RegClassID;
1974       case 32: return AMDGPU::VReg_1024RegClassID;
1975     }
1976   } else if (Is == IS_TTMP) {
1977     switch (RegWidth) {
1978       default: return -1;
1979       case 1: return AMDGPU::TTMP_32RegClassID;
1980       case 2: return AMDGPU::TTMP_64RegClassID;
1981       case 4: return AMDGPU::TTMP_128RegClassID;
1982       case 8: return AMDGPU::TTMP_256RegClassID;
1983       case 16: return AMDGPU::TTMP_512RegClassID;
1984     }
1985   } else if (Is == IS_SGPR) {
1986     switch (RegWidth) {
1987       default: return -1;
1988       case 1: return AMDGPU::SGPR_32RegClassID;
1989       case 2: return AMDGPU::SGPR_64RegClassID;
1990       case 3: return AMDGPU::SGPR_96RegClassID;
1991       case 4: return AMDGPU::SGPR_128RegClassID;
1992       case 5: return AMDGPU::SGPR_160RegClassID;
1993       case 6: return AMDGPU::SGPR_192RegClassID;
1994       case 8: return AMDGPU::SGPR_256RegClassID;
1995       case 16: return AMDGPU::SGPR_512RegClassID;
1996     }
1997   } else if (Is == IS_AGPR) {
1998     switch (RegWidth) {
1999       default: return -1;
2000       case 1: return AMDGPU::AGPR_32RegClassID;
2001       case 2: return AMDGPU::AReg_64RegClassID;
2002       case 3: return AMDGPU::AReg_96RegClassID;
2003       case 4: return AMDGPU::AReg_128RegClassID;
2004       case 5: return AMDGPU::AReg_160RegClassID;
2005       case 6: return AMDGPU::AReg_192RegClassID;
2006       case 8: return AMDGPU::AReg_256RegClassID;
2007       case 16: return AMDGPU::AReg_512RegClassID;
2008       case 32: return AMDGPU::AReg_1024RegClassID;
2009     }
2010   }
2011   return -1;
2012 }
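// For example, (IS_SGPR, 2) maps to SGPR_64RegClassID (covering pairs such as
// s[2:3]) and (IS_VGPR, 4) maps to VReg_128RegClassID; widths with no matching
// class, e.g. 7, yield -1 and are later reported as unsupported register sizes.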
2013 
2014 static unsigned getSpecialRegForName(StringRef RegName) {
2015   return StringSwitch<unsigned>(RegName)
2016     .Case("exec", AMDGPU::EXEC)
2017     .Case("vcc", AMDGPU::VCC)
2018     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2019     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2020     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2021     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2022     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2023     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2024     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2025     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2026     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2027     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2028     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2029     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2030     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2031     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2032     .Case("m0", AMDGPU::M0)
2033     .Case("vccz", AMDGPU::SRC_VCCZ)
2034     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2035     .Case("execz", AMDGPU::SRC_EXECZ)
2036     .Case("src_execz", AMDGPU::SRC_EXECZ)
2037     .Case("scc", AMDGPU::SRC_SCC)
2038     .Case("src_scc", AMDGPU::SRC_SCC)
2039     .Case("tba", AMDGPU::TBA)
2040     .Case("tma", AMDGPU::TMA)
2041     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2042     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2043     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2044     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2045     .Case("vcc_lo", AMDGPU::VCC_LO)
2046     .Case("vcc_hi", AMDGPU::VCC_HI)
2047     .Case("exec_lo", AMDGPU::EXEC_LO)
2048     .Case("exec_hi", AMDGPU::EXEC_HI)
2049     .Case("tma_lo", AMDGPU::TMA_LO)
2050     .Case("tma_hi", AMDGPU::TMA_HI)
2051     .Case("tba_lo", AMDGPU::TBA_LO)
2052     .Case("tba_hi", AMDGPU::TBA_HI)
2053     .Case("pc", AMDGPU::PC_REG)
2054     .Case("null", AMDGPU::SGPR_NULL)
2055     .Default(AMDGPU::NoRegister);
2056 }
2057 
2058 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2059                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2060   auto R = parseRegister();
2061   if (!R) return true;
2062   assert(R->isReg());
2063   RegNo = R->getReg();
2064   StartLoc = R->getStartLoc();
2065   EndLoc = R->getEndLoc();
2066   return false;
2067 }
2068 
2069 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2070                                     SMLoc &EndLoc) {
2071   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2072 }
2073 
2074 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2075                                                        SMLoc &StartLoc,
2076                                                        SMLoc &EndLoc) {
2077   bool Result =
2078       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2079   bool PendingErrors = getParser().hasPendingError();
2080   getParser().clearPendingErrors();
2081   if (PendingErrors)
2082     return MatchOperand_ParseFail;
2083   if (Result)
2084     return MatchOperand_NoMatch;
2085   return MatchOperand_Success;
2086 }
2087 
2088 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2089                                             RegisterKind RegKind, unsigned Reg1,
2090                                             SMLoc Loc) {
2091   switch (RegKind) {
2092   case IS_SPECIAL:
2093     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2094       Reg = AMDGPU::EXEC;
2095       RegWidth = 2;
2096       return true;
2097     }
2098     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2099       Reg = AMDGPU::FLAT_SCR;
2100       RegWidth = 2;
2101       return true;
2102     }
2103     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2104       Reg = AMDGPU::XNACK_MASK;
2105       RegWidth = 2;
2106       return true;
2107     }
2108     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2109       Reg = AMDGPU::VCC;
2110       RegWidth = 2;
2111       return true;
2112     }
2113     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2114       Reg = AMDGPU::TBA;
2115       RegWidth = 2;
2116       return true;
2117     }
2118     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2119       Reg = AMDGPU::TMA;
2120       RegWidth = 2;
2121       return true;
2122     }
2123     Error(Loc, "register does not fit in the list");
2124     return false;
2125   case IS_VGPR:
2126   case IS_SGPR:
2127   case IS_AGPR:
2128   case IS_TTMP:
2129     if (Reg1 != Reg + RegWidth) {
2130       Error(Loc, "registers in a list must have consecutive indices");
2131       return false;
2132     }
2133     RegWidth++;
2134     return true;
2135   default:
2136     llvm_unreachable("unexpected register kind");
2137   }
2138 }
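// For example, parsing the list [s0,s1,s2] grows RegWidth from 1 to 3,
// [exec_lo,exec_hi] is folded into the single EXEC register with width 2, and
// [s0,s2] is rejected because the indices are not consecutive.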
2139 
2140 struct RegInfo {
2141   StringLiteral Name;
2142   RegisterKind Kind;
2143 };
2144 
2145 static constexpr RegInfo RegularRegisters[] = {
2146   {{"v"},    IS_VGPR},
2147   {{"s"},    IS_SGPR},
2148   {{"ttmp"}, IS_TTMP},
2149   {{"acc"},  IS_AGPR},
2150   {{"a"},    IS_AGPR},
2151 };
2152 
2153 static bool isRegularReg(RegisterKind Kind) {
2154   return Kind == IS_VGPR ||
2155          Kind == IS_SGPR ||
2156          Kind == IS_TTMP ||
2157          Kind == IS_AGPR;
2158 }
2159 
2160 static const RegInfo* getRegularRegInfo(StringRef Str) {
2161   for (const RegInfo &Reg : RegularRegisters)
2162     if (Str.startswith(Reg.Name))
2163       return &Reg;
2164   return nullptr;
2165 }
2166 
2167 static bool getRegNum(StringRef Str, unsigned& Num) {
2168   return !Str.getAsInteger(10, Num);
2169 }
2170 
2171 bool
2172 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2173                             const AsmToken &NextToken) const {
2174 
2175   // A list of consecutive registers: [s0,s1,s2,s3]
2176   if (Token.is(AsmToken::LBrac))
2177     return true;
2178 
2179   if (!Token.is(AsmToken::Identifier))
2180     return false;
2181 
2182   // A single register like s0 or a range of registers like s[0:1]
2183 
2184   StringRef Str = Token.getString();
2185   const RegInfo *Reg = getRegularRegInfo(Str);
2186   if (Reg) {
2187     StringRef RegName = Reg->Name;
2188     StringRef RegSuffix = Str.substr(RegName.size());
2189     if (!RegSuffix.empty()) {
2190       unsigned Num;
2191       // A single register with an index: rXX
2192       if (getRegNum(RegSuffix, Num))
2193         return true;
2194     } else {
2195       // A range of registers: r[XX:YY].
2196       if (NextToken.is(AsmToken::LBrac))
2197         return true;
2198     }
2199   }
2200 
2201   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2202 }
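// Tokens recognized as registers here include "v0" (an indexed regular
// register), "s" followed by "[0:1]" (a register range), a "[" opening a
// register list such as [v0,v1], and special names like "vcc" or "m0".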
2203 
2204 bool
2205 AMDGPUAsmParser::isRegister()
2206 {
2207   return isRegister(getToken(), peekToken());
2208 }
2209 
2210 unsigned
2211 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2212                                unsigned RegNum,
2213                                unsigned RegWidth,
2214                                SMLoc Loc) {
2215 
2216   assert(isRegularReg(RegKind));
2217 
2218   unsigned AlignSize = 1;
2219   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2220     // SGPR and TTMP registers must be aligned.
2221     // Max required alignment is 4 dwords.
2222     AlignSize = std::min(RegWidth, 4u);
2223   }
2224 
2225   if (RegNum % AlignSize != 0) {
2226     Error(Loc, "invalid register alignment");
2227     return AMDGPU::NoRegister;
2228   }
2229 
2230   unsigned RegIdx = RegNum / AlignSize;
2231   int RCID = getRegClass(RegKind, RegWidth);
2232   if (RCID == -1) {
2233     Error(Loc, "invalid or unsupported register size");
2234     return AMDGPU::NoRegister;
2235   }
2236 
2237   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2238   const MCRegisterClass RC = TRI->getRegClass(RCID);
2239   if (RegIdx >= RC.getNumRegs()) {
2240     Error(Loc, "register index is out of range");
2241     return AMDGPU::NoRegister;
2242   }
2243 
2244   return RC.getRegister(RegIdx);
2245 }
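// Alignment example: s[2:3] is accepted because its first index (2) is a
// multiple of the 2-dword width, while s[1:2] is rejected with "invalid
// register alignment"; VGPRs and AGPRs are not subject to this restriction.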
2246 
2247 bool
2248 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2249   int64_t RegLo, RegHi;
2250   if (!skipToken(AsmToken::LBrac, "missing register index"))
2251     return false;
2252 
2253   SMLoc FirstIdxLoc = getLoc();
2254   SMLoc SecondIdxLoc;
2255 
2256   if (!parseExpr(RegLo))
2257     return false;
2258 
2259   if (trySkipToken(AsmToken::Colon)) {
2260     SecondIdxLoc = getLoc();
2261     if (!parseExpr(RegHi))
2262       return false;
2263   } else {
2264     RegHi = RegLo;
2265   }
2266 
2267   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2268     return false;
2269 
2270   if (!isUInt<32>(RegLo)) {
2271     Error(FirstIdxLoc, "invalid register index");
2272     return false;
2273   }
2274 
2275   if (!isUInt<32>(RegHi)) {
2276     Error(SecondIdxLoc, "invalid register index");
2277     return false;
2278   }
2279 
2280   if (RegLo > RegHi) {
2281     Error(FirstIdxLoc, "first register index should not exceed second index");
2282     return false;
2283   }
2284 
2285   Num = static_cast<unsigned>(RegLo);
2286   Width = (RegHi - RegLo) + 1;
2287   return true;
2288 }
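// For example, the suffix "[0:3]" yields Num = 0 and Width = 4, while a single
// bracketed index such as "[5]" yields Num = 5 and Width = 1.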
2289 
2290 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2291                                           unsigned &RegNum, unsigned &RegWidth,
2292                                           SmallVectorImpl<AsmToken> &Tokens) {
2293   assert(isToken(AsmToken::Identifier));
2294   unsigned Reg = getSpecialRegForName(getTokenStr());
2295   if (Reg) {
2296     RegNum = 0;
2297     RegWidth = 1;
2298     RegKind = IS_SPECIAL;
2299     Tokens.push_back(getToken());
2300     lex(); // skip register name
2301   }
2302   return Reg;
2303 }
2304 
2305 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2306                                           unsigned &RegNum, unsigned &RegWidth,
2307                                           SmallVectorImpl<AsmToken> &Tokens) {
2308   assert(isToken(AsmToken::Identifier));
2309   StringRef RegName = getTokenStr();
2310   auto Loc = getLoc();
2311 
2312   const RegInfo *RI = getRegularRegInfo(RegName);
2313   if (!RI) {
2314     Error(Loc, "invalid register name");
2315     return AMDGPU::NoRegister;
2316   }
2317 
2318   Tokens.push_back(getToken());
2319   lex(); // skip register name
2320 
2321   RegKind = RI->Kind;
2322   StringRef RegSuffix = RegName.substr(RI->Name.size());
2323   if (!RegSuffix.empty()) {
2324     // Single 32-bit register: vXX.
2325     if (!getRegNum(RegSuffix, RegNum)) {
2326       Error(Loc, "invalid register index");
2327       return AMDGPU::NoRegister;
2328     }
2329     RegWidth = 1;
2330   } else {
2331     // Range of registers: v[XX:YY]. ":YY" is optional.
2332     if (!ParseRegRange(RegNum, RegWidth))
2333       return AMDGPU::NoRegister;
2334   }
2335 
2336   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2337 }
2338 
2339 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2340                                        unsigned &RegWidth,
2341                                        SmallVectorImpl<AsmToken> &Tokens) {
2342   unsigned Reg = AMDGPU::NoRegister;
2343   auto ListLoc = getLoc();
2344 
2345   if (!skipToken(AsmToken::LBrac,
2346                  "expected a register or a list of registers")) {
2347     return AMDGPU::NoRegister;
2348   }
2349 
2350   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2351 
2352   auto Loc = getLoc();
2353   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2354     return AMDGPU::NoRegister;
2355   if (RegWidth != 1) {
2356     Error(Loc, "expected a single 32-bit register");
2357     return AMDGPU::NoRegister;
2358   }
2359 
2360   for (; trySkipToken(AsmToken::Comma); ) {
2361     RegisterKind NextRegKind;
2362     unsigned NextReg, NextRegNum, NextRegWidth;
2363     Loc = getLoc();
2364 
2365     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2366                              NextRegNum, NextRegWidth,
2367                              Tokens)) {
2368       return AMDGPU::NoRegister;
2369     }
2370     if (NextRegWidth != 1) {
2371       Error(Loc, "expected a single 32-bit register");
2372       return AMDGPU::NoRegister;
2373     }
2374     if (NextRegKind != RegKind) {
2375       Error(Loc, "registers in a list must be of the same kind");
2376       return AMDGPU::NoRegister;
2377     }
2378     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2379       return AMDGPU::NoRegister;
2380   }
2381 
2382   if (!skipToken(AsmToken::RBrac,
2383                  "expected a comma or a closing square bracket")) {
2384     return AMDGPU::NoRegister;
2385   }
2386 
2387   if (isRegularReg(RegKind))
2388     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2389 
2390   return Reg;
2391 }
2392 
2393 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2394                                           unsigned &RegNum, unsigned &RegWidth,
2395                                           SmallVectorImpl<AsmToken> &Tokens) {
2396   auto Loc = getLoc();
2397   Reg = AMDGPU::NoRegister;
2398 
2399   if (isToken(AsmToken::Identifier)) {
2400     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2401     if (Reg == AMDGPU::NoRegister)
2402       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2403   } else {
2404     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2405   }
2406 
2407   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2408   if (Reg == AMDGPU::NoRegister) {
2409     assert(Parser.hasPendingError());
2410     return false;
2411   }
2412 
2413   if (!subtargetHasRegister(*TRI, Reg)) {
2414     if (Reg == AMDGPU::SGPR_NULL) {
2415       Error(Loc, "'null' operand is not supported on this GPU");
2416     } else {
2417       Error(Loc, "register not available on this GPU");
2418     }
2419     return false;
2420   }
2421 
2422   return true;
2423 }
2424 
2425 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2426                                           unsigned &RegNum, unsigned &RegWidth,
2427                                           bool RestoreOnFailure /*=false*/) {
2428   Reg = AMDGPU::NoRegister;
2429 
2430   SmallVector<AsmToken, 1> Tokens;
2431   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2432     if (RestoreOnFailure) {
2433       while (!Tokens.empty()) {
2434         getLexer().UnLex(Tokens.pop_back_val());
2435       }
2436     }
2437     return true;
2438   }
2439   return false;
2440 }
2441 
2442 Optional<StringRef>
2443 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2444   switch (RegKind) {
2445   case IS_VGPR:
2446     return StringRef(".amdgcn.next_free_vgpr");
2447   case IS_SGPR:
2448     return StringRef(".amdgcn.next_free_sgpr");
2449   default:
2450     return None;
2451   }
2452 }
2453 
2454 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2455   auto SymbolName = getGprCountSymbolName(RegKind);
2456   assert(SymbolName && "initializing invalid register kind");
2457   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2458   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2459 }
2460 
2461 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2462                                             unsigned DwordRegIndex,
2463                                             unsigned RegWidth) {
2464   // Symbols are only defined for GCN targets
2465   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2466     return true;
2467 
2468   auto SymbolName = getGprCountSymbolName(RegKind);
2469   if (!SymbolName)
2470     return true;
2471   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2472 
2473   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2474   int64_t OldCount;
2475 
2476   if (!Sym->isVariable())
2477     return !Error(getParser().getTok().getLoc(),
2478                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2479   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2480     return !Error(
2481         getParser().getTok().getLoc(),
2482         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2483 
2484   if (OldCount <= NewMax)
2485     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2486 
2487   return true;
2488 }
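// For example, on a GCN target where .amdgcn.next_free_vgpr currently
// evaluates to 0, a use of v[4:7] makes NewMax 7 and raises the symbol to 8;
// a later use of v0 leaves it unchanged because the old count already covers it.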
2489 
2490 std::unique_ptr<AMDGPUOperand>
2491 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2492   const auto &Tok = Parser.getTok();
2493   SMLoc StartLoc = Tok.getLoc();
2494   SMLoc EndLoc = Tok.getEndLoc();
2495   RegisterKind RegKind;
2496   unsigned Reg, RegNum, RegWidth;
2497 
2498   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2499     return nullptr;
2500   }
2501   if (isHsaAbiVersion3(&getSTI())) {
2502     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2503       return nullptr;
2504   } else
2505     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2506   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2507 }
2508 
2509 OperandMatchResultTy
2510 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2511   // TODO: add syntactic sugar for 1/(2*PI)
2512 
2513   assert(!isRegister());
2514   assert(!isModifier());
2515 
2516   const auto& Tok = getToken();
2517   const auto& NextTok = peekToken();
2518   bool IsReal = Tok.is(AsmToken::Real);
2519   SMLoc S = getLoc();
2520   bool Negate = false;
2521 
2522   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2523     lex();
2524     IsReal = true;
2525     Negate = true;
2526   }
2527 
2528   if (IsReal) {
2529     // Floating-point expressions are not supported.
2530     // Only floating-point literals with an optional
2531     // sign are accepted here.
2532 
2533     StringRef Num = getTokenStr();
2534     lex();
2535 
2536     APFloat RealVal(APFloat::IEEEdouble());
2537     auto roundMode = APFloat::rmNearestTiesToEven;
2538     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2539       return MatchOperand_ParseFail;
2540     }
2541     if (Negate)
2542       RealVal.changeSign();
2543 
2544     Operands.push_back(
2545       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2546                                AMDGPUOperand::ImmTyNone, true));
2547 
2548     return MatchOperand_Success;
2549 
2550   } else {
2551     int64_t IntVal;
2552     const MCExpr *Expr;
2553     SMLoc S = getLoc();
2554 
2555     if (HasSP3AbsModifier) {
2556       // This is a workaround for handling expressions
2557       // as arguments of SP3 'abs' modifier, for example:
2558       //     |1.0|
2559       //     |-1|
2560       //     |1+x|
2561       // This syntax is not compatible with syntax of standard
2562       // MC expressions (due to the trailing '|').
2563       SMLoc EndLoc;
2564       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2565         return MatchOperand_ParseFail;
2566     } else {
2567       if (Parser.parseExpression(Expr))
2568         return MatchOperand_ParseFail;
2569     }
2570 
2571     if (Expr->evaluateAsAbsolute(IntVal)) {
2572       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2573     } else {
2574       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2575     }
2576 
2577     return MatchOperand_Success;
2578   }
2579 
2580   return MatchOperand_NoMatch;
2581 }
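// Examples of accepted immediates: "0.5" and "-1.0" are stored as the bit
// pattern of an IEEE double (an fp literal token), "42" and "1+2" are parsed
// as MC expressions and folded to plain integer immediates when absolute, and
// a non-absolute expression is kept as a symbolic operand.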
2582 
2583 OperandMatchResultTy
2584 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2585   if (!isRegister())
2586     return MatchOperand_NoMatch;
2587 
2588   if (auto R = parseRegister()) {
2589     assert(R->isReg());
2590     Operands.push_back(std::move(R));
2591     return MatchOperand_Success;
2592   }
2593   return MatchOperand_ParseFail;
2594 }
2595 
2596 OperandMatchResultTy
2597 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2598   auto res = parseReg(Operands);
2599   if (res != MatchOperand_NoMatch) {
2600     return res;
2601   } else if (isModifier()) {
2602     return MatchOperand_NoMatch;
2603   } else {
2604     return parseImm(Operands, HasSP3AbsMod);
2605   }
2606 }
2607 
2608 bool
2609 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2610   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2611     const auto &str = Token.getString();
2612     return str == "abs" || str == "neg" || str == "sext";
2613   }
2614   return false;
2615 }
2616 
2617 bool
2618 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2619   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2620 }
2621 
2622 bool
2623 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2624   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2625 }
2626 
2627 bool
2628 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2629   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2630 }
2631 
2632 // Check if this is an operand modifier or an opcode modifier
2633 // which may look like an expression but is not. We should
2634 // avoid parsing these modifiers as expressions. Currently
2635 // recognized sequences are:
2636 //   |...|
2637 //   abs(...)
2638 //   neg(...)
2639 //   sext(...)
2640 //   -reg
2641 //   -|...|
2642 //   -abs(...)
2643 //   name:...
2644 // Note that simple opcode modifiers like 'gds' may be parsed as
2645 // expressions; this is a special case. See getExpressionAsToken.
2646 //
2647 bool
2648 AMDGPUAsmParser::isModifier() {
2649 
2650   AsmToken Tok = getToken();
2651   AsmToken NextToken[2];
2652   peekTokens(NextToken);
2653 
2654   return isOperandModifier(Tok, NextToken[0]) ||
2655          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2656          isOpcodeModifierWithVal(Tok, NextToken[0]);
2657 }
2658 
2659 // Check if the current token is an SP3 'neg' modifier.
2660 // Currently this modifier is allowed in the following contexts:
2661 //
2662 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2663 // 2. Before an 'abs' modifier: -abs(...)
2664 // 3. Before an SP3 'abs' modifier: -|...|
2665 //
2666 // In all other cases "-" is handled as a part
2667 // of an expression that follows the sign.
2668 //
2669 // Note: When "-" is followed by an integer literal,
2670 // it is interpreted as integer negation rather
2671 // than a floating-point NEG modifier applied to the literal.
2672 // Besides being counter-intuitive, such use of a floating-point
2673 // NEG modifier would result in different meanings
2674 // of integer literals used with VOP1/2/C and VOP3,
2675 // for example:
2676 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2677 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2678 // Negative fp literals with a preceding "-" are
2679 // handled likewise for uniformity.
2680 //
2681 bool
2682 AMDGPUAsmParser::parseSP3NegModifier() {
2683 
2684   AsmToken NextToken[2];
2685   peekTokens(NextToken);
2686 
2687   if (isToken(AsmToken::Minus) &&
2688       (isRegister(NextToken[0], NextToken[1]) ||
2689        NextToken[0].is(AsmToken::Pipe) ||
2690        isId(NextToken[0], "abs"))) {
2691     lex();
2692     return true;
2693   }
2694 
2695   return false;
2696 }
2697 
2698 OperandMatchResultTy
2699 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2700                                               bool AllowImm) {
2701   bool Neg, SP3Neg;
2702   bool Abs, SP3Abs;
2703   SMLoc Loc;
2704 
2705   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2706   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2707     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2708     return MatchOperand_ParseFail;
2709   }
2710 
2711   SP3Neg = parseSP3NegModifier();
2712 
2713   Loc = getLoc();
2714   Neg = trySkipId("neg");
2715   if (Neg && SP3Neg) {
2716     Error(Loc, "expected register or immediate");
2717     return MatchOperand_ParseFail;
2718   }
2719   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2720     return MatchOperand_ParseFail;
2721 
2722   Abs = trySkipId("abs");
2723   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2724     return MatchOperand_ParseFail;
2725 
2726   Loc = getLoc();
2727   SP3Abs = trySkipToken(AsmToken::Pipe);
2728   if (Abs && SP3Abs) {
2729     Error(Loc, "expected register or immediate");
2730     return MatchOperand_ParseFail;
2731   }
2732 
2733   OperandMatchResultTy Res;
2734   if (AllowImm) {
2735     Res = parseRegOrImm(Operands, SP3Abs);
2736   } else {
2737     Res = parseReg(Operands);
2738   }
2739   if (Res != MatchOperand_Success) {
2740     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2741   }
2742 
2743   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2744     return MatchOperand_ParseFail;
2745   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2746     return MatchOperand_ParseFail;
2747   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2748     return MatchOperand_ParseFail;
2749 
2750   AMDGPUOperand::Modifiers Mods;
2751   Mods.Abs = Abs || SP3Abs;
2752   Mods.Neg = Neg || SP3Neg;
2753 
2754   if (Mods.hasFPModifiers()) {
2755     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2756     if (Op.isExpr()) {
2757       Error(Op.getStartLoc(), "expected an absolute expression");
2758       return MatchOperand_ParseFail;
2759     }
2760     Op.setModifiers(Mods);
2761   }
2762   return MatchOperand_Success;
2763 }
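// Accepted spellings include "abs(v0)" and the SP3 form "|v0|" for ABS,
// "neg(v1)" and "-v1" for NEG, and combinations such as "-|v2|". Mixing both
// spellings of the same modifier (e.g. "abs(|v0|)") is rejected, as is the
// ambiguous "--1", which should be written as neg(-1).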
2764 
2765 OperandMatchResultTy
2766 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2767                                                bool AllowImm) {
2768   bool Sext = trySkipId("sext");
2769   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2770     return MatchOperand_ParseFail;
2771 
2772   OperandMatchResultTy Res;
2773   if (AllowImm) {
2774     Res = parseRegOrImm(Operands);
2775   } else {
2776     Res = parseReg(Operands);
2777   }
2778   if (Res != MatchOperand_Success) {
2779     return Sext? MatchOperand_ParseFail : Res;
2780   }
2781 
2782   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2783     return MatchOperand_ParseFail;
2784 
2785   AMDGPUOperand::Modifiers Mods;
2786   Mods.Sext = Sext;
2787 
2788   if (Mods.hasIntModifiers()) {
2789     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2790     if (Op.isExpr()) {
2791       Error(Op.getStartLoc(), "expected an absolute expression");
2792       return MatchOperand_ParseFail;
2793     }
2794     Op.setModifiers(Mods);
2795   }
2796 
2797   return MatchOperand_Success;
2798 }
2799 
2800 OperandMatchResultTy
2801 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2802   return parseRegOrImmWithFPInputMods(Operands, false);
2803 }
2804 
2805 OperandMatchResultTy
2806 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2807   return parseRegOrImmWithIntInputMods(Operands, false);
2808 }
2809 
2810 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2811   auto Loc = getLoc();
2812   if (trySkipId("off")) {
2813     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2814                                                 AMDGPUOperand::ImmTyOff, false));
2815     return MatchOperand_Success;
2816   }
2817 
2818   if (!isRegister())
2819     return MatchOperand_NoMatch;
2820 
2821   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2822   if (Reg) {
2823     Operands.push_back(std::move(Reg));
2824     return MatchOperand_Success;
2825   }
2826 
2827   return MatchOperand_ParseFail;
2828 
2829 }
2830 
2831 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2832   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2833 
2834   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2835       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2836       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2837       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2838     return Match_InvalidOperand;
2839 
2840   if ((TSFlags & SIInstrFlags::VOP3) &&
2841       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2842       getForcedEncodingSize() != 64)
2843     return Match_PreferE32;
2844 
2845   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2846       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2847     // v_mac_f32/16 allow only dst_sel == DWORD;
2848     auto OpNum =
2849         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2850     const auto &Op = Inst.getOperand(OpNum);
2851     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2852       return Match_InvalidOperand;
2853     }
2854   }
2855 
2856   return Match_Success;
2857 }
2858 
2859 static ArrayRef<unsigned> getAllVariants() {
2860   static const unsigned Variants[] = {
2861     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2862     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2863   };
2864 
2865   return makeArrayRef(Variants);
2866 }
2867 
2868 // What asm variants we should check
2869 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2870   if (getForcedEncodingSize() == 32) {
2871     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2872     return makeArrayRef(Variants);
2873   }
2874 
2875   if (isForcedVOP3()) {
2876     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2877     return makeArrayRef(Variants);
2878   }
2879 
2880   if (isForcedSDWA()) {
2881     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2882                                         AMDGPUAsmVariants::SDWA9};
2883     return makeArrayRef(Variants);
2884   }
2885 
2886   if (isForcedDPP()) {
2887     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2888     return makeArrayRef(Variants);
2889   }
2890 
2891   return getAllVariants();
2892 }
2893 
2894 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2895   if (getForcedEncodingSize() == 32)
2896     return "e32";
2897 
2898   if (isForcedVOP3())
2899     return "e64";
2900 
2901   if (isForcedSDWA())
2902     return "sdwa";
2903 
2904   if (isForcedDPP())
2905     return "dpp";
2906 
2907   return "";
2908 }
2909 
2910 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2911   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2912   const unsigned Num = Desc.getNumImplicitUses();
2913   for (unsigned i = 0; i < Num; ++i) {
2914     unsigned Reg = Desc.ImplicitUses[i];
2915     switch (Reg) {
2916     case AMDGPU::FLAT_SCR:
2917     case AMDGPU::VCC:
2918     case AMDGPU::VCC_LO:
2919     case AMDGPU::VCC_HI:
2920     case AMDGPU::M0:
2921       return Reg;
2922     default:
2923       break;
2924     }
2925   }
2926   return AMDGPU::NoRegister;
2927 }
2928 
2929 // NB: This code is correct only when used to check constant
2930 // bus limitations because GFX7 does not support f16 inline constants.
2931 // Note that there are no cases when a GFX7 opcode violates
2932 // constant bus limitations due to the use of an f16 constant.
2933 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2934                                        unsigned OpIdx) const {
2935   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2936 
2937   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2938     return false;
2939   }
2940 
2941   const MCOperand &MO = Inst.getOperand(OpIdx);
2942 
2943   int64_t Val = MO.getImm();
2944   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2945 
2946   switch (OpSize) { // expected operand size
2947   case 8:
2948     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2949   case 4:
2950     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2951   case 2: {
2952     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2953     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2954         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2955         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2956       return AMDGPU::isInlinableIntLiteral(Val);
2957 
2958     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2959         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2960         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2961       return AMDGPU::isInlinableIntLiteralV216(Val);
2962 
2963     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2964         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2965         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2966       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2967 
2968     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2969   }
2970   default:
2971     llvm_unreachable("invalid operand size");
2972   }
2973 }
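// For reference, the isInlinableLiteral* helpers used above accept the small
// integers -16..64 and a fixed set of FP values (0.5, 1.0, 2.0, 4.0, their
// negations and, when hasInv2PiInlineImm() is true, 1/(2*pi)); anything else
// is a literal and counts against the constant bus limit checked below.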
2974 
2975 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2976   if (!isGFX10())
2977     return 1;
2978 
2979   switch (Opcode) {
2980   // 64-bit shift instructions can use only one scalar value input
2981   case AMDGPU::V_LSHLREV_B64:
2982   case AMDGPU::V_LSHLREV_B64_gfx10:
2983   case AMDGPU::V_LSHL_B64:
2984   case AMDGPU::V_LSHRREV_B64:
2985   case AMDGPU::V_LSHRREV_B64_gfx10:
2986   case AMDGPU::V_LSHR_B64:
2987   case AMDGPU::V_ASHRREV_I64:
2988   case AMDGPU::V_ASHRREV_I64_gfx10:
2989   case AMDGPU::V_ASHR_I64:
2990     return 1;
2991   default:
2992     return 2;
2993   }
2994 }
2995 
2996 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2997   const MCOperand &MO = Inst.getOperand(OpIdx);
2998   if (MO.isImm()) {
2999     return !isInlineConstant(Inst, OpIdx);
3000   } else if (MO.isReg()) {
3001     auto Reg = MO.getReg();
3002     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3003     auto PReg = mc2PseudoReg(Reg);
3004     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3005   } else {
3006     return true;
3007   }
3008 }
3009 
3010 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
3011   const unsigned Opcode = Inst.getOpcode();
3012   const MCInstrDesc &Desc = MII.get(Opcode);
3013   unsigned ConstantBusUseCount = 0;
3014   unsigned NumLiterals = 0;
3015   unsigned LiteralSize;
3016 
3017   if (Desc.TSFlags &
3018       (SIInstrFlags::VOPC |
3019        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3020        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3021        SIInstrFlags::SDWA)) {
3022     // Check special imm operands (used by madmk, etc)
3023     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3024       ++ConstantBusUseCount;
3025     }
3026 
3027     SmallDenseSet<unsigned> SGPRsUsed;
3028     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3029     if (SGPRUsed != AMDGPU::NoRegister) {
3030       SGPRsUsed.insert(SGPRUsed);
3031       ++ConstantBusUseCount;
3032     }
3033 
3034     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3035     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3036     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3037 
3038     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3039 
3040     for (int OpIdx : OpIndices) {
3041       if (OpIdx == -1) break;
3042 
3043       const MCOperand &MO = Inst.getOperand(OpIdx);
3044       if (usesConstantBus(Inst, OpIdx)) {
3045         if (MO.isReg()) {
3046           const unsigned Reg = mc2PseudoReg(MO.getReg());
3047           // Pairs of registers with a partial intersection like these
3048           //   s0, s[0:1]
3049           //   flat_scratch_lo, flat_scratch
3050           //   flat_scratch_lo, flat_scratch_hi
3051           // are theoretically valid but they are disallowed anyway.
3052           // Note that this code mimics SIInstrInfo::verifyInstruction
3053           if (!SGPRsUsed.count(Reg)) {
3054             SGPRsUsed.insert(Reg);
3055             ++ConstantBusUseCount;
3056           }
3057         } else { // Expression or a literal
3058 
3059           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3060             continue; // special operand like VINTERP attr_chan
3061 
3062           // An instruction may use only one literal.
3063           // This has been validated on the previous step.
3064           // See validateVOP3Literal.
3065           // This literal may be used as more than one operand.
3066           // If all these operands are of the same size,
3067           // this literal counts as one scalar value.
3068           // Otherwise it counts as 2 scalar values.
3069           // See "GFX10 Shader Programming", section 3.6.2.3.
3070 
3071           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3072           if (Size < 4) Size = 4;
3073 
3074           if (NumLiterals == 0) {
3075             NumLiterals = 1;
3076             LiteralSize = Size;
3077           } else if (LiteralSize != Size) {
3078             NumLiterals = 2;
3079           }
3080         }
3081       }
3082     }
3083   }
3084   ConstantBusUseCount += NumLiterals;
3085 
3086   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3087 }
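// Worked example: on targets where getConstantBusLimit() returns 1, a VOP3
// form such as "v_add_f32_e64 v0, s0, s1" reads two distinct SGPRs and is
// rejected, while "v_add_f32_e64 v0, s0, s0" counts s0 only once and passes;
// on GFX10 the default limit of 2 allows the two-SGPR form as well.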
3088 
3089 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3090   const unsigned Opcode = Inst.getOpcode();
3091   const MCInstrDesc &Desc = MII.get(Opcode);
3092 
3093   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3094   if (DstIdx == -1 ||
3095       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3096     return true;
3097   }
3098 
3099   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3100 
3101   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3102   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3103   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3104 
3105   assert(DstIdx != -1);
3106   const MCOperand &Dst = Inst.getOperand(DstIdx);
3107   assert(Dst.isReg());
3108   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3109 
3110   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3111 
3112   for (int SrcIdx : SrcIndices) {
3113     if (SrcIdx == -1) break;
3114     const MCOperand &Src = Inst.getOperand(SrcIdx);
3115     if (Src.isReg()) {
3116       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3117       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3118         return false;
3119       }
3120     }
3121   }
3122 
3123   return true;
3124 }
3125 
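// If the target does not support integer clamping, reject a non-zero clamp
// modifier on instructions that carry the IntClamp flag.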
3126 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3127 
3128   const unsigned Opc = Inst.getOpcode();
3129   const MCInstrDesc &Desc = MII.get(Opc);
3130 
3131   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3132     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3133     assert(ClampIdx != -1);
3134     return Inst.getOperand(ClampIdx).getImm() == 0;
3135   }
3136 
3137   return true;
3138 }
3139 
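// Check that the MIMG vdata register size matches the data size implied by
// dmask, tfe and d16.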
3140 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3141 
3142   const unsigned Opc = Inst.getOpcode();
3143   const MCInstrDesc &Desc = MII.get(Opc);
3144 
3145   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3146     return true;
3147 
3148   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3149   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3150   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3151 
3152   assert(VDataIdx != -1);
3153 
3154   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3155     return true;
3156 
3157   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3158   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3159   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3160   if (DMask == 0)
3161     DMask = 1;
3162 
3163   unsigned DataSize =
3164     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3165   if (hasPackedD16()) {
3166     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3167     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3168       DataSize = (DataSize + 1) / 2;
3169   }
3170 
3171   return (VDataSize / 4) == DataSize + TFESize;
3172 }
3173 
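// GFX10 only: check that the MIMG address register size (or the number of
// NSA address operands) matches the address size implied by dim and the
// base opcode.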
3174 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3175   const unsigned Opc = Inst.getOpcode();
3176   const MCInstrDesc &Desc = MII.get(Opc);
3177 
3178   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3179     return true;
3180 
3181   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3182 
3183   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3184       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3185   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3186   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3187   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3188 
3189   assert(VAddr0Idx != -1);
3190   assert(SrsrcIdx != -1);
3191   assert(SrsrcIdx > VAddr0Idx);
3192 
3193   if (DimIdx == -1)
3194     return true; // intersect_ray
3195 
3196   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3197   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3198   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3199   unsigned VAddrSize =
3200       IsNSA ? SrsrcIdx - VAddr0Idx
3201             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3202 
3203   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3204                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3205                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3206                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3207   if (!IsNSA) {
3208     if (AddrSize > 8)
3209       AddrSize = 16;
3210     else if (AddrSize > 4)
3211       AddrSize = 8;
3212   }
3213 
3214   return VAddrSize == AddrSize;
3215 }
3216 
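// Image atomics must use a dmask of 0x1, 0x3 or 0xf.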
3217 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3218 
3219   const unsigned Opc = Inst.getOpcode();
3220   const MCInstrDesc &Desc = MII.get(Opc);
3221 
3222   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3223     return true;
3224   if (!Desc.mayLoad() || !Desc.mayStore())
3225     return true; // Not atomic
3226 
3227   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3228   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3229 
3230   // This is an incomplete check because image_atomic_cmpswap
3231   // may only use 0x3 and 0xf while other atomic operations
3232   // may use 0x1 and 0x3. However, these limitations are
3233   // verified when we check that dmask matches dst size.
3234   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3235 }
3236 
3237 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3238 
3239   const unsigned Opc = Inst.getOpcode();
3240   const MCInstrDesc &Desc = MII.get(Opc);
3241 
3242   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3243     return true;
3244 
3245   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3246   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3247 
3248   // GATHER4 instructions use dmask in a different fashion compared to
3249   // other MIMG instructions. The only useful DMASK values are
3250   // 1=red, 2=green, 4=blue, 8=alpha; e.g. dmask=1 returns
3251   // (red,red,red,red). The ISA document doesn't mention
3252   // this.
3253   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3254 }
3255 
3256 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3257 {
3258   switch (Opcode) {
3259   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3260   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3261   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3262     return true;
3263   default:
3264     return false;
3265   }
3266 }
3267 
3268 // movrels* opcodes should only allow VGPRs as src0.
3269 // This is specified in .td description for vop1/vop3,
3270 // but sdwa is handled differently. See isSDWAOperand.
3271 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3272 
3273   const unsigned Opc = Inst.getOpcode();
3274   const MCInstrDesc &Desc = MII.get(Opc);
3275 
3276   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3277     return true;
3278 
3279   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3280   assert(Src0Idx != -1);
3281 
3282   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3283   if (!Src0.isReg())
3284     return false;
3285 
3286   auto Reg = Src0.getReg();
3287   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3288   return !isSGPR(mc2PseudoReg(Reg), TRI);
3289 }
3290 
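// v_accvgpr_write does not accept an SGPR as src0; it must be a VGPR or an
// inline constant.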
3291 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3292 
3293   const unsigned Opc = Inst.getOpcode();
3294 
3295   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3296     return true;
3297 
3298   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3299   assert(Src0Idx != -1);
3300 
3301   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3302   if (!Src0.isReg())
3303     return true;
3304 
3305   auto Reg = Src0.getReg();
3306   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3307   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3308     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3309     return false;
3310   }
3311 
3312   return true;
3313 }
3314 
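// VOP3B div_scale instructions do not accept the ABS source modifier.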
3315 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3316   switch (Inst.getOpcode()) {
3317   default:
3318     return true;
3319   case V_DIV_SCALE_F32_gfx6_gfx7:
3320   case V_DIV_SCALE_F32_vi:
3321   case V_DIV_SCALE_F32_gfx10:
3322   case V_DIV_SCALE_F64_gfx6_gfx7:
3323   case V_DIV_SCALE_F64_vi:
3324   case V_DIV_SCALE_F64_gfx10:
3325     break;
3326   }
3327 
3328   // TODO: Check that src0 = src1 or src2.
3329 
3330   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3331                     AMDGPU::OpName::src1_modifiers,
3332                     AMDGPU::OpName::src2_modifiers}) {
3333     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3334             .getImm() &
3335         SISrcMods::ABS) {
3336       Error(getLoc(), "ABS not allowed in VOP3B instructions");
3337       return false;
3338     }
3339   }
3340 
3341   return true;
3342 }
3343 
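// The d16 image modifier is not supported on SI/CI targets.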
3344 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3345 
3346   const unsigned Opc = Inst.getOpcode();
3347   const MCInstrDesc &Desc = MII.get(Opc);
3348 
3349   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3350     return true;
3351 
3352   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3353   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3354     if (isCI() || isSI())
3355       return false;
3356   }
3357 
3358   return true;
3359 }
3360 
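// The dim operand, when present, must be a valid 3-bit dimension encoding.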
3361 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3362   const unsigned Opc = Inst.getOpcode();
3363   const MCInstrDesc &Desc = MII.get(Opc);
3364 
3365   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3366     return true;
3367 
3368   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3369   if (DimIdx < 0)
3370     return true;
3371 
3372   int64_t Imm = Inst.getOperand(DimIdx).getImm();
3373   if (Imm < 0 || Imm >= 8)
3374     return false;
3375 
3376   return true;
3377 }
3378 
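// Opcodes with reversed source operands (subrev, lshlrev, lshrrev, ashrrev).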
3379 static bool IsRevOpcode(const unsigned Opcode)
3380 {
3381   switch (Opcode) {
3382   case AMDGPU::V_SUBREV_F32_e32:
3383   case AMDGPU::V_SUBREV_F32_e64:
3384   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3385   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3386   case AMDGPU::V_SUBREV_F32_e32_vi:
3387   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3388   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3389   case AMDGPU::V_SUBREV_F32_e64_vi:
3390 
3391   case AMDGPU::V_SUBREV_CO_U32_e32:
3392   case AMDGPU::V_SUBREV_CO_U32_e64:
3393   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3394   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3395 
3396   case AMDGPU::V_SUBBREV_U32_e32:
3397   case AMDGPU::V_SUBBREV_U32_e64:
3398   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3399   case AMDGPU::V_SUBBREV_U32_e32_vi:
3400   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3401   case AMDGPU::V_SUBBREV_U32_e64_vi:
3402 
3403   case AMDGPU::V_SUBREV_U32_e32:
3404   case AMDGPU::V_SUBREV_U32_e64:
3405   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3406   case AMDGPU::V_SUBREV_U32_e32_vi:
3407   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3408   case AMDGPU::V_SUBREV_U32_e64_vi:
3409 
3410   case AMDGPU::V_SUBREV_F16_e32:
3411   case AMDGPU::V_SUBREV_F16_e64:
3412   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3413   case AMDGPU::V_SUBREV_F16_e32_vi:
3414   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3415   case AMDGPU::V_SUBREV_F16_e64_vi:
3416 
3417   case AMDGPU::V_SUBREV_U16_e32:
3418   case AMDGPU::V_SUBREV_U16_e64:
3419   case AMDGPU::V_SUBREV_U16_e32_vi:
3420   case AMDGPU::V_SUBREV_U16_e64_vi:
3421 
3422   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3423   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3424   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3425 
3426   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3427   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3428 
3429   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3430   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3431 
3432   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3433   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3434 
3435   case AMDGPU::V_LSHRREV_B32_e32:
3436   case AMDGPU::V_LSHRREV_B32_e64:
3437   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3438   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3439   case AMDGPU::V_LSHRREV_B32_e32_vi:
3440   case AMDGPU::V_LSHRREV_B32_e64_vi:
3441   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3442   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3443 
3444   case AMDGPU::V_ASHRREV_I32_e32:
3445   case AMDGPU::V_ASHRREV_I32_e64:
3446   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3447   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3448   case AMDGPU::V_ASHRREV_I32_e32_vi:
3449   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3450   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3451   case AMDGPU::V_ASHRREV_I32_e64_vi:
3452 
3453   case AMDGPU::V_LSHLREV_B32_e32:
3454   case AMDGPU::V_LSHLREV_B32_e64:
3455   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3456   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3457   case AMDGPU::V_LSHLREV_B32_e32_vi:
3458   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3459   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3460   case AMDGPU::V_LSHLREV_B32_e64_vi:
3461 
3462   case AMDGPU::V_LSHLREV_B16_e32:
3463   case AMDGPU::V_LSHLREV_B16_e64:
3464   case AMDGPU::V_LSHLREV_B16_e32_vi:
3465   case AMDGPU::V_LSHLREV_B16_e64_vi:
3466   case AMDGPU::V_LSHLREV_B16_gfx10:
3467 
3468   case AMDGPU::V_LSHRREV_B16_e32:
3469   case AMDGPU::V_LSHRREV_B16_e64:
3470   case AMDGPU::V_LSHRREV_B16_e32_vi:
3471   case AMDGPU::V_LSHRREV_B16_e64_vi:
3472   case AMDGPU::V_LSHRREV_B16_gfx10:
3473 
3474   case AMDGPU::V_ASHRREV_I16_e32:
3475   case AMDGPU::V_ASHRREV_I16_e64:
3476   case AMDGPU::V_ASHRREV_I16_e32_vi:
3477   case AMDGPU::V_ASHRREV_I16_e64_vi:
3478   case AMDGPU::V_ASHRREV_I16_gfx10:
3479 
3480   case AMDGPU::V_LSHLREV_B64:
3481   case AMDGPU::V_LSHLREV_B64_gfx10:
3482   case AMDGPU::V_LSHLREV_B64_vi:
3483 
3484   case AMDGPU::V_LSHRREV_B64:
3485   case AMDGPU::V_LSHRREV_B64_gfx10:
3486   case AMDGPU::V_LSHRREV_B64_vi:
3487 
3488   case AMDGPU::V_ASHRREV_I64:
3489   case AMDGPU::V_ASHRREV_I64_gfx10:
3490   case AMDGPU::V_ASHRREV_I64_vi:
3491 
3492   case AMDGPU::V_PK_LSHLREV_B16:
3493   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3494   case AMDGPU::V_PK_LSHLREV_B16_vi:
3495 
3496   case AMDGPU::V_PK_LSHRREV_B16:
3497   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3498   case AMDGPU::V_PK_LSHRREV_B16_vi:
3499   case AMDGPU::V_PK_ASHRREV_I16:
3500   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3501   case AMDGPU::V_PK_ASHRREV_I16_vi:
3502     return true;
3503   default:
3504     return false;
3505   }
3506 }
3507 
3508 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3509 
3510   using namespace SIInstrFlags;
3511   const unsigned Opcode = Inst.getOpcode();
3512   const MCInstrDesc &Desc = MII.get(Opcode);
3513 
3514   // lds_direct register is defined so that it can be used
3515   // with 9-bit operands only. Ignore encodings which do not accept these.
3516   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3517     return true;
3518 
3519   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3520   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3521   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3522 
3523   const int SrcIndices[] = { Src1Idx, Src2Idx };
3524 
3525   // lds_direct cannot be specified as either src1 or src2.
3526   for (int SrcIdx : SrcIndices) {
3527     if (SrcIdx == -1) break;
3528     const MCOperand &Src = Inst.getOperand(SrcIdx);
3529     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3530       return false;
3531     }
3532   }
3533 
3534   if (Src0Idx == -1)
3535     return true;
3536 
3537   const MCOperand &Src = Inst.getOperand(Src0Idx);
3538   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3539     return true;
3540 
3541   // lds_direct is specified as src0. Check additional limitations.
3542   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3543 }
3544 
3545 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3546   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3547     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3548     if (Op.isFlatOffset())
3549       return Op.getStartLoc();
3550   }
3551   return getLoc();
3552 }
3553 
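// Check that the FLAT offset modifier is supported by the target and fits
// the encoding: signed for global/scratch instructions, unsigned otherwise.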
3554 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3555                                          const OperandVector &Operands) {
3556   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3557   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3558     return true;
3559 
3560   auto Opcode = Inst.getOpcode();
3561   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3562   assert(OpNum != -1);
3563 
3564   const auto &Op = Inst.getOperand(OpNum);
3565   if (!hasFlatOffsets() && Op.getImm() != 0) {
3566     Error(getFlatOffsetLoc(Operands),
3567           "flat offset modifier is not supported on this GPU");
3568     return false;
3569   }
3570 
3571   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3572   // For FLAT segment the offset must be positive;
3573   // MSB is ignored and forced to zero.
3574   unsigned OffsetSize = isGFX9() ? 13 : 12;
3575   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3576     if (!isIntN(OffsetSize, Op.getImm())) {
3577       Error(getFlatOffsetLoc(Operands),
3578             isGFX9() ? "expected a 13-bit signed offset" :
3579                        "expected a 12-bit signed offset");
3580       return false;
3581     }
3582   } else {
3583     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3584       Error(getFlatOffsetLoc(Operands),
3585             isGFX9() ? "expected a 12-bit unsigned offset" :
3586                        "expected an 11-bit unsigned offset");
3587       return false;
3588     }
3589   }
3590 
3591   return true;
3592 }
3593 
3594 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3595   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3596     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3597     if (Op.isSMEMOffset())
3598       return Op.getStartLoc();
3599   }
3600   return getLoc();
3601 }
3602 
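// Check that the SMEM offset fits into the offset encoding of the target.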
3603 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3604                                          const OperandVector &Operands) {
3605   if (isCI() || isSI())
3606     return true;
3607 
3608   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3609   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3610     return true;
3611 
3612   auto Opcode = Inst.getOpcode();
3613   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3614   if (OpNum == -1)
3615     return true;
3616 
3617   const auto &Op = Inst.getOperand(OpNum);
3618   if (!Op.isImm())
3619     return true;
3620 
3621   uint64_t Offset = Op.getImm();
3622   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3623   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3624       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3625     return true;
3626 
3627   Error(getSMEMOffsetLoc(Operands),
3628         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3629                                "expected a 21-bit signed offset");
3630 
3631   return false;
3632 }
3633 
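// SOP2/SOPC instructions may use at most one literal or expression across
// src0 and src1.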
3634 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3635   unsigned Opcode = Inst.getOpcode();
3636   const MCInstrDesc &Desc = MII.get(Opcode);
3637   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3638     return true;
3639 
3640   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3641   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3642 
3643   const int OpIndices[] = { Src0Idx, Src1Idx };
3644 
3645   unsigned NumExprs = 0;
3646   unsigned NumLiterals = 0;
3647   uint32_t LiteralValue;
3648 
3649   for (int OpIdx : OpIndices) {
3650     if (OpIdx == -1) break;
3651 
3652     const MCOperand &MO = Inst.getOperand(OpIdx);
3653     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3654     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3655       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3656         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3657         if (NumLiterals == 0 || LiteralValue != Value) {
3658           LiteralValue = Value;
3659           ++NumLiterals;
3660         }
3661       } else if (MO.isExpr()) {
3662         ++NumExprs;
3663       }
3664     }
3665   }
3666 
3667   return NumLiterals + NumExprs <= 1;
3668 }
3669 
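// v_permlane16/v_permlanex16 only use the low two op_sel bits.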
3670 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3671   const unsigned Opc = Inst.getOpcode();
3672   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3673       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3674     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3675     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3676 
3677     if (OpSel & ~3)
3678       return false;
3679   }
3680   return true;
3681 }
3682 
3683 // Check if VCC register matches wavefront size
3684 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3685   auto FB = getFeatureBits();
3686   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3687     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3688 }
3689 
3690 // VOP3 literal is only allowed in GFX10+ and only one can be used
3691 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3692   unsigned Opcode = Inst.getOpcode();
3693   const MCInstrDesc &Desc = MII.get(Opcode);
3694   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3695     return true;
3696 
3697   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3698   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3699   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3700 
3701   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3702 
3703   unsigned NumExprs = 0;
3704   unsigned NumLiterals = 0;
3705   uint32_t LiteralValue;
3706 
3707   for (int OpIdx : OpIndices) {
3708     if (OpIdx == -1) break;
3709 
3710     const MCOperand &MO = Inst.getOperand(OpIdx);
3711     if (!MO.isImm() && !MO.isExpr())
3712       continue;
3713     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3714       continue;
3715 
3716     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3717         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3718       return false;
3719 
3720     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3721       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3722       if (NumLiterals == 0 || LiteralValue != Value) {
3723         LiteralValue = Value;
3724         ++NumLiterals;
3725       }
3726     } else if (MO.isExpr()) {
3727       ++NumExprs;
3728     }
3729   }
3730   NumLiterals += NumExprs;
3731 
3732   return !NumLiterals ||
3733          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3734 }
3735 
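// Instructions with a GLC_1 default operand require an explicit glc modifier.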
3736 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3737                                             const OperandVector &Operands,
3738                                             const SMLoc &IDLoc) {
3739   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3740                                           AMDGPU::OpName::glc1);
3741   if (GLCPos != -1) {
3742     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
3743     // in the asm string, and the default value means it is not present.
3744     if (Inst.getOperand(GLCPos).getImm() == -1) {
3745       Error(IDLoc, "instruction must use glc");
3746       return false;
3747     }
3748   }
3749 
3750   return true;
3751 }
3752 
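// Run all per-instruction checks. On the first failure an error is reported
// and false is returned.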
3753 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3754                                           const SMLoc &IDLoc,
3755                                           const OperandVector &Operands) {
3756   if (!validateLdsDirect(Inst)) {
3757     Error(IDLoc,
3758       "invalid use of lds_direct");
3759     return false;
3760   }
3761   if (!validateSOPLiteral(Inst)) {
3762     Error(IDLoc,
3763       "only one literal operand is allowed");
3764     return false;
3765   }
3766   if (!validateVOP3Literal(Inst)) {
3767     Error(IDLoc,
3768       "invalid literal operand");
3769     return false;
3770   }
3771   if (!validateConstantBusLimitations(Inst)) {
3772     Error(IDLoc,
3773       "invalid operand (violates constant bus restrictions)");
3774     return false;
3775   }
3776   if (!validateEarlyClobberLimitations(Inst)) {
3777     Error(IDLoc,
3778       "destination must be different than all sources");
3779     return false;
3780   }
3781   if (!validateIntClampSupported(Inst)) {
3782     Error(IDLoc,
3783       "integer clamping is not supported on this GPU");
3784     return false;
3785   }
3786   if (!validateOpSel(Inst)) {
3787     Error(IDLoc,
3788       "invalid op_sel operand");
3789     return false;
3790   }
3791   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3792   if (!validateMIMGD16(Inst)) {
3793     Error(IDLoc,
3794       "d16 modifier is not supported on this GPU");
3795     return false;
3796   }
3797   if (!validateMIMGDim(Inst)) {
3798     Error(IDLoc, "dim modifier is required on this GPU");
3799     return false;
3800   }
3801   if (!validateMIMGDataSize(Inst)) {
3802     Error(IDLoc,
3803       "image data size does not match dmask and tfe");
3804     return false;
3805   }
3806   if (!validateMIMGAddrSize(Inst)) {
3807     Error(IDLoc,
3808       "image address size does not match dim and a16");
3809     return false;
3810   }
3811   if (!validateMIMGAtomicDMask(Inst)) {
3812     Error(IDLoc,
3813       "invalid atomic image dmask");
3814     return false;
3815   }
3816   if (!validateMIMGGatherDMask(Inst)) {
3817     Error(IDLoc,
3818       "invalid image_gather dmask: only one bit must be set");
3819     return false;
3820   }
3821   if (!validateMovrels(Inst)) {
3822     Error(IDLoc, "source operand must be a VGPR");
3823     return false;
3824   }
3825   if (!validateFlatOffset(Inst, Operands)) {
3826     return false;
3827   }
3828   if (!validateSMEMOffset(Inst, Operands)) {
3829     return false;
3830   }
3831   if (!validateMAIAccWrite(Inst)) {
3832     return false;
3833   }
3834   if (!validateDivScale(Inst)) {
3835     return false;
3836   }
3837   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3838     return false;
3839   }
3840 
3841   return true;
3842 }
3843 
3844 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3845                                             const FeatureBitset &FBS,
3846                                             unsigned VariantID = 0);
3847 
3848 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3849                                 const FeatureBitset &AvailableFeatures,
3850                                 unsigned VariantID);
3851 
3852 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3853                                        const FeatureBitset &FBS) {
3854   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3855 }
3856 
3857 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3858                                        const FeatureBitset &FBS,
3859                                        ArrayRef<unsigned> Variants) {
3860   for (auto Variant : Variants) {
3861     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3862       return true;
3863   }
3864 
3865   return false;
3866 }
3867 
3868 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3869                                                   const SMLoc &IDLoc) {
3870   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3871 
3872   // Check if requested instruction variant is supported.
3873   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3874     return false;
3875 
3876   // This instruction is not supported.
3877   // Clear any other pending errors because they are no longer relevant.
3878   getParser().clearPendingErrors();
3879 
3880   // Requested instruction variant is not supported.
3881   // Check if any other variants are supported.
3882   StringRef VariantName = getMatchedVariantName();
3883   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3884     return Error(IDLoc,
3885                  Twine(VariantName,
3886                        " variant of this instruction is not supported"));
3887   }
3888 
3889   // Finally check if this instruction is supported on any other GPU.
3890   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3891     return Error(IDLoc, "instruction not supported on this GPU");
3892   }
3893 
3894   // Instruction not supported on any GPU. Probably a typo.
3895   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3896   return Error(IDLoc, "invalid instruction" + Suggestion);
3897 }
3898 
3899 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3900                                               OperandVector &Operands,
3901                                               MCStreamer &Out,
3902                                               uint64_t &ErrorInfo,
3903                                               bool MatchingInlineAsm) {
3904   MCInst Inst;
3905   unsigned Result = Match_Success;
3906   for (auto Variant : getMatchedVariants()) {
3907     uint64_t EI;
3908     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3909                                   Variant);
3910     // We order match statuses from least to most specific and use the most
3911     // specific status as the result:
3912     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3913     if ((R == Match_Success) ||
3914         (R == Match_PreferE32) ||
3915         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3916         (R == Match_InvalidOperand && Result != Match_MissingFeature
3917                                    && Result != Match_PreferE32) ||
3918         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3919                                    && Result != Match_MissingFeature
3920                                    && Result != Match_PreferE32)) {
3921       Result = R;
3922       ErrorInfo = EI;
3923     }
3924     if (R == Match_Success)
3925       break;
3926   }
3927 
3928   if (Result == Match_Success) {
3929     if (!validateInstruction(Inst, IDLoc, Operands)) {
3930       return true;
3931     }
3932     Inst.setLoc(IDLoc);
3933     Out.emitInstruction(Inst, getSTI());
3934     return false;
3935   }
3936 
3937   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3938   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3939     return true;
3940   }
3941 
3942   switch (Result) {
3943   default: break;
3944   case Match_MissingFeature:
3945     // It has been verified that the specified instruction
3946     // mnemonic is valid. A match was found but it requires
3947     // features which are not supported on this GPU.
3948     return Error(IDLoc, "operands are not valid for this GPU or mode");
3949 
3950   case Match_InvalidOperand: {
3951     SMLoc ErrorLoc = IDLoc;
3952     if (ErrorInfo != ~0ULL) {
3953       if (ErrorInfo >= Operands.size()) {
3954         return Error(IDLoc, "too few operands for instruction");
3955       }
3956       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3957       if (ErrorLoc == SMLoc())
3958         ErrorLoc = IDLoc;
3959     }
3960     return Error(ErrorLoc, "invalid operand for instruction");
3961   }
3962 
3963   case Match_PreferE32:
3964     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3965                         "should be encoded as e32");
3966   case Match_MnemonicFail:
3967     llvm_unreachable("Invalid instructions should have been handled already");
3968   }
3969   llvm_unreachable("Implement any new match types added!");
3970 }
3971 
3972 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3973   int64_t Tmp = -1;
3974   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3975     return true;
3976   }
3977   if (getParser().parseAbsoluteExpression(Tmp)) {
3978     return true;
3979   }
3980   Ret = static_cast<uint32_t>(Tmp);
3981   return false;
3982 }
3983 
3984 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3985                                                uint32_t &Minor) {
3986   if (ParseAsAbsoluteExpression(Major))
3987     return TokError("invalid major version");
3988 
3989   if (getLexer().isNot(AsmToken::Comma))
3990     return TokError("minor version number required, comma expected");
3991   Lex();
3992 
3993   if (ParseAsAbsoluteExpression(Minor))
3994     return TokError("invalid minor version");
3995 
3996   return false;
3997 }
3998 
3999 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4000   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4001     return TokError("directive only supported for amdgcn architecture");
4002 
4003   std::string Target;
4004 
4005   SMLoc TargetStart = getTok().getLoc();
4006   if (getParser().parseEscapedString(Target))
4007     return true;
4008   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4009 
4010   std::string ExpectedTarget;
4011   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4012   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4013 
4014   if (Target != ExpectedTargetOS.str())
4015     return getParser().Error(TargetRange.Start, "target must match options",
4016                              TargetRange);
4017 
4018   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4019   return false;
4020 }
4021 
4022 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4023   return getParser().Error(Range.Start, "value out of range", Range);
4024 }
4025 
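// Compute the granulated VGPR and SGPR block counts used in the kernel
// descriptor, checking the register counts against the target's limits.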
4026 bool AMDGPUAsmParser::calculateGPRBlocks(
4027     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4028     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4029     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4030     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4031   // TODO(scott.linder): These calculations are duplicated from
4032   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4033   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4034 
4035   unsigned NumVGPRs = NextFreeVGPR;
4036   unsigned NumSGPRs = NextFreeSGPR;
4037 
4038   if (Version.Major >= 10)
4039     NumSGPRs = 0;
4040   else {
4041     unsigned MaxAddressableNumSGPRs =
4042         IsaInfo::getAddressableNumSGPRs(&getSTI());
4043 
4044     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4045         NumSGPRs > MaxAddressableNumSGPRs)
4046       return OutOfRangeError(SGPRRange);
4047 
4048     NumSGPRs +=
4049         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4050 
4051     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4052         NumSGPRs > MaxAddressableNumSGPRs)
4053       return OutOfRangeError(SGPRRange);
4054 
4055     if (Features.test(FeatureSGPRInitBug))
4056       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4057   }
4058 
4059   VGPRBlocks =
4060       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4061   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4062 
4063   return false;
4064 }
4065 
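// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block and emit the resulting
// kernel descriptor.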
4066 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4067   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4068     return TokError("directive only supported for amdgcn architecture");
4069 
4070   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4071     return TokError("directive only supported for amdhsa OS");
4072 
4073   StringRef KernelName;
4074   if (getParser().parseIdentifier(KernelName))
4075     return true;
4076 
4077   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4078 
4079   StringSet<> Seen;
4080 
4081   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4082 
4083   SMRange VGPRRange;
4084   uint64_t NextFreeVGPR = 0;
4085   SMRange SGPRRange;
4086   uint64_t NextFreeSGPR = 0;
4087   unsigned UserSGPRCount = 0;
4088   bool ReserveVCC = true;
4089   bool ReserveFlatScr = true;
4090   bool ReserveXNACK = hasXNACK();
4091   Optional<bool> EnableWavefrontSize32;
4092 
4093   while (true) {
4094     while (getLexer().is(AsmToken::EndOfStatement))
4095       Lex();
4096 
4097     if (getLexer().isNot(AsmToken::Identifier))
4098       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4099 
4100     StringRef ID = getTok().getIdentifier();
4101     SMRange IDRange = getTok().getLocRange();
4102     Lex();
4103 
4104     if (ID == ".end_amdhsa_kernel")
4105       break;
4106 
4107     if (Seen.find(ID) != Seen.end())
4108       return TokError(".amdhsa_ directives cannot be repeated");
4109     Seen.insert(ID);
4110 
4111     SMLoc ValStart = getTok().getLoc();
4112     int64_t IVal;
4113     if (getParser().parseAbsoluteExpression(IVal))
4114       return true;
4115     SMLoc ValEnd = getTok().getLoc();
4116     SMRange ValRange = SMRange(ValStart, ValEnd);
4117 
4118     if (IVal < 0)
4119       return OutOfRangeError(ValRange);
4120 
4121     uint64_t Val = IVal;
4122 
4123 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4124   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4125     return OutOfRangeError(RANGE);                                             \
4126   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4127 
4128     if (ID == ".amdhsa_group_segment_fixed_size") {
4129       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4130         return OutOfRangeError(ValRange);
4131       KD.group_segment_fixed_size = Val;
4132     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4133       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4134         return OutOfRangeError(ValRange);
4135       KD.private_segment_fixed_size = Val;
4136     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4137       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4138                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4139                        Val, ValRange);
4140       if (Val)
4141         UserSGPRCount += 4;
4142     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4143       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4144                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4145                        ValRange);
4146       if (Val)
4147         UserSGPRCount += 2;
4148     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4149       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4150                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4151                        ValRange);
4152       if (Val)
4153         UserSGPRCount += 2;
4154     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4155       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4156                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4157                        Val, ValRange);
4158       if (Val)
4159         UserSGPRCount += 2;
4160     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4161       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4162                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4163                        ValRange);
4164       if (Val)
4165         UserSGPRCount += 2;
4166     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4167       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4168                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4169                        ValRange);
4170       if (Val)
4171         UserSGPRCount += 2;
4172     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4173       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4174                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4175                        Val, ValRange);
4176       if (Val)
4177         UserSGPRCount += 1;
4178     } else if (ID == ".amdhsa_wavefront_size32") {
4179       if (IVersion.Major < 10)
4180         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4181                                  IDRange);
4182       EnableWavefrontSize32 = Val;
4183       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4184                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4185                        Val, ValRange);
4186     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4187       PARSE_BITS_ENTRY(
4188           KD.compute_pgm_rsrc2,
4189           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4190           ValRange);
4191     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4192       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4193                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4194                        ValRange);
4195     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4196       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4197                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4198                        ValRange);
4199     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4200       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4201                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4202                        ValRange);
4203     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4204       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4205                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4206                        ValRange);
4207     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4208       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4209                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4210                        ValRange);
4211     } else if (ID == ".amdhsa_next_free_vgpr") {
4212       VGPRRange = ValRange;
4213       NextFreeVGPR = Val;
4214     } else if (ID == ".amdhsa_next_free_sgpr") {
4215       SGPRRange = ValRange;
4216       NextFreeSGPR = Val;
4217     } else if (ID == ".amdhsa_reserve_vcc") {
4218       if (!isUInt<1>(Val))
4219         return OutOfRangeError(ValRange);
4220       ReserveVCC = Val;
4221     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4222       if (IVersion.Major < 7)
4223         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4224                                  IDRange);
4225       if (!isUInt<1>(Val))
4226         return OutOfRangeError(ValRange);
4227       ReserveFlatScr = Val;
4228     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4229       if (IVersion.Major < 8)
4230         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4231                                  IDRange);
4232       if (!isUInt<1>(Val))
4233         return OutOfRangeError(ValRange);
4234       ReserveXNACK = Val;
4235     } else if (ID == ".amdhsa_float_round_mode_32") {
4236       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4237                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4238     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4239       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4240                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4241     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4242       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4243                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4244     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4245       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4246                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4247                        ValRange);
4248     } else if (ID == ".amdhsa_dx10_clamp") {
4249       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4250                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4251     } else if (ID == ".amdhsa_ieee_mode") {
4252       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4253                        Val, ValRange);
4254     } else if (ID == ".amdhsa_fp16_overflow") {
4255       if (IVersion.Major < 9)
4256         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4257                                  IDRange);
4258       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4259                        ValRange);
4260     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4261       if (IVersion.Major < 10)
4262         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4263                                  IDRange);
4264       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4265                        ValRange);
4266     } else if (ID == ".amdhsa_memory_ordered") {
4267       if (IVersion.Major < 10)
4268         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4269                                  IDRange);
4270       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4271                        ValRange);
4272     } else if (ID == ".amdhsa_forward_progress") {
4273       if (IVersion.Major < 10)
4274         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4275                                  IDRange);
4276       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4277                        ValRange);
4278     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4279       PARSE_BITS_ENTRY(
4280           KD.compute_pgm_rsrc2,
4281           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4282           ValRange);
4283     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4284       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4285                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4286                        Val, ValRange);
4287     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4288       PARSE_BITS_ENTRY(
4289           KD.compute_pgm_rsrc2,
4290           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4291           ValRange);
4292     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4293       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4294                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4295                        Val, ValRange);
4296     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4297       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4298                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4299                        Val, ValRange);
4300     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4301       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4302                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4303                        Val, ValRange);
4304     } else if (ID == ".amdhsa_exception_int_div_zero") {
4305       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4306                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4307                        Val, ValRange);
4308     } else {
4309       return getParser().Error(IDRange.Start,
4310                                "unknown .amdhsa_kernel directive", IDRange);
4311     }
4312 
4313 #undef PARSE_BITS_ENTRY
4314   }
4315 
4316   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4317     return TokError(".amdhsa_next_free_vgpr directive is required");
4318 
4319   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4320     return TokError(".amdhsa_next_free_sgpr directive is required");
4321 
4322   unsigned VGPRBlocks;
4323   unsigned SGPRBlocks;
4324   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4325                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4326                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4327                          SGPRBlocks))
4328     return true;
4329 
4330   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4331           VGPRBlocks))
4332     return OutOfRangeError(VGPRRange);
4333   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4334                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4335 
4336   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4337           SGPRBlocks))
4338     return OutOfRangeError(SGPRRange);
4339   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4340                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4341                   SGPRBlocks);
4342 
4343   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4344     return TokError("too many user SGPRs enabled");
4345   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4346                   UserSGPRCount);
4347 
4348   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4349       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4350       ReserveFlatScr, ReserveXNACK);
4351   return false;
4352 }
4353 
4354 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4355   uint32_t Major;
4356   uint32_t Minor;
4357 
4358   if (ParseDirectiveMajorMinor(Major, Minor))
4359     return true;
4360 
4361   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4362   return false;
4363 }
4364 
4365 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4366   uint32_t Major;
4367   uint32_t Minor;
4368   uint32_t Stepping;
4369   StringRef VendorName;
4370   StringRef ArchName;
4371 
4372   // If this directive has no arguments, then use the ISA version for the
4373   // targeted GPU.
4374   if (getLexer().is(AsmToken::EndOfStatement)) {
4375     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4376     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4377                                                       ISA.Stepping,
4378                                                       "AMD", "AMDGPU");
4379     return false;
4380   }
4381 
4382   if (ParseDirectiveMajorMinor(Major, Minor))
4383     return true;
4384 
4385   if (getLexer().isNot(AsmToken::Comma))
4386     return TokError("stepping version number required, comma expected");
4387   Lex();
4388 
4389   if (ParseAsAbsoluteExpression(Stepping))
4390     return TokError("invalid stepping version");
4391 
4392   if (getLexer().isNot(AsmToken::Comma))
4393     return TokError("vendor name required, comma expected");
4394   Lex();
4395 
4396   if (getLexer().isNot(AsmToken::String))
4397     return TokError("invalid vendor name");
4398 
4399   VendorName = getLexer().getTok().getStringContents();
4400   Lex();
4401 
4402   if (getLexer().isNot(AsmToken::Comma))
4403     return TokError("arch name required, comma expected");
4404   Lex();
4405 
4406   if (getLexer().isNot(AsmToken::String))
4407     return TokError("invalid arch name");
4408 
4409   ArchName = getLexer().getTok().getStringContents();
4410   Lex();
4411 
4412   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4413                                                     VendorName, ArchName);
4414   return false;
4415 }
4416 
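// Parse a single key/value entry of an amd_kernel_code_t block and check
// target-specific constraints on the value.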
4417 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4418                                                amd_kernel_code_t &Header) {
4419   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4420   // assembly for backwards compatibility.
4421   if (ID == "max_scratch_backing_memory_byte_size") {
4422     Parser.eatToEndOfStatement();
4423     return false;
4424   }
4425 
4426   SmallString<40> ErrStr;
4427   raw_svector_ostream Err(ErrStr);
4428   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4429     return TokError(Err.str());
4430   }
4431   Lex();
4432 
4433   if (ID == "enable_wavefront_size32") {
4434     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4435       if (!isGFX10())
4436         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4437       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4438         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4439     } else {
4440       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4441         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4442     }
4443   }
4444 
4445   if (ID == "wavefront_size") {
4446     if (Header.wavefront_size == 5) {
4447       if (!isGFX10())
4448         return TokError("wavefront_size=5 is only allowed on GFX10+");
4449       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4450         return TokError("wavefront_size=5 requires +WavefrontSize32");
4451     } else if (Header.wavefront_size == 6) {
4452       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4453         return TokError("wavefront_size=6 requires +WavefrontSize64");
4454     }
4455   }
4456 
4457   if (ID == "enable_wgp_mode") {
4458     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4459       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4460   }
4461 
4462   if (ID == "enable_mem_ordered") {
4463     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4464       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4465   }
4466 
4467   if (ID == "enable_fwd_progress") {
4468     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4469       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4470   }
4471 
4472   return false;
4473 }
4474 
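// Parse an .amd_kernel_code_t ... .end_amd_kernel_code_t block and emit the
// resulting amd_kernel_code_t header.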
4475 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4476   amd_kernel_code_t Header;
4477   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4478 
4479   while (true) {
4480     // Lex EndOfStatement. This is in a while loop because lexing a comment
4481     // will set the current token to EndOfStatement.
4482     while(getLexer().is(AsmToken::EndOfStatement))
4483       Lex();
4484 
4485     if (getLexer().isNot(AsmToken::Identifier))
4486       return TokError("expected value identifier or .end_amd_kernel_code_t");
4487 
4488     StringRef ID = getLexer().getTok().getIdentifier();
4489     Lex();
4490 
4491     if (ID == ".end_amd_kernel_code_t")
4492       break;
4493 
4494     if (ParseAMDKernelCodeTValue(ID, Header))
4495       return true;
4496   }
4497 
4498   getTargetStreamer().EmitAMDKernelCodeT(Header);
4499 
4500   return false;
4501 }
4502 
4503 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4504   if (getLexer().isNot(AsmToken::Identifier))
4505     return TokError("expected symbol name");
4506 
4507   StringRef KernelName = Parser.getTok().getString();
4508 
4509   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4510                                            ELF::STT_AMDGPU_HSA_KERNEL);
4511   Lex();
4512 
4513   KernelScope.initialize(getContext());
4514   return false;
4515 }
4516 
4517 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4518   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4519     return Error(getParser().getTok().getLoc(),
4520                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4521                  "architectures");
4522   }
4523 
4524   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4525 
4526   std::string ISAVersionStringFromSTI;
4527   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4528   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4529 
4530   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4531     return Error(getParser().getTok().getLoc(),
4532                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4533                  "arguments specified through the command line");
4534   }
4535 
4536   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4537   Lex();
4538 
4539   return false;
4540 }
4541 
4542 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4543   const char *AssemblerDirectiveBegin;
4544   const char *AssemblerDirectiveEnd;
4545   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4546       isHsaAbiVersion3(&getSTI())
4547           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4548                             HSAMD::V3::AssemblerDirectiveEnd)
4549           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4550                             HSAMD::AssemblerDirectiveEnd);
4551 
4552   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4553     return Error(getParser().getTok().getLoc(),
4554                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4555                  "not available on non-amdhsa OSes")).str());
4556   }
4557 
4558   std::string HSAMetadataString;
4559   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4560                           HSAMetadataString))
4561     return true;
4562 
4563   if (isHsaAbiVersion3(&getSTI())) {
4564     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4565       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4566   } else {
4567     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4568       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4569   }
4570 
4571   return false;
4572 }
4573 
4574 /// Common code to parse out a block of text (typically YAML) between start and
4575 /// end directives.
4576 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4577                                           const char *AssemblerDirectiveEnd,
4578                                           std::string &CollectString) {
4579 
4580   raw_string_ostream CollectStream(CollectString);
4581 
4582   getLexer().setSkipSpace(false);
4583 
4584   bool FoundEnd = false;
4585   while (!getLexer().is(AsmToken::Eof)) {
4586     while (getLexer().is(AsmToken::Space)) {
4587       CollectStream << getLexer().getTok().getString();
4588       Lex();
4589     }
4590 
4591     if (getLexer().is(AsmToken::Identifier)) {
4592       StringRef ID = getLexer().getTok().getIdentifier();
4593       if (ID == AssemblerDirectiveEnd) {
4594         Lex();
4595         FoundEnd = true;
4596         break;
4597       }
4598     }
4599 
4600     CollectStream << Parser.parseStringToEndOfStatement()
4601                   << getContext().getAsmInfo()->getSeparatorString();
4602 
4603     Parser.eatToEndOfStatement();
4604   }
4605 
4606   getLexer().setSkipSpace(true);
4607 
4608   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4609     return TokError(Twine("expected directive ") +
4610                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4611   }
4612 
4613   CollectStream.flush();
4614   return false;
4615 }
4616 
4617 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4618 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4619   std::string String;
4620   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4621                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4622     return true;
4623 
4624   auto PALMetadata = getTargetStreamer().getPALMetadata();
4625   if (!PALMetadata->setFromString(String))
4626     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4627   return false;
4628 }
4629 
4630 /// Parse the assembler directive for old linear-format PAL metadata.
4631 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4632   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4633     return Error(getParser().getTok().getLoc(),
4634                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4635                  "not available on non-amdpal OSes")).str());
4636   }
4637 
4638   auto PALMetadata = getTargetStreamer().getPALMetadata();
4639   PALMetadata->setLegacy();
4640   for (;;) {
4641     uint32_t Key, Value;
4642     if (ParseAsAbsoluteExpression(Key)) {
4643       return TokError(Twine("invalid value in ") +
4644                       Twine(PALMD::AssemblerDirective));
4645     }
4646     if (getLexer().isNot(AsmToken::Comma)) {
4647       return TokError(Twine("expected an even number of values in ") +
4648                       Twine(PALMD::AssemblerDirective));
4649     }
4650     Lex();
4651     if (ParseAsAbsoluteExpression(Value)) {
4652       return TokError(Twine("invalid value in ") +
4653                       Twine(PALMD::AssemblerDirective));
4654     }
4655     PALMetadata->setRegister(Key, Value);
4656     if (getLexer().isNot(AsmToken::Comma))
4657       break;
4658     Lex();
4659   }
4660   return false;
4661 }
4662 
4663 /// ParseDirectiveAMDGPULDS
4664 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
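///
/// For example (symbol name and values are illustrative):
///   .amdgpu_lds lds_buffer, 256, 16
/// The alignment defaults to 4 when omitted and must be a power of two.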
4665 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4666   if (getParser().checkForValidSection())
4667     return true;
4668 
4669   StringRef Name;
4670   SMLoc NameLoc = getLexer().getLoc();
4671   if (getParser().parseIdentifier(Name))
4672     return TokError("expected identifier in directive");
4673 
4674   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4675   if (parseToken(AsmToken::Comma, "expected ','"))
4676     return true;
4677 
4678   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4679 
4680   int64_t Size;
4681   SMLoc SizeLoc = getLexer().getLoc();
4682   if (getParser().parseAbsoluteExpression(Size))
4683     return true;
4684   if (Size < 0)
4685     return Error(SizeLoc, "size must be non-negative");
4686   if (Size > LocalMemorySize)
4687     return Error(SizeLoc, "size is too large");
4688 
4689   int64_t Alignment = 4;
4690   if (getLexer().is(AsmToken::Comma)) {
4691     Lex();
4692     SMLoc AlignLoc = getLexer().getLoc();
4693     if (getParser().parseAbsoluteExpression(Alignment))
4694       return true;
4695     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4696       return Error(AlignLoc, "alignment must be a power of two");
4697 
4698     // Alignment larger than the size of LDS is possible in theory, as long
4699     // as the linker manages to place the symbol at address 0, but we do want
4700     // to make sure the alignment fits nicely into a 32-bit integer.
4701     if (Alignment >= 1u << 31)
4702       return Error(AlignLoc, "alignment is too large");
4703   }
4704 
4705   if (parseToken(AsmToken::EndOfStatement,
4706                  "unexpected token in '.amdgpu_lds' directive"))
4707     return true;
4708 
4709   Symbol->redefineIfPossible();
4710   if (!Symbol->isUndefined())
4711     return Error(NameLoc, "invalid symbol redefinition");
4712 
4713   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4714   return false;
4715 }
4716 
4717 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4718   StringRef IDVal = DirectiveID.getString();
4719 
4720   if (isHsaAbiVersion3(&getSTI())) {
4721     if (IDVal == ".amdgcn_target")
4722       return ParseDirectiveAMDGCNTarget();
4723 
4724     if (IDVal == ".amdhsa_kernel")
4725       return ParseDirectiveAMDHSAKernel();
4726 
4727     // TODO: Restructure/combine with PAL metadata directive.
4728     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4729       return ParseDirectiveHSAMetadata();
4730   } else {
4731     if (IDVal == ".hsa_code_object_version")
4732       return ParseDirectiveHSACodeObjectVersion();
4733 
4734     if (IDVal == ".hsa_code_object_isa")
4735       return ParseDirectiveHSACodeObjectISA();
4736 
4737     if (IDVal == ".amd_kernel_code_t")
4738       return ParseDirectiveAMDKernelCodeT();
4739 
4740     if (IDVal == ".amdgpu_hsa_kernel")
4741       return ParseDirectiveAMDGPUHsaKernel();
4742 
4743     if (IDVal == ".amd_amdgpu_isa")
4744       return ParseDirectiveISAVersion();
4745 
4746     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4747       return ParseDirectiveHSAMetadata();
4748   }
4749 
4750   if (IDVal == ".amdgpu_lds")
4751     return ParseDirectiveAMDGPULDS();
4752 
4753   if (IDVal == PALMD::AssemblerDirectiveBegin)
4754     return ParseDirectivePALMetadataBegin();
4755 
4756   if (IDVal == PALMD::AssemblerDirective)
4757     return ParseDirectivePALMetadata();
4758 
4759   return true;
4760 }
4761 
4762 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4763                                            unsigned RegNo) const {
4764 
4765   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4766        R.isValid(); ++R) {
4767     if (*R == RegNo)
4768       return isGFX9Plus();
4769   }
4770 
4771   // GFX10 has 2 more SGPRs: 104 and 105.
4772   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4773        R.isValid(); ++R) {
4774     if (*R == RegNo)
4775       return hasSGPR104_SGPR105();
4776   }
4777 
4778   switch (RegNo) {
4779   case AMDGPU::SRC_SHARED_BASE:
4780   case AMDGPU::SRC_SHARED_LIMIT:
4781   case AMDGPU::SRC_PRIVATE_BASE:
4782   case AMDGPU::SRC_PRIVATE_LIMIT:
4783   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4784     return !isCI() && !isSI() && !isVI();
4785   case AMDGPU::TBA:
4786   case AMDGPU::TBA_LO:
4787   case AMDGPU::TBA_HI:
4788   case AMDGPU::TMA:
4789   case AMDGPU::TMA_LO:
4790   case AMDGPU::TMA_HI:
4791     return !isGFX9() && !isGFX10();
4792   case AMDGPU::XNACK_MASK:
4793   case AMDGPU::XNACK_MASK_LO:
4794   case AMDGPU::XNACK_MASK_HI:
4795     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4796   case AMDGPU::SGPR_NULL:
4797     return isGFX10();
4798   default:
4799     break;
4800   }
4801 
4802   if (isCI())
4803     return true;
4804 
4805   if (isSI() || isGFX10()) {
4806     // No flat_scr on SI.
4807     // On GFX10 flat scratch is not a valid register operand and can only be
4808     // accessed with s_setreg/s_getreg.
4809     switch (RegNo) {
4810     case AMDGPU::FLAT_SCR:
4811     case AMDGPU::FLAT_SCR_LO:
4812     case AMDGPU::FLAT_SCR_HI:
4813       return false;
4814     default:
4815       return true;
4816     }
4817   }
4818 
4819   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4820   // SI/CI have.
4821   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4822        R.isValid(); ++R) {
4823     if (*R == RegNo)
4824       return hasSGPR102_SGPR103();
4825   }
4826 
4827   return true;
4828 }
4829 
4830 OperandMatchResultTy
4831 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4832                               OperandMode Mode) {
4833   // Try to parse with a custom parser
4834   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4835 
4836   // If we successfully parsed the operand or if there was an error parsing,
4837   // we are done.
4838   //
4839   // If we are parsing after we reach EndOfStatement then this means we
4840   // are appending default values to the Operands list.  This is only done
4841   // by custom parser, so we shouldn't continue on to the generic parsing.
4842   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4843       getLexer().is(AsmToken::EndOfStatement))
4844     return ResTy;
4845 
4846   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4847     unsigned Prefix = Operands.size();
4848     SMLoc LBraceLoc = getTok().getLoc();
4849     Parser.Lex(); // eat the '['
4850 
4851     for (;;) {
4852       ResTy = parseReg(Operands);
4853       if (ResTy != MatchOperand_Success)
4854         return ResTy;
4855 
4856       if (getLexer().is(AsmToken::RBrac))
4857         break;
4858 
4859       if (getLexer().isNot(AsmToken::Comma))
4860         return MatchOperand_ParseFail;
4861       Parser.Lex();
4862     }
4863 
4864     if (Operands.size() - Prefix > 1) {
4865       Operands.insert(Operands.begin() + Prefix,
4866                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4867       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4868                                                     getTok().getLoc()));
4869     }
4870 
4871     Parser.Lex(); // eat the ']'
4872     return MatchOperand_Success;
4873   }
4874 
4875   return parseRegOrImm(Operands);
4876 }
4877 
4878 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4879   // Clear any forced encodings from the previous instruction.
4880   setForcedEncodingSize(0);
4881   setForcedDPP(false);
4882   setForcedSDWA(false);
4883 
4884   if (Name.endswith("_e64")) {
4885     setForcedEncodingSize(64);
4886     return Name.substr(0, Name.size() - 4);
4887   } else if (Name.endswith("_e32")) {
4888     setForcedEncodingSize(32);
4889     return Name.substr(0, Name.size() - 4);
4890   } else if (Name.endswith("_dpp")) {
4891     setForcedDPP(true);
4892     return Name.substr(0, Name.size() - 4);
4893   } else if (Name.endswith("_sdwa")) {
4894     setForcedSDWA(true);
4895     return Name.substr(0, Name.size() - 5);
4896   }
4897   return Name;
4898 }
4899 
4900 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4901                                        StringRef Name,
4902                                        SMLoc NameLoc, OperandVector &Operands) {
4903   // Add the instruction mnemonic
4904   Name = parseMnemonicSuffix(Name);
4905   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4906 
4907   bool IsMIMG = Name.startswith("image_");
4908 
4909   while (!getLexer().is(AsmToken::EndOfStatement)) {
4910     OperandMode Mode = OperandMode_Default;
4911     if (IsMIMG && isGFX10() && Operands.size() == 2)
4912       Mode = OperandMode_NSA;
4913     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4914 
4915     // Eat the comma or space if there is one.
4916     if (getLexer().is(AsmToken::Comma))
4917       Parser.Lex();
4918 
4919     if (Res != MatchOperand_Success) {
4920       checkUnsupportedInstruction(Name, NameLoc);
4921       if (!Parser.hasPendingError()) {
4922         // FIXME: use real operand location rather than the current location.
4923         StringRef Msg =
4924           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4925                                             "not a valid operand.";
4926         Error(getLexer().getLoc(), Msg);
4927       }
4928       while (!getLexer().is(AsmToken::EndOfStatement)) {
4929         Parser.Lex();
4930       }
4931       return true;
4932     }
4933   }
4934 
4935   return false;
4936 }
4937 
4938 //===----------------------------------------------------------------------===//
4939 // Utility functions
4940 //===----------------------------------------------------------------------===//
4941 
4942 OperandMatchResultTy
4943 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4944 
4945   if (!trySkipId(Prefix, AsmToken::Colon))
4946     return MatchOperand_NoMatch;
4947 
4948   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4949 }
4950 
4951 OperandMatchResultTy
4952 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4953                                     AMDGPUOperand::ImmTy ImmTy,
4954                                     bool (*ConvertResult)(int64_t&)) {
4955   SMLoc S = getLoc();
4956   int64_t Value = 0;
4957 
4958   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4959   if (Res != MatchOperand_Success)
4960     return Res;
4961 
4962   if (ConvertResult && !ConvertResult(Value)) {
4963     Error(S, "invalid " + StringRef(Prefix) + " value.");
4964   }
4965 
4966   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4967   return MatchOperand_Success;
4968 }
4969 
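// Parses an operand of the form <Prefix>:[<b0>,<b1>,...] where each element
// must be 0 or 1 and at most 4 elements are accepted; the elements are packed
// into a bitmask. For example, with Prefix == "op_sel" (an illustrative
// prefix chosen by the caller): op_sel:[0,1,1,0].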
4970 OperandMatchResultTy
4971 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4972                                              OperandVector &Operands,
4973                                              AMDGPUOperand::ImmTy ImmTy,
4974                                              bool (*ConvertResult)(int64_t&)) {
4975   SMLoc S = getLoc();
4976   if (!trySkipId(Prefix, AsmToken::Colon))
4977     return MatchOperand_NoMatch;
4978 
4979   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4980     return MatchOperand_ParseFail;
4981 
4982   unsigned Val = 0;
4983   const unsigned MaxSize = 4;
4984 
4985   // FIXME: How to verify the number of elements matches the number of src
4986   // operands?
4987   for (int I = 0; ; ++I) {
4988     int64_t Op;
4989     SMLoc Loc = getLoc();
4990     if (!parseExpr(Op))
4991       return MatchOperand_ParseFail;
4992 
4993     if (Op != 0 && Op != 1) {
4994       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4995       return MatchOperand_ParseFail;
4996     }
4997 
4998     Val |= (Op << I);
4999 
5000     if (trySkipToken(AsmToken::RBrac))
5001       break;
5002 
5003     if (I + 1 == MaxSize) {
5004       Error(getLoc(), "expected a closing square bracket");
5005       return MatchOperand_ParseFail;
5006     }
5007 
5008     if (!skipToken(AsmToken::Comma, "expected a comma"))
5009       return MatchOperand_ParseFail;
5010   }
5011 
5012   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5013   return MatchOperand_Success;
5014 }
5015 
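// Parses an optional named bit modifier. The token must either match Name
// exactly (bit = 1) or be Name with a "no" prefix (bit = 0); if the operand
// is absent, the default value 0 is used. The r128 and a16 names receive
// extra subtarget checks below.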
5016 OperandMatchResultTy
5017 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5018                                AMDGPUOperand::ImmTy ImmTy) {
5019   int64_t Bit = 0;
5020   SMLoc S = Parser.getTok().getLoc();
5021 
5022   // If we are already at the end of the statement, this is a default
5023   // argument, so keep the default value.
5024   if (getLexer().isNot(AsmToken::EndOfStatement)) {
5025     switch (getLexer().getKind()) {
5026       case AsmToken::Identifier: {
5027         StringRef Tok = Parser.getTok().getString();
5028         if (Tok == Name) {
5029           if (Tok == "r128" && !hasMIMG_R128())
5030             Error(S, "r128 modifier is not supported on this GPU");
5031           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5032             Error(S, "a16 modifier is not supported on this GPU");
5033           Bit = 1;
5034           Parser.Lex();
5035         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5036           Bit = 0;
5037           Parser.Lex();
5038         } else {
5039           return MatchOperand_NoMatch;
5040         }
5041         break;
5042       }
5043       default:
5044         return MatchOperand_NoMatch;
5045     }
5046   }
5047 
5048   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
5049     return MatchOperand_ParseFail;
5050 
5051   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5052     ImmTy = AMDGPUOperand::ImmTyR128A16;
5053 
5054   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5055   return MatchOperand_Success;
5056 }
5057 
5058 static void addOptionalImmOperand(
5059   MCInst& Inst, const OperandVector& Operands,
5060   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5061   AMDGPUOperand::ImmTy ImmT,
5062   int64_t Default = 0) {
5063   auto i = OptionalIdx.find(ImmT);
5064   if (i != OptionalIdx.end()) {
5065     unsigned Idx = i->second;
5066     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5067   } else {
5068     Inst.addOperand(MCOperand::createImm(Default));
5069   }
5070 }
5071 
5072 OperandMatchResultTy
5073 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5074   if (getLexer().isNot(AsmToken::Identifier)) {
5075     return MatchOperand_NoMatch;
5076   }
5077   StringRef Tok = Parser.getTok().getString();
5078   if (Tok != Prefix) {
5079     return MatchOperand_NoMatch;
5080   }
5081 
5082   Parser.Lex();
5083   if (getLexer().isNot(AsmToken::Colon)) {
5084     return MatchOperand_ParseFail;
5085   }
5086 
5087   Parser.Lex();
5088   if (getLexer().isNot(AsmToken::Identifier)) {
5089     return MatchOperand_ParseFail;
5090   }
5091 
5092   Value = Parser.getTok().getString();
5093   return MatchOperand_Success;
5094 }
5095 
5096 //===----------------------------------------------------------------------===//
5097 // MTBUF format
5098 //===----------------------------------------------------------------------===//
5099 
5100 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5101                                   int64_t MaxVal,
5102                                   int64_t &Fmt) {
5103   int64_t Val;
5104   SMLoc Loc = getLoc();
5105 
5106   auto Res = parseIntWithPrefix(Pref, Val);
5107   if (Res == MatchOperand_ParseFail)
5108     return false;
5109   if (Res == MatchOperand_NoMatch)
5110     return true;
5111 
5112   if (Val < 0 || Val > MaxVal) {
5113     Error(Loc, Twine("out of range ", StringRef(Pref)));
5114     return false;
5115   }
5116 
5117   Fmt = Val;
5118   return true;
5119 }
5120 
5121 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5122 // values to live in a joint format operand in the MCInst encoding.
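// For example: "dfmt:4, nfmt:7", "nfmt:7, dfmt:4" or just "dfmt:4"
// (the values shown are illustrative).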
5123 OperandMatchResultTy
5124 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5125   using namespace llvm::AMDGPU::MTBUFFormat;
5126 
5127   int64_t Dfmt = DFMT_UNDEF;
5128   int64_t Nfmt = NFMT_UNDEF;
5129 
5130   // dfmt and nfmt can appear in either order, and each is optional.
5131   for (int I = 0; I < 2; ++I) {
5132     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5133       return MatchOperand_ParseFail;
5134 
5135     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5136       return MatchOperand_ParseFail;
5137     }
5138     // Skip optional comma between dfmt/nfmt
5139     // but guard against 2 commas following each other.
5140     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5141         !peekToken().is(AsmToken::Comma)) {
5142       trySkipToken(AsmToken::Comma);
5143     }
5144   }
5145 
5146   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5147     return MatchOperand_NoMatch;
5148 
5149   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5150   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5151 
5152   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5153   return MatchOperand_Success;
5154 }
5155 
5156 OperandMatchResultTy
5157 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5158   using namespace llvm::AMDGPU::MTBUFFormat;
5159 
5160   int64_t Fmt = UFMT_UNDEF;
5161 
5162   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5163     return MatchOperand_ParseFail;
5164 
5165   if (Fmt == UFMT_UNDEF)
5166     return MatchOperand_NoMatch;
5167 
5168   Format = Fmt;
5169   return MatchOperand_Success;
5170 }
5171 
5172 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5173                                     int64_t &Nfmt,
5174                                     StringRef FormatStr,
5175                                     SMLoc Loc) {
5176   using namespace llvm::AMDGPU::MTBUFFormat;
5177   int64_t Format;
5178 
5179   Format = getDfmt(FormatStr);
5180   if (Format != DFMT_UNDEF) {
5181     Dfmt = Format;
5182     return true;
5183   }
5184 
5185   Format = getNfmt(FormatStr, getSTI());
5186   if (Format != NFMT_UNDEF) {
5187     Nfmt = Format;
5188     return true;
5189   }
5190 
5191   Error(Loc, "unsupported format");
5192   return false;
5193 }
5194 
5195 OperandMatchResultTy
5196 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5197                                           SMLoc FormatLoc,
5198                                           int64_t &Format) {
5199   using namespace llvm::AMDGPU::MTBUFFormat;
5200 
5201   int64_t Dfmt = DFMT_UNDEF;
5202   int64_t Nfmt = NFMT_UNDEF;
5203   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5204     return MatchOperand_ParseFail;
5205 
5206   if (trySkipToken(AsmToken::Comma)) {
5207     StringRef Str;
5208     SMLoc Loc = getLoc();
5209     if (!parseId(Str, "expected a format string") ||
5210         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5211       return MatchOperand_ParseFail;
5212     }
5213     if (Dfmt == DFMT_UNDEF) {
5214       Error(Loc, "duplicate numeric format");
5215       return MatchOperand_ParseFail;
5216     } else if (Nfmt == NFMT_UNDEF) {
5217       Error(Loc, "duplicate data format");
5218       return MatchOperand_ParseFail;
5219     }
5220   }
5221 
5222   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5223   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5224 
5225   if (isGFX10()) {
5226     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5227     if (Ufmt == UFMT_UNDEF) {
5228       Error(FormatLoc, "unsupported format");
5229       return MatchOperand_ParseFail;
5230     }
5231     Format = Ufmt;
5232   } else {
5233     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5234   }
5235 
5236   return MatchOperand_Success;
5237 }
5238 
5239 OperandMatchResultTy
5240 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5241                                             SMLoc Loc,
5242                                             int64_t &Format) {
5243   using namespace llvm::AMDGPU::MTBUFFormat;
5244 
5245   auto Id = getUnifiedFormat(FormatStr);
5246   if (Id == UFMT_UNDEF)
5247     return MatchOperand_NoMatch;
5248 
5249   if (!isGFX10()) {
5250     Error(Loc, "unified format is not supported on this GPU");
5251     return MatchOperand_ParseFail;
5252   }
5253 
5254   Format = Id;
5255   return MatchOperand_Success;
5256 }
5257 
5258 OperandMatchResultTy
5259 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5260   using namespace llvm::AMDGPU::MTBUFFormat;
5261   SMLoc Loc = getLoc();
5262 
5263   if (!parseExpr(Format))
5264     return MatchOperand_ParseFail;
5265   if (!isValidFormatEncoding(Format, getSTI())) {
5266     Error(Loc, "out of range format");
5267     return MatchOperand_ParseFail;
5268   }
5269 
5270   return MatchOperand_Success;
5271 }
5272 
5273 OperandMatchResultTy
5274 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5275   using namespace llvm::AMDGPU::MTBUFFormat;
5276 
5277   if (!trySkipId("format", AsmToken::Colon))
5278     return MatchOperand_NoMatch;
5279 
5280   if (trySkipToken(AsmToken::LBrac)) {
5281     StringRef FormatStr;
5282     SMLoc Loc = getLoc();
5283     if (!parseId(FormatStr, "expected a format string"))
5284       return MatchOperand_ParseFail;
5285 
5286     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5287     if (Res == MatchOperand_NoMatch)
5288       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5289     if (Res != MatchOperand_Success)
5290       return Res;
5291 
5292     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5293       return MatchOperand_ParseFail;
5294 
5295     return MatchOperand_Success;
5296   }
5297 
5298   return parseNumericFormat(Format);
5299 }
5300 
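// Entry point for the MTBUF format operand. The format may be given as a
// numeric code or as a bracketed symbolic string, and may precede or follow
// the soffset operand. For example (the symbolic name is illustrative):
//   format:22
//   format:[BUF_FMT_32_FLOAT]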
5301 OperandMatchResultTy
5302 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5303   using namespace llvm::AMDGPU::MTBUFFormat;
5304 
5305   int64_t Format = getDefaultFormatEncoding(getSTI());
5306   OperandMatchResultTy Res;
5307   SMLoc Loc = getLoc();
5308 
5309   // Parse legacy format syntax.
5310   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5311   if (Res == MatchOperand_ParseFail)
5312     return Res;
5313 
5314   bool FormatFound = (Res == MatchOperand_Success);
5315 
5316   Operands.push_back(
5317     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5318 
5319   if (FormatFound)
5320     trySkipToken(AsmToken::Comma);
5321 
5322   if (isToken(AsmToken::EndOfStatement)) {
5323     // We are expecting an soffset operand,
5324     // but let the matcher handle the error.
5325     return MatchOperand_Success;
5326   }
5327 
5328   // Parse soffset.
5329   Res = parseRegOrImm(Operands);
5330   if (Res != MatchOperand_Success)
5331     return Res;
5332 
5333   trySkipToken(AsmToken::Comma);
5334 
5335   if (!FormatFound) {
5336     Res = parseSymbolicOrNumericFormat(Format);
5337     if (Res == MatchOperand_ParseFail)
5338       return Res;
5339     if (Res == MatchOperand_Success) {
5340       auto Size = Operands.size();
5341       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5342       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5343       Op.setImm(Format);
5344     }
5345     return MatchOperand_Success;
5346   }
5347 
5348   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5349     Error(getLoc(), "duplicate format");
5350     return MatchOperand_ParseFail;
5351   }
5352   return MatchOperand_Success;
5353 }
5354 
5355 //===----------------------------------------------------------------------===//
5356 // ds
5357 //===----------------------------------------------------------------------===//
5358 
5359 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5360                                     const OperandVector &Operands) {
5361   OptionalImmIndexMap OptionalIdx;
5362 
5363   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5364     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5365 
5366     // Add the register arguments
5367     if (Op.isReg()) {
5368       Op.addRegOperands(Inst, 1);
5369       continue;
5370     }
5371 
5372     // Handle optional arguments
5373     OptionalIdx[Op.getImmTy()] = i;
5374   }
5375 
5376   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5377   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5378   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5379 
5380   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5381 }
5382 
5383 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5384                                 bool IsGdsHardcoded) {
5385   OptionalImmIndexMap OptionalIdx;
5386 
5387   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5388     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5389 
5390     // Add the register arguments
5391     if (Op.isReg()) {
5392       Op.addRegOperands(Inst, 1);
5393       continue;
5394     }
5395 
5396     if (Op.isToken() && Op.getToken() == "gds") {
5397       IsGdsHardcoded = true;
5398       continue;
5399     }
5400 
5401     // Handle optional arguments
5402     OptionalIdx[Op.getImmTy()] = i;
5403   }
5404 
5405   AMDGPUOperand::ImmTy OffsetType =
5406     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5407      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5408      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5409                                                       AMDGPUOperand::ImmTyOffset;
5410 
5411   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5412 
5413   if (!IsGdsHardcoded) {
5414     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5415   }
5416   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5417 }
5418 
5419 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5420   OptionalImmIndexMap OptionalIdx;
5421 
5422   unsigned OperandIdx[4];
5423   unsigned EnMask = 0;
5424   int SrcIdx = 0;
5425 
5426   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5427     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5428 
5429     // Add the register arguments
5430     if (Op.isReg()) {
5431       assert(SrcIdx < 4);
5432       OperandIdx[SrcIdx] = Inst.size();
5433       Op.addRegOperands(Inst, 1);
5434       ++SrcIdx;
5435       continue;
5436     }
5437 
5438     if (Op.isOff()) {
5439       assert(SrcIdx < 4);
5440       OperandIdx[SrcIdx] = Inst.size();
5441       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5442       ++SrcIdx;
5443       continue;
5444     }
5445 
5446     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5447       Op.addImmOperands(Inst, 1);
5448       continue;
5449     }
5450 
5451     if (Op.isToken() && Op.getToken() == "done")
5452       continue;
5453 
5454     // Handle optional arguments
5455     OptionalIdx[Op.getImmTy()] = i;
5456   }
5457 
5458   assert(SrcIdx == 4);
5459 
5460   bool Compr = false;
5461   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5462     Compr = true;
5463     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5464     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5465     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5466   }
5467 
5468   for (auto i = 0; i < SrcIdx; ++i) {
5469     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5470       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5471     }
5472   }
5473 
5474   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5475   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5476 
5477   Inst.addOperand(MCOperand::createImm(EnMask));
5478 }
5479 
5480 //===----------------------------------------------------------------------===//
5481 // s_waitcnt
5482 //===----------------------------------------------------------------------===//
5483 
5484 static bool
5485 encodeCnt(
5486   const AMDGPU::IsaVersion ISA,
5487   int64_t &IntVal,
5488   int64_t CntVal,
5489   bool Saturate,
5490   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5491   unsigned (*decode)(const IsaVersion &Version, unsigned))
5492 {
5493   bool Failed = false;
5494 
5495   IntVal = encode(ISA, IntVal, CntVal);
5496   if (CntVal != decode(ISA, IntVal)) {
5497     if (Saturate) {
5498       IntVal = encode(ISA, IntVal, -1);
5499     } else {
5500       Failed = true;
5501     }
5502   }
5503   return Failed;
5504 }
5505 
5506 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5507 
5508   SMLoc CntLoc = getLoc();
5509   StringRef CntName = getTokenStr();
5510 
5511   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5512       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5513     return false;
5514 
5515   int64_t CntVal;
5516   SMLoc ValLoc = getLoc();
5517   if (!parseExpr(CntVal))
5518     return false;
5519 
5520   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5521 
5522   bool Failed = true;
5523   bool Sat = CntName.endswith("_sat");
5524 
5525   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5526     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5527   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5528     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5529   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5530     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5531   } else {
5532     Error(CntLoc, "invalid counter name " + CntName);
5533     return false;
5534   }
5535 
5536   if (Failed) {
5537     Error(ValLoc, "too large value for " + CntName);
5538     return false;
5539   }
5540 
5541   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5542     return false;
5543 
5544   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5545     if (isToken(AsmToken::EndOfStatement)) {
5546       Error(getLoc(), "expected a counter name");
5547       return false;
5548     }
5549   }
5550 
5551   return true;
5552 }
5553 
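// An s_waitcnt operand is either a plain immediate or a list of named
// counters, e.g. (the counts are illustrative):
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt expcnt(2), vmcnt(1)
// A counter name with the "_sat" suffix (e.g. vmcnt_sat) saturates instead
// of reporting an out-of-range value.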
5554 OperandMatchResultTy
5555 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5556   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5557   int64_t Waitcnt = getWaitcntBitMask(ISA);
5558   SMLoc S = getLoc();
5559 
5560   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5561     while (!isToken(AsmToken::EndOfStatement)) {
5562       if (!parseCnt(Waitcnt))
5563         return MatchOperand_ParseFail;
5564     }
5565   } else {
5566     if (!parseExpr(Waitcnt))
5567       return MatchOperand_ParseFail;
5568   }
5569 
5570   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5571   return MatchOperand_Success;
5572 }
5573 
5574 bool
5575 AMDGPUOperand::isSWaitCnt() const {
5576   return isImm();
5577 }
5578 
5579 //===----------------------------------------------------------------------===//
5580 // hwreg
5581 //===----------------------------------------------------------------------===//
5582 
5583 bool
5584 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5585                                 int64_t &Offset,
5586                                 int64_t &Width) {
5587   using namespace llvm::AMDGPU::Hwreg;
5588 
5589   // The register may be specified by name or using a numeric code
5590   if (isToken(AsmToken::Identifier) &&
5591       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5592     HwReg.IsSymbolic = true;
5593     lex(); // skip register name
5594   } else if (!parseExpr(HwReg.Id)) {
5595     return false;
5596   }
5597 
5598   if (trySkipToken(AsmToken::RParen))
5599     return true;
5600 
5601   // parse optional params
5602   return
5603     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5604     parseExpr(Offset) &&
5605     skipToken(AsmToken::Comma, "expected a comma") &&
5606     parseExpr(Width) &&
5607     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5608 }
5609 
5610 bool
5611 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5612                                const int64_t Offset,
5613                                const int64_t Width,
5614                                const SMLoc Loc) {
5615 
5616   using namespace llvm::AMDGPU::Hwreg;
5617 
5618   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5619     Error(Loc, "specified hardware register is not supported on this GPU");
5620     return false;
5621   } else if (!isValidHwreg(HwReg.Id)) {
5622     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5623     return false;
5624   } else if (!isValidHwregOffset(Offset)) {
5625     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5626     return false;
5627   } else if (!isValidHwregWidth(Width)) {
5628     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5629     return false;
5630   }
5631   return true;
5632 }
5633 
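// A hwreg operand is either a raw 16-bit immediate or the symbolic form
//   hwreg(<name or id>[, <bit offset>, <bit width>])
// e.g. hwreg(HW_REG_MODE, 0, 32) -- the register name here is illustrative;
// valid names are those accepted by getHwregId().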
5634 OperandMatchResultTy
5635 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5636   using namespace llvm::AMDGPU::Hwreg;
5637 
5638   int64_t ImmVal = 0;
5639   SMLoc Loc = getLoc();
5640 
5641   if (trySkipId("hwreg", AsmToken::LParen)) {
5642     OperandInfoTy HwReg(ID_UNKNOWN_);
5643     int64_t Offset = OFFSET_DEFAULT_;
5644     int64_t Width = WIDTH_DEFAULT_;
5645     if (parseHwregBody(HwReg, Offset, Width) &&
5646         validateHwreg(HwReg, Offset, Width, Loc)) {
5647       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5648     } else {
5649       return MatchOperand_ParseFail;
5650     }
5651   } else if (parseExpr(ImmVal)) {
5652     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5653       Error(Loc, "invalid immediate: only 16-bit values are legal");
5654       return MatchOperand_ParseFail;
5655     }
5656   } else {
5657     return MatchOperand_ParseFail;
5658   }
5659 
5660   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5661   return MatchOperand_Success;
5662 }
5663 
5664 bool AMDGPUOperand::isHwreg() const {
5665   return isImmTy(ImmTyHwreg);
5666 }
5667 
5668 //===----------------------------------------------------------------------===//
5669 // sendmsg
5670 //===----------------------------------------------------------------------===//
5671 
5672 bool
5673 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5674                                   OperandInfoTy &Op,
5675                                   OperandInfoTy &Stream) {
5676   using namespace llvm::AMDGPU::SendMsg;
5677 
5678   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5679     Msg.IsSymbolic = true;
5680     lex(); // skip message name
5681   } else if (!parseExpr(Msg.Id)) {
5682     return false;
5683   }
5684 
5685   if (trySkipToken(AsmToken::Comma)) {
5686     Op.IsDefined = true;
5687     if (isToken(AsmToken::Identifier) &&
5688         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5689       lex(); // skip operation name
5690     } else if (!parseExpr(Op.Id)) {
5691       return false;
5692     }
5693 
5694     if (trySkipToken(AsmToken::Comma)) {
5695       Stream.IsDefined = true;
5696       if (!parseExpr(Stream.Id))
5697         return false;
5698     }
5699   }
5700 
5701   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5702 }
5703 
5704 bool
5705 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5706                                  const OperandInfoTy &Op,
5707                                  const OperandInfoTy &Stream,
5708                                  const SMLoc S) {
5709   using namespace llvm::AMDGPU::SendMsg;
5710 
5711   // Validation strictness depends on whether the message is specified
5712   // in a symbolic or in a numeric form. In the latter case
5713   // only the encoding possibility is checked.
5714   bool Strict = Msg.IsSymbolic;
5715 
5716   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5717     Error(S, "invalid message id");
5718     return false;
5719   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5720     Error(S, Op.IsDefined ?
5721              "message does not support operations" :
5722              "missing message operation");
5723     return false;
5724   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5725     Error(S, "invalid operation id");
5726     return false;
5727   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5728     Error(S, "message operation does not support streams");
5729     return false;
5730   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5731     Error(S, "invalid message stream id");
5732     return false;
5733   }
5734   return true;
5735 }
5736 
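// A sendmsg operand is either a raw 16-bit immediate or the symbolic form
//   sendmsg(<msg>[, <operation>[, <stream>]])
// e.g. sendmsg(MSG_GS, GS_OP_EMIT, 0) -- the symbolic names shown are
// illustrative; valid names are those accepted by getMsgId()/getMsgOpId().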
5737 OperandMatchResultTy
5738 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5739   using namespace llvm::AMDGPU::SendMsg;
5740 
5741   int64_t ImmVal = 0;
5742   SMLoc Loc = getLoc();
5743 
5744   if (trySkipId("sendmsg", AsmToken::LParen)) {
5745     OperandInfoTy Msg(ID_UNKNOWN_);
5746     OperandInfoTy Op(OP_NONE_);
5747     OperandInfoTy Stream(STREAM_ID_NONE_);
5748     if (parseSendMsgBody(Msg, Op, Stream) &&
5749         validateSendMsg(Msg, Op, Stream, Loc)) {
5750       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5751     } else {
5752       return MatchOperand_ParseFail;
5753     }
5754   } else if (parseExpr(ImmVal)) {
5755     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5756       Error(Loc, "invalid immediate: only 16-bit values are legal");
5757       return MatchOperand_ParseFail;
5758     }
5759   } else {
5760     return MatchOperand_ParseFail;
5761   }
5762 
5763   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5764   return MatchOperand_Success;
5765 }
5766 
5767 bool AMDGPUOperand::isSendMsg() const {
5768   return isImmTy(ImmTySendMsg);
5769 }
5770 
5771 //===----------------------------------------------------------------------===//
5772 // v_interp
5773 //===----------------------------------------------------------------------===//
5774 
5775 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5776   if (getLexer().getKind() != AsmToken::Identifier)
5777     return MatchOperand_NoMatch;
5778 
5779   StringRef Str = Parser.getTok().getString();
5780   int Slot = StringSwitch<int>(Str)
5781     .Case("p10", 0)
5782     .Case("p20", 1)
5783     .Case("p0", 2)
5784     .Default(-1);
5785 
5786   SMLoc S = Parser.getTok().getLoc();
5787   if (Slot == -1)
5788     return MatchOperand_ParseFail;
5789 
5790   Parser.Lex();
5791   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5792                                               AMDGPUOperand::ImmTyInterpSlot));
5793   return MatchOperand_Success;
5794 }
5795 
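// An interpolation attribute operand has the form attr<N>.<chan>, where <N>
// is a decimal index in [0, 63] and <chan> is one of x, y, z or w,
// e.g. attr0.x or attr31.w.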
5796 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5797   if (getLexer().getKind() != AsmToken::Identifier)
5798     return MatchOperand_NoMatch;
5799 
5800   StringRef Str = Parser.getTok().getString();
5801   if (!Str.startswith("attr"))
5802     return MatchOperand_NoMatch;
5803 
5804   StringRef Chan = Str.take_back(2);
5805   int AttrChan = StringSwitch<int>(Chan)
5806     .Case(".x", 0)
5807     .Case(".y", 1)
5808     .Case(".z", 2)
5809     .Case(".w", 3)
5810     .Default(-1);
5811   if (AttrChan == -1)
5812     return MatchOperand_ParseFail;
5813 
5814   Str = Str.drop_back(2).drop_front(4);
5815 
5816   uint8_t Attr;
5817   if (Str.getAsInteger(10, Attr))
5818     return MatchOperand_ParseFail;
5819 
5820   SMLoc S = Parser.getTok().getLoc();
5821   Parser.Lex();
5822   if (Attr > 63) {
5823     Error(S, "out of bounds attr");
5824     return MatchOperand_ParseFail;
5825   }
5826 
5827   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5828 
5829   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5830                                               AMDGPUOperand::ImmTyInterpAttr));
5831   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5832                                               AMDGPUOperand::ImmTyAttrChan));
5833   return MatchOperand_Success;
5834 }
5835 
5836 //===----------------------------------------------------------------------===//
5837 // exp
5838 //===----------------------------------------------------------------------===//
5839 
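// Symbolic exp targets and the hardware indices they map to (see below):
//   null -> 9, mrtz -> 8, mrt<N> -> N for N in [0,7], pos<N> -> 12+N
//   (pos4 requires GFX10), prim -> 20 (GFX10 only), param<N> -> 32+N
//   for N in [0,31].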
5840 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5841                                                       uint8_t &Val) {
5842   if (Str == "null") {
5843     Val = 9;
5844     return MatchOperand_Success;
5845   }
5846 
5847   if (Str.startswith("mrt")) {
5848     Str = Str.drop_front(3);
5849     if (Str == "z") { // == mrtz
5850       Val = 8;
5851       return MatchOperand_Success;
5852     }
5853 
5854     if (Str.getAsInteger(10, Val))
5855       return MatchOperand_ParseFail;
5856 
5857     if (Val > 7)
5858       return MatchOperand_ParseFail;
5859 
5860     return MatchOperand_Success;
5861   }
5862 
5863   if (Str.startswith("pos")) {
5864     Str = Str.drop_front(3);
5865     if (Str.getAsInteger(10, Val))
5866       return MatchOperand_ParseFail;
5867 
5868     if (Val > 4 || (Val == 4 && !isGFX10()))
5869       return MatchOperand_ParseFail;
5870 
5871     Val += 12;
5872     return MatchOperand_Success;
5873   }
5874 
5875   if (isGFX10() && Str == "prim") {
5876     Val = 20;
5877     return MatchOperand_Success;
5878   }
5879 
5880   if (Str.startswith("param")) {
5881     Str = Str.drop_front(5);
5882     if (Str.getAsInteger(10, Val))
5883       return MatchOperand_ParseFail;
5884 
5885     if (Val >= 32)
5886       return MatchOperand_ParseFail;
5887 
5888     Val += 32;
5889     return MatchOperand_Success;
5890   }
5891 
5892   return MatchOperand_ParseFail;
5893 }
5894 
5895 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5896   if (!isToken(AsmToken::Identifier))
5897     return MatchOperand_NoMatch;
5898 
5899   SMLoc S = getLoc();
5900 
5901   uint8_t Val;
5902   auto Res = parseExpTgtImpl(getTokenStr(), Val);
5903   if (Res != MatchOperand_Success) {
5904     Error(S, "invalid exp target");
5905     return Res;
5906   }
5907 
5908   Parser.Lex();
5909   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5910                                               AMDGPUOperand::ImmTyExpTgt));
5911   return MatchOperand_Success;
5912 }
5913 
5914 //===----------------------------------------------------------------------===//
5915 // parser helpers
5916 //===----------------------------------------------------------------------===//
5917 
5918 bool
5919 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5920   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5921 }
5922 
5923 bool
5924 AMDGPUAsmParser::isId(const StringRef Id) const {
5925   return isId(getToken(), Id);
5926 }
5927 
5928 bool
5929 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5930   return getTokenKind() == Kind;
5931 }
5932 
5933 bool
5934 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5935   if (isId(Id)) {
5936     lex();
5937     return true;
5938   }
5939   return false;
5940 }
5941 
5942 bool
5943 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5944   if (isId(Id) && peekToken().is(Kind)) {
5945     lex();
5946     lex();
5947     return true;
5948   }
5949   return false;
5950 }
5951 
5952 bool
5953 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5954   if (isToken(Kind)) {
5955     lex();
5956     return true;
5957   }
5958   return false;
5959 }
5960 
5961 bool
5962 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5963                            const StringRef ErrMsg) {
5964   if (!trySkipToken(Kind)) {
5965     Error(getLoc(), ErrMsg);
5966     return false;
5967   }
5968   return true;
5969 }
5970 
5971 bool
5972 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5973   return !getParser().parseAbsoluteExpression(Imm);
5974 }
5975 
5976 bool
5977 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5978   SMLoc S = getLoc();
5979 
5980   const MCExpr *Expr;
5981   if (Parser.parseExpression(Expr))
5982     return false;
5983 
5984   int64_t IntVal;
5985   if (Expr->evaluateAsAbsolute(IntVal)) {
5986     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5987   } else {
5988     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5989   }
5990   return true;
5991 }
5992 
5993 bool
5994 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5995   if (isToken(AsmToken::String)) {
5996     Val = getToken().getStringContents();
5997     lex();
5998     return true;
5999   } else {
6000     Error(getLoc(), ErrMsg);
6001     return false;
6002   }
6003 }
6004 
6005 bool
6006 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6007   if (isToken(AsmToken::Identifier)) {
6008     Val = getTokenStr();
6009     lex();
6010     return true;
6011   } else {
6012     Error(getLoc(), ErrMsg);
6013     return false;
6014   }
6015 }
6016 
6017 AsmToken
6018 AMDGPUAsmParser::getToken() const {
6019   return Parser.getTok();
6020 }
6021 
6022 AsmToken
6023 AMDGPUAsmParser::peekToken() {
6024   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6025 }
6026 
6027 void
6028 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6029   auto TokCount = getLexer().peekTokens(Tokens);
6030 
6031   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6032     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6033 }
6034 
6035 AsmToken::TokenKind
6036 AMDGPUAsmParser::getTokenKind() const {
6037   return getLexer().getKind();
6038 }
6039 
6040 SMLoc
6041 AMDGPUAsmParser::getLoc() const {
6042   return getToken().getLoc();
6043 }
6044 
6045 StringRef
6046 AMDGPUAsmParser::getTokenStr() const {
6047   return getToken().getString();
6048 }
6049 
6050 void
6051 AMDGPUAsmParser::lex() {
6052   Parser.Lex();
6053 }
6054 
6055 //===----------------------------------------------------------------------===//
6056 // swizzle
6057 //===----------------------------------------------------------------------===//
6058 
6059 LLVM_READNONE
6060 static unsigned
6061 encodeBitmaskPerm(const unsigned AndMask,
6062                   const unsigned OrMask,
6063                   const unsigned XorMask) {
6064   using namespace llvm::AMDGPU::Swizzle;
6065 
6066   return BITMASK_PERM_ENC |
6067          (AndMask << BITMASK_AND_SHIFT) |
6068          (OrMask  << BITMASK_OR_SHIFT)  |
6069          (XorMask << BITMASK_XOR_SHIFT);
6070 }
6071 
6072 bool
6073 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6074                                       const unsigned MinVal,
6075                                       const unsigned MaxVal,
6076                                       const StringRef ErrMsg) {
6077   for (unsigned i = 0; i < OpNum; ++i) {
6078     if (!skipToken(AsmToken::Comma, "expected a comma")) {
6079       return false;
6080     }
6081     SMLoc ExprLoc = Parser.getTok().getLoc();
6082     if (!parseExpr(Op[i])) {
6083       return false;
6084     }
6085     if (Op[i] < MinVal || Op[i] > MaxVal) {
6086       Error(ExprLoc, ErrMsg);
6087       return false;
6088     }
6089   }
6090 
6091   return true;
6092 }
6093 
6094 bool
6095 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6096   using namespace llvm::AMDGPU::Swizzle;
6097 
6098   int64_t Lane[LANE_NUM];
6099   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6100                            "expected a 2-bit lane id")) {
6101     Imm = QUAD_PERM_ENC;
6102     for (unsigned I = 0; I < LANE_NUM; ++I) {
6103       Imm |= Lane[I] << (LANE_SHIFT * I);
6104     }
6105     return true;
6106   }
6107   return false;
6108 }
6109 
6110 bool
6111 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6112   using namespace llvm::AMDGPU::Swizzle;
6113 
6114   SMLoc S = Parser.getTok().getLoc();
6115   int64_t GroupSize;
6116   int64_t LaneIdx;
6117 
6118   if (!parseSwizzleOperands(1, &GroupSize,
6119                             2, 32,
6120                             "group size must be in the interval [2,32]")) {
6121     return false;
6122   }
6123   if (!isPowerOf2_64(GroupSize)) {
6124     Error(S, "group size must be a power of two");
6125     return false;
6126   }
6127   if (parseSwizzleOperands(1, &LaneIdx,
6128                            0, GroupSize - 1,
6129                            "lane id must be in the interval [0,group size - 1]")) {
6130     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6131     return true;
6132   }
6133   return false;
6134 }
6135 
6136 bool
6137 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6138   using namespace llvm::AMDGPU::Swizzle;
6139 
6140   SMLoc S = Parser.getTok().getLoc();
6141   int64_t GroupSize;
6142 
6143   if (!parseSwizzleOperands(1, &GroupSize,
6144       2, 32, "group size must be in the interval [2,32]")) {
6145     return false;
6146   }
6147   if (!isPowerOf2_64(GroupSize)) {
6148     Error(S, "group size must be a power of two");
6149     return false;
6150   }
6151 
6152   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6153   return true;
6154 }
6155 
6156 bool
6157 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6158   using namespace llvm::AMDGPU::Swizzle;
6159 
6160   SMLoc S = Parser.getTok().getLoc();
6161   int64_t GroupSize;
6162 
6163   if (!parseSwizzleOperands(1, &GroupSize,
6164       1, 16, "group size must be in the interval [1,16]")) {
6165     return false;
6166   }
6167   if (!isPowerOf2_64(GroupSize)) {
6168     Error(S, "group size must be a power of two");
6169     return false;
6170   }
6171 
6172   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6173   return true;
6174 }
6175 
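// The BITMASK_PERM control string has one character per lane-id bit, most
// significant bit first: '0' forces the bit to 0, '1' forces it to 1,
// 'p' preserves it, and 'i' inverts it.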
6176 bool
6177 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6178   using namespace llvm::AMDGPU::Swizzle;
6179 
6180   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6181     return false;
6182   }
6183 
6184   StringRef Ctl;
6185   SMLoc StrLoc = Parser.getTok().getLoc();
6186   if (!parseString(Ctl)) {
6187     return false;
6188   }
6189   if (Ctl.size() != BITMASK_WIDTH) {
6190     Error(StrLoc, "expected a 5-character mask");
6191     return false;
6192   }
6193 
6194   unsigned AndMask = 0;
6195   unsigned OrMask = 0;
6196   unsigned XorMask = 0;
6197 
6198   for (size_t i = 0; i < Ctl.size(); ++i) {
6199     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6200     switch (Ctl[i]) {
6201     default:
6202       Error(StrLoc, "invalid mask");
6203       return false;
6204     case '0':
6205       break;
6206     case '1':
6207       OrMask |= Mask;
6208       break;
6209     case 'p':
6210       AndMask |= Mask;
6211       break;
6212     case 'i':
6213       AndMask |= Mask;
6214       XorMask |= Mask;
6215       break;
6216     }
6217   }
6218 
6219   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6220   return true;
6221 }
6222 
6223 bool
6224 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6225 
6226   SMLoc OffsetLoc = Parser.getTok().getLoc();
6227 
6228   if (!parseExpr(Imm)) {
6229     return false;
6230   }
6231   if (!isUInt<16>(Imm)) {
6232     Error(OffsetLoc, "expected a 16-bit offset");
6233     return false;
6234   }
6235   return true;
6236 }
6237 
6238 bool
6239 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6240   using namespace llvm::AMDGPU::Swizzle;
6241 
6242   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6243 
6244     SMLoc ModeLoc = Parser.getTok().getLoc();
6245     bool Ok = false;
6246 
6247     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6248       Ok = parseSwizzleQuadPerm(Imm);
6249     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6250       Ok = parseSwizzleBitmaskPerm(Imm);
6251     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6252       Ok = parseSwizzleBroadcast(Imm);
6253     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6254       Ok = parseSwizzleSwap(Imm);
6255     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6256       Ok = parseSwizzleReverse(Imm);
6257     } else {
6258       Error(ModeLoc, "expected a swizzle mode");
6259     }
6260 
6261     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6262   }
6263 
6264   return false;
6265 }
6266 
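// The swizzle operand is either a literal 16-bit offset or one of the
// swizzle macros handled above, for example (the values are illustrative and
// the macro names are the IdSymbolic[] strings):
//   offset:0xFFFF
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   offset:swizzle(BITMASK_PERM, "01pi0")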
6267 OperandMatchResultTy
6268 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6269   SMLoc S = Parser.getTok().getLoc();
6270   int64_t Imm = 0;
6271 
6272   if (trySkipId("offset")) {
6273 
6274     bool Ok = false;
6275     if (skipToken(AsmToken::Colon, "expected a colon")) {
6276       if (trySkipId("swizzle")) {
6277         Ok = parseSwizzleMacro(Imm);
6278       } else {
6279         Ok = parseSwizzleOffset(Imm);
6280       }
6281     }
6282 
6283     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6284 
6285     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6286   } else {
6287     // Swizzle "offset" operand is optional.
6288     // If it is omitted, try parsing other optional operands.
6289     return parseOptionalOpr(Operands);
6290   }
6291 }
6292 
6293 bool
6294 AMDGPUOperand::isSwizzle() const {
6295   return isImmTy(ImmTySwizzle);
6296 }
6297 
6298 //===----------------------------------------------------------------------===//
6299 // VGPR Index Mode
6300 //===----------------------------------------------------------------------===//
6301 
6302 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6303 
6304   using namespace llvm::AMDGPU::VGPRIndexMode;
6305 
6306   if (trySkipToken(AsmToken::RParen)) {
6307     return OFF;
6308   }
6309 
6310   int64_t Imm = 0;
6311 
6312   while (true) {
6313     unsigned Mode = 0;
6314     SMLoc S = Parser.getTok().getLoc();
6315 
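    // Each recognized mode name contributes one bit to the gpr_idx mask.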
6316     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6317       if (trySkipId(IdSymbolic[ModeId])) {
6318         Mode = 1 << ModeId;
6319         break;
6320       }
6321     }
6322 
6323     if (Mode == 0) {
6324       Error(S, (Imm == 0)?
6325                "expected a VGPR index mode or a closing parenthesis" :
6326                "expected a VGPR index mode");
6327       return UNDEF;
6328     }
6329 
6330     if (Imm & Mode) {
6331       Error(S, "duplicate VGPR index mode");
6332       return UNDEF;
6333     }
6334     Imm |= Mode;
6335 
6336     if (trySkipToken(AsmToken::RParen))
6337       break;
6338     if (!skipToken(AsmToken::Comma,
6339                    "expected a comma or a closing parenthesis"))
6340       return UNDEF;
6341   }
6342 
6343   return Imm;
6344 }
6345 
6346 OperandMatchResultTy
6347 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6348 
6349   using namespace llvm::AMDGPU::VGPRIndexMode;
6350 
6351   int64_t Imm = 0;
6352   SMLoc S = Parser.getTok().getLoc();
6353 
6354   if (getLexer().getKind() == AsmToken::Identifier &&
6355       Parser.getTok().getString() == "gpr_idx" &&
6356       getLexer().peekTok().is(AsmToken::LParen)) {
6357 
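    // Eat the 'gpr_idx' identifier and the opening parenthesis.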
6358     Parser.Lex();
6359     Parser.Lex();
6360 
6361     Imm = parseGPRIdxMacro();
6362     if (Imm == UNDEF)
6363       return MatchOperand_ParseFail;
6364 
6365   } else {
6366     if (getParser().parseAbsoluteExpression(Imm))
6367       return MatchOperand_ParseFail;
6368     if (Imm < 0 || !isUInt<4>(Imm)) {
6369       Error(S, "invalid immediate: only 4-bit values are legal");
6370       return MatchOperand_ParseFail;
6371     }
6372   }
6373 
6374   Operands.push_back(
6375       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6376   return MatchOperand_Success;
6377 }
6378 
6379 bool AMDGPUOperand::isGPRIdxMode() const {
6380   return isImmTy(ImmTyGprIdxMode);
6381 }
6382 
6383 //===----------------------------------------------------------------------===//
6384 // sopp branch targets
6385 //===----------------------------------------------------------------------===//
6386 
6387 OperandMatchResultTy
6388 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6389 
6390   // Make sure we are not parsing something
6391   // that looks like a label or an expression but is not.
6392   // This will improve error messages.
6393   if (isRegister() || isModifier())
6394     return MatchOperand_NoMatch;
6395 
6396   if (!parseExpr(Operands))
6397     return MatchOperand_ParseFail;
6398 
6399   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6400   assert(Opr.isImm() || Opr.isExpr());
6401   SMLoc Loc = Opr.getStartLoc();
6402 
6403   // Currently we do not support arbitrary expressions as branch targets.
6404   // Only labels and absolute expressions are accepted.
6405   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6406     Error(Loc, "expected an absolute expression or a label");
6407   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6408     Error(Loc, "expected a 16-bit signed jump offset");
6409   }
6410 
6411   return MatchOperand_Success;
6412 }
6413 
6414 //===----------------------------------------------------------------------===//
6415 // Boolean holding registers
6416 //===----------------------------------------------------------------------===//
6417 
6418 OperandMatchResultTy
6419 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6420   return parseReg(Operands);
6421 }
6422 
6423 //===----------------------------------------------------------------------===//
6424 // mubuf
6425 //===----------------------------------------------------------------------===//
6426 
6427 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6428   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6429 }
6430 
6431 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6432   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6433 }
6434 
6435 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6436   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6437 }
6438 
6439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6440   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6441 }
6442 
6443 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6444                                const OperandVector &Operands,
6445                                bool IsAtomic,
6446                                bool IsAtomicReturn,
6447                                bool IsLds) {
6448   bool IsLdsOpcode = IsLds;
6449   bool HasLdsModifier = false;
6450   OptionalImmIndexMap OptionalIdx;
6451   assert(IsAtomicReturn ? IsAtomic : true);
6452   unsigned FirstOperandIdx = 1;
6453 
6454   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6455     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6456 
6457     // Add the register arguments
6458     if (Op.isReg()) {
6459       Op.addRegOperands(Inst, 1);
6460       // Insert a tied src for the atomic return dst.
6461       // This cannot be postponed because subsequent calls to
6462       // addImmOperands rely on the correct number of MC operands.
6463       if (IsAtomicReturn && i == FirstOperandIdx)
6464         Op.addRegOperands(Inst, 1);
6465       continue;
6466     }
6467 
6468     // Handle the case where soffset is an immediate
6469     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6470       Op.addImmOperands(Inst, 1);
6471       continue;
6472     }
6473 
6474     HasLdsModifier |= Op.isLDS();
6475 
6476     // Handle tokens like 'offen' which are sometimes hard-coded into the
6477     // asm string.  There are no MCInst operands for these.
6478     if (Op.isToken()) {
6479       continue;
6480     }
6481     assert(Op.isImm());
6482 
6483     // Handle optional arguments
6484     OptionalIdx[Op.getImmTy()] = i;
6485   }
6486 
6487   // This is a workaround for an llvm quirk which may result in an
6488   // incorrect instruction selection. The lds and non-lds versions of
6489   // MUBUF instructions are identical except that the lds versions have a
6490   // mandatory 'lds' modifier. However, this modifier follows the optional
6491   // modifiers, and the llvm asm matcher regards this 'lds' modifier as an
6492   // optional one. As a result, the lds version of an opcode may be selected
6493   // even if the assembly has no 'lds' modifier.
6494   if (IsLdsOpcode && !HasLdsModifier) {
6495     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6496     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6497       Inst.setOpcode(NoLdsOpcode);
6498       IsLdsOpcode = false;
6499     }
6500   }
6501 
6502   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6503   if (!IsAtomic || IsAtomicReturn) {
6504     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6505   }
6506   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6507 
6508   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6509     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6510   }
6511 
6512   if (isGFX10())
6513     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6514 }
6515 
6516 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6517   OptionalImmIndexMap OptionalIdx;
6518 
6519   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6520     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6521 
6522     // Add the register arguments
6523     if (Op.isReg()) {
6524       Op.addRegOperands(Inst, 1);
6525       continue;
6526     }
6527 
6528     // Handle the case where soffset is an immediate
6529     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6530       Op.addImmOperands(Inst, 1);
6531       continue;
6532     }
6533 
6534     // Handle tokens like 'offen' which are sometimes hard-coded into the
6535     // asm string.  There are no MCInst operands for these.
6536     if (Op.isToken()) {
6537       continue;
6538     }
6539     assert(Op.isImm());
6540 
6541     // Handle optional arguments
6542     OptionalIdx[Op.getImmTy()] = i;
6543   }
6544 
6545   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6546                         AMDGPUOperand::ImmTyOffset);
6547   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6548   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6549   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6550   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6551 
6552   if (isGFX10())
6553     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6554 }
6555 
6556 //===----------------------------------------------------------------------===//
6557 // mimg
6558 //===----------------------------------------------------------------------===//
6559 
6560 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6561                               bool IsAtomic) {
6562   unsigned I = 1;
6563   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6564   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6565     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6566   }
6567 
6568   if (IsAtomic) {
6569     // Add src, same as dst
6570     assert(Desc.getNumDefs() == 1);
6571     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6572   }
6573 
6574   OptionalImmIndexMap OptionalIdx;
6575 
6576   for (unsigned E = Operands.size(); I != E; ++I) {
6577     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6578 
6579     // Add the register arguments
6580     if (Op.isReg()) {
6581       Op.addRegOperands(Inst, 1);
6582     } else if (Op.isImmModifier()) {
6583       OptionalIdx[Op.getImmTy()] = I;
6584     } else if (!Op.isToken()) {
6585       llvm_unreachable("unexpected operand type");
6586     }
6587   }
6588 
6589   bool IsGFX10 = isGFX10();
6590 
6591   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6592   if (IsGFX10)
6593     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6594   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6595   if (IsGFX10)
6596     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6597   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6598   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6599   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6600   if (IsGFX10)
6601     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6602   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6603   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6604   if (!IsGFX10)
6605     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6606   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6607 }
6608 
6609 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6610   cvtMIMG(Inst, Operands, true);
6611 }
6612 
6613 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6614                                       const OperandVector &Operands) {
6615   for (unsigned I = 1; I < Operands.size(); ++I) {
6616     auto &Operand = (AMDGPUOperand &)*Operands[I];
6617     if (Operand.isReg())
6618       Operand.addRegOperands(Inst, 1);
6619   }
6620 
6621   Inst.addOperand(MCOperand::createImm(1)); // a16
6622 }
6623 
6624 //===----------------------------------------------------------------------===//
6625 // smrd
6626 //===----------------------------------------------------------------------===//
6627 
6628 bool AMDGPUOperand::isSMRDOffset8() const {
6629   return isImm() && isUInt<8>(getImm());
6630 }
6631 
6632 bool AMDGPUOperand::isSMEMOffset() const {
6633   return isImm(); // Offset range is checked later by validator.
6634 }
6635 
6636 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6637   // 32-bit literals are only supported on CI, and we only want to use them
6638   // when the offset does not fit in 8 bits.
6639   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6640 }
6641 
6642 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6643   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6644 }
6645 
6646 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6647   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6648 }
6649 
6650 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6651   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6652 }
6653 
6654 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6655   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6656 }
6657 
6658 //===----------------------------------------------------------------------===//
6659 // vop3
6660 //===----------------------------------------------------------------------===//
6661 
6662 static bool ConvertOmodMul(int64_t &Mul) {
6663   if (Mul != 1 && Mul != 2 && Mul != 4)
6664     return false;
6665 
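  // mul:1, mul:2 and mul:4 map to omod encodings 0, 1 and 2 (the value >> 1).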
6666   Mul >>= 1;
6667   return true;
6668 }
6669 
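// div:1 means no output modifier (omod 0); div:2 maps to omod encoding 3
// (divide by 2).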
6670 static bool ConvertOmodDiv(int64_t &Div) {
6671   if (Div == 1) {
6672     Div = 0;
6673     return true;
6674   }
6675 
6676   if (Div == 2) {
6677     Div = 3;
6678     return true;
6679   }
6680 
6681   return false;
6682 }
6683 
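// A parsed value of 0 (i.e. 'bound_ctrl:0' in assembly) is encoded as 1;
// a value of -1 is encoded as 0.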
6684 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6685   if (BoundCtrl == 0) {
6686     BoundCtrl = 1;
6687     return true;
6688   }
6689 
6690   if (BoundCtrl == -1) {
6691     BoundCtrl = 0;
6692     return true;
6693   }
6694 
6695   return false;
6696 }
6697 
6698 // Note: the order in this table matches the order of operands in AsmString.
6699 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6700   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6701   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6702   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6703   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6704   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6705   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6706   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6707   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6708   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6709   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6710   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6711   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6712   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6713   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6714   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6715   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6716   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6717   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6718   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6719   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6720   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6721   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6722   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6723   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6724   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6725   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6726   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6727   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6728   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6729   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6730   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6731   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6732   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6733   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6734   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6735   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6736   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6737   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6738   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6739   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6740   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6741   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6742   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6743 };
6744 
6745 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6746 
6747   OperandMatchResultTy res = parseOptionalOpr(Operands);
6748 
6749   // This is a hack to enable hardcoded mandatory operands which follow
6750   // optional operands.
6751   //
6752   // The current design assumes that all operands after the first optional one
6753   // are also optional. However, the implementation of some instructions
6754   // violates this rule (e.g. flat/global atomics have a hardcoded 'glc' operand).
6755   //
6756   // To alleviate this problem, we have to (implicitly) parse extra operands to
6757   // make sure the autogenerated parser of custom operands never hits hardcoded
6758   // mandatory operands.
6759 
6760   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6761     if (res != MatchOperand_Success ||
6762         isToken(AsmToken::EndOfStatement))
6763       break;
6764 
6765     trySkipToken(AsmToken::Comma);
6766     res = parseOptionalOpr(Operands);
6767   }
6768 
6769   return res;
6770 }
6771 
6772 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6773   OperandMatchResultTy res;
6774   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6775     // try to parse any optional operand here
6776     if (Op.IsBit) {
6777       res = parseNamedBit(Op.Name, Operands, Op.Type);
6778     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6779       res = parseOModOperand(Operands);
6780     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6781                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6782                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6783       res = parseSDWASel(Operands, Op.Name, Op.Type);
6784     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6785       res = parseSDWADstUnused(Operands);
6786     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6787                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6788                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6789                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6790       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6791                                         Op.ConvertResult);
6792     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6793       res = parseDim(Operands);
6794     } else {
6795       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6796     }
6797     if (res != MatchOperand_NoMatch) {
6798       return res;
6799     }
6800   }
6801   return MatchOperand_NoMatch;
6802 }
6803 
6804 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6805   StringRef Name = Parser.getTok().getString();
6806   if (Name == "mul") {
6807     return parseIntWithPrefix("mul", Operands,
6808                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6809   }
6810 
6811   if (Name == "div") {
6812     return parseIntWithPrefix("div", Operands,
6813                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6814   }
6815 
6816   return MatchOperand_NoMatch;
6817 }
6818 
6819 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6820   cvtVOP3P(Inst, Operands);
6821 
6822   int Opc = Inst.getOpcode();
6823 
6824   int SrcNum;
6825   const int Ops[] = { AMDGPU::OpName::src0,
6826                       AMDGPU::OpName::src1,
6827                       AMDGPU::OpName::src2 };
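  // Count how many source operands this opcode has.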
6828   for (SrcNum = 0;
6829        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6830        ++SrcNum);
6831   assert(SrcNum > 0);
6832 
6833   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6834   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6835 
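  // The op_sel bit just above the last source selects the high half of the
  // destination; it is carried in src0_modifiers as DST_OP_SEL.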
6836   if ((OpSel & (1 << SrcNum)) != 0) {
6837     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6838     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6839     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6840   }
6841 }
6842 
6843 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6844       // 1. This operand is an input-modifiers operand
6845   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6846       // 2. This is not the last operand
6847       && Desc.NumOperands > (OpNum + 1)
6848       // 3. The next operand has a register class
6849       && Desc.OpInfo[OpNum + 1].RegClass != -1
6850       // 4. The next operand is not tied to any other operand
6851       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6852 }
6853 
6854 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
6855                                     const OperandVector &Operands) {
6856   OptionalImmIndexMap OptionalIdx;
6857   unsigned Opc = Inst.getOpcode();
6858 
6859   unsigned I = 1;
6860   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6861   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6862     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6863   }
6864 
6865   for (unsigned E = Operands.size(); I != E; ++I) {
6866     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6867     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6868       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6869     } else if (Op.isInterpSlot() ||
6870                Op.isInterpAttr() ||
6871                Op.isAttrChan()) {
6872       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6873     } else if (Op.isImmModifier()) {
6874       OptionalIdx[Op.getImmTy()] = I;
6875     } else {
6876       llvm_unreachable("unhandled operand type");
6877     }
6878   }
6879 
6880   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6881     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6882   }
6883 
6884   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6885     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6886   }
6887 
6888   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6889     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6890   }
6891 }
6892 
6893 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6894                               OptionalImmIndexMap &OptionalIdx) {
6895   unsigned Opc = Inst.getOpcode();
6896 
6897   unsigned I = 1;
6898   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6899   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6900     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6901   }
6902 
6903   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6904     // This instruction has src modifiers
6905     for (unsigned E = Operands.size(); I != E; ++I) {
6906       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6907       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6908         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6909       } else if (Op.isImmModifier()) {
6910         OptionalIdx[Op.getImmTy()] = I;
6911       } else if (Op.isRegOrImm()) {
6912         Op.addRegOrImmOperands(Inst, 1);
6913       } else {
6914         llvm_unreachable("unhandled operand type");
6915       }
6916     }
6917   } else {
6918     // No src modifiers
6919     for (unsigned E = Operands.size(); I != E; ++I) {
6920       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6921       if (Op.isMod()) {
6922         OptionalIdx[Op.getImmTy()] = I;
6923       } else {
6924         Op.addRegOrImmOperands(Inst, 1);
6925       }
6926     }
6927   }
6928 
6929   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6930     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6931   }
6932 
6933   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6934     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6935   }
6936 
6937   // Special case for v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6938   // they have a src2 register operand that is tied to the dst operand.
6939   // We do not allow modifiers for this operand in the assembler, so
6940   // src2_modifiers should be 0.
6941   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6942       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6943       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6944       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
6945       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
6946       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6947       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6948       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6949       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
6950       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6951     auto it = Inst.begin();
6952     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6953     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6954     ++it;
6955     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6956   }
6957 }
6958 
6959 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6960   OptionalImmIndexMap OptionalIdx;
6961   cvtVOP3(Inst, Operands, OptionalIdx);
6962 }
6963 
6964 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6965                                const OperandVector &Operands) {
6966   OptionalImmIndexMap OptIdx;
6967   const int Opc = Inst.getOpcode();
6968   const MCInstrDesc &Desc = MII.get(Opc);
6969 
6970   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6971 
6972   cvtVOP3(Inst, Operands, OptIdx);
6973 
6974   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6975     assert(!IsPacked);
6976     Inst.addOperand(Inst.getOperand(0));
6977   }
6978 
6979   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6980   // instruction, and then figure out where to actually put the modifiers.
6981 
6982   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6983 
6984   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6985   if (OpSelHiIdx != -1) {
6986     int DefaultVal = IsPacked ? -1 : 0;
6987     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6988                           DefaultVal);
6989   }
6990 
6991   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6992   if (NegLoIdx != -1) {
6993     assert(IsPacked);
6994     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6995     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6996   }
6997 
6998   const int Ops[] = { AMDGPU::OpName::src0,
6999                       AMDGPU::OpName::src1,
7000                       AMDGPU::OpName::src2 };
7001   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7002                          AMDGPU::OpName::src1_modifiers,
7003                          AMDGPU::OpName::src2_modifiers };
7004 
7005   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7006 
7007   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7008   unsigned OpSelHi = 0;
7009   unsigned NegLo = 0;
7010   unsigned NegHi = 0;
7011 
7012   if (OpSelHiIdx != -1) {
7013     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7014   }
7015 
7016   if (NegLoIdx != -1) {
7017     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7018     NegLo = Inst.getOperand(NegLoIdx).getImm();
7019     NegHi = Inst.getOperand(NegHiIdx).getImm();
7020   }
7021 
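  // Fold the per-source op_sel / op_sel_hi / neg_lo / neg_hi bits into the
  // corresponding src*_modifiers operands.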
7022   for (int J = 0; J < 3; ++J) {
7023     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7024     if (OpIdx == -1)
7025       break;
7026 
7027     uint32_t ModVal = 0;
7028 
7029     if ((OpSel & (1 << J)) != 0)
7030       ModVal |= SISrcMods::OP_SEL_0;
7031 
7032     if ((OpSelHi & (1 << J)) != 0)
7033       ModVal |= SISrcMods::OP_SEL_1;
7034 
7035     if ((NegLo & (1 << J)) != 0)
7036       ModVal |= SISrcMods::NEG;
7037 
7038     if ((NegHi & (1 << J)) != 0)
7039       ModVal |= SISrcMods::NEG_HI;
7040 
7041     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7042 
7043     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7044   }
7045 }
7046 
7047 //===----------------------------------------------------------------------===//
7048 // dpp
7049 //===----------------------------------------------------------------------===//
7050 
7051 bool AMDGPUOperand::isDPP8() const {
7052   return isImmTy(ImmTyDPP8);
7053 }
7054 
7055 bool AMDGPUOperand::isDPPCtrl() const {
7056   using namespace AMDGPU::DPP;
7057 
7058   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7059   if (result) {
7060     int64_t Imm = getImm();
7061     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7062            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7063            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7064            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7065            (Imm == DppCtrl::WAVE_SHL1) ||
7066            (Imm == DppCtrl::WAVE_ROL1) ||
7067            (Imm == DppCtrl::WAVE_SHR1) ||
7068            (Imm == DppCtrl::WAVE_ROR1) ||
7069            (Imm == DppCtrl::ROW_MIRROR) ||
7070            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7071            (Imm == DppCtrl::BCAST15) ||
7072            (Imm == DppCtrl::BCAST31) ||
7073            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7074            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7075   }
7076   return false;
7077 }
7078 
7079 //===----------------------------------------------------------------------===//
7080 // mAI
7081 //===----------------------------------------------------------------------===//
7082 
7083 bool AMDGPUOperand::isBLGP() const {
7084   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7085 }
7086 
7087 bool AMDGPUOperand::isCBSZ() const {
7088   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7089 }
7090 
7091 bool AMDGPUOperand::isABID() const {
7092   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7093 }
7094 
7095 bool AMDGPUOperand::isS16Imm() const {
7096   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7097 }
7098 
7099 bool AMDGPUOperand::isU16Imm() const {
7100   return isImm() && isUInt<16>(getImm());
7101 }
7102 
7103 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7104   if (!isGFX10())
7105     return MatchOperand_NoMatch;
7106 
7107   SMLoc S = Parser.getTok().getLoc();
7108 
7109   if (getLexer().isNot(AsmToken::Identifier))
7110     return MatchOperand_NoMatch;
7111   if (getLexer().getTok().getString() != "dim")
7112     return MatchOperand_NoMatch;
7113 
7114   Parser.Lex();
7115   if (getLexer().isNot(AsmToken::Colon))
7116     return MatchOperand_ParseFail;
7117 
7118   Parser.Lex();
7119 
7120   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7121   // integer.
7122   std::string Token;
7123   if (getLexer().is(AsmToken::Integer)) {
7124     SMLoc Loc = getLexer().getTok().getEndLoc();
7125     Token = std::string(getLexer().getTok().getString());
7126     Parser.Lex();
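    // The next token must start exactly where the integer ended (no whitespace).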
7127     if (getLexer().getTok().getLoc() != Loc)
7128       return MatchOperand_ParseFail;
7129   }
7130   if (getLexer().isNot(AsmToken::Identifier))
7131     return MatchOperand_ParseFail;
7132   Token += getLexer().getTok().getString();
7133 
7134   StringRef DimId = Token;
7135   if (DimId.startswith("SQ_RSRC_IMG_"))
7136     DimId = DimId.substr(12);
7137 
7138   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7139   if (!DimInfo)
7140     return MatchOperand_ParseFail;
7141 
7142   Parser.Lex();
7143 
7144   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7145                                               AMDGPUOperand::ImmTyDim));
7146   return MatchOperand_Success;
7147 }
7148 
7149 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7150   SMLoc S = Parser.getTok().getLoc();
7151   StringRef Prefix;
7152 
7153   if (getLexer().getKind() == AsmToken::Identifier) {
7154     Prefix = Parser.getTok().getString();
7155   } else {
7156     return MatchOperand_NoMatch;
7157   }
7158 
7159   if (Prefix != "dpp8")
7160     return parseDPPCtrl(Operands);
7161   if (!isGFX10())
7162     return MatchOperand_NoMatch;
7163 
7164   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7165 
7166   int64_t Sels[8];
7167 
7168   Parser.Lex();
7169   if (getLexer().isNot(AsmToken::Colon))
7170     return MatchOperand_ParseFail;
7171 
7172   Parser.Lex();
7173   if (getLexer().isNot(AsmToken::LBrac))
7174     return MatchOperand_ParseFail;
7175 
7176   Parser.Lex();
7177   if (getParser().parseAbsoluteExpression(Sels[0]))
7178     return MatchOperand_ParseFail;
7179   if (0 > Sels[0] || 7 < Sels[0])
7180     return MatchOperand_ParseFail;
7181 
7182   for (size_t i = 1; i < 8; ++i) {
7183     if (getLexer().isNot(AsmToken::Comma))
7184       return MatchOperand_ParseFail;
7185 
7186     Parser.Lex();
7187     if (getParser().parseAbsoluteExpression(Sels[i]))
7188       return MatchOperand_ParseFail;
7189     if (0 > Sels[i] || 7 < Sels[i])
7190       return MatchOperand_ParseFail;
7191   }
7192 
7193   if (getLexer().isNot(AsmToken::RBrac))
7194     return MatchOperand_ParseFail;
7195   Parser.Lex();
7196 
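  // Pack the eight 3-bit lane selects into a single immediate.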
7197   unsigned DPP8 = 0;
7198   for (size_t i = 0; i < 8; ++i)
7199     DPP8 |= (Sels[i] << (i * 3));
7200 
7201   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7202   return MatchOperand_Success;
7203 }
7204 
7205 OperandMatchResultTy
7206 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7207   using namespace AMDGPU::DPP;
7208 
7209   SMLoc S = Parser.getTok().getLoc();
7210   StringRef Prefix;
7211   int64_t Int;
7212 
7213   if (getLexer().getKind() == AsmToken::Identifier) {
7214     Prefix = Parser.getTok().getString();
7215   } else {
7216     return MatchOperand_NoMatch;
7217   }
7218 
7219   if (Prefix == "row_mirror") {
7220     Int = DppCtrl::ROW_MIRROR;
7221     Parser.Lex();
7222   } else if (Prefix == "row_half_mirror") {
7223     Int = DppCtrl::ROW_HALF_MIRROR;
7224     Parser.Lex();
7225   } else {
7226     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7227     if (Prefix != "quad_perm"
7228         && Prefix != "row_shl"
7229         && Prefix != "row_shr"
7230         && Prefix != "row_ror"
7231         && Prefix != "wave_shl"
7232         && Prefix != "wave_rol"
7233         && Prefix != "wave_shr"
7234         && Prefix != "wave_ror"
7235         && Prefix != "row_bcast"
7236         && Prefix != "row_share"
7237         && Prefix != "row_xmask") {
7238       return MatchOperand_NoMatch;
7239     }
7240 
7241     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7242       return MatchOperand_NoMatch;
7243 
7244     if (!isVI() && !isGFX9() &&
7245         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7246          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7247          Prefix == "row_bcast"))
7248       return MatchOperand_NoMatch;
7249 
7250     Parser.Lex();
7251     if (getLexer().isNot(AsmToken::Colon))
7252       return MatchOperand_ParseFail;
7253 
7254     if (Prefix == "quad_perm") {
7255       // quad_perm:[%d,%d,%d,%d]
7256       Parser.Lex();
7257       if (getLexer().isNot(AsmToken::LBrac))
7258         return MatchOperand_ParseFail;
7259       Parser.Lex();
7260 
7261       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7262         return MatchOperand_ParseFail;
7263 
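      // The first 2-bit lane select is already in Int;
      // pack the remaining three above it.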
7264       for (int i = 0; i < 3; ++i) {
7265         if (getLexer().isNot(AsmToken::Comma))
7266           return MatchOperand_ParseFail;
7267         Parser.Lex();
7268 
7269         int64_t Temp;
7270         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7271           return MatchOperand_ParseFail;
7272         const int shift = i*2 + 2;
7273         Int += (Temp << shift);
7274       }
7275 
7276       if (getLexer().isNot(AsmToken::RBrac))
7277         return MatchOperand_ParseFail;
7278       Parser.Lex();
7279     } else {
7280       // sel:%d
7281       Parser.Lex();
7282       if (getParser().parseAbsoluteExpression(Int))
7283         return MatchOperand_ParseFail;
7284 
7285       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7286         Int |= DppCtrl::ROW_SHL0;
7287       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7288         Int |= DppCtrl::ROW_SHR0;
7289       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7290         Int |= DppCtrl::ROW_ROR0;
7291       } else if (Prefix == "wave_shl" && 1 == Int) {
7292         Int = DppCtrl::WAVE_SHL1;
7293       } else if (Prefix == "wave_rol" && 1 == Int) {
7294         Int = DppCtrl::WAVE_ROL1;
7295       } else if (Prefix == "wave_shr" && 1 == Int) {
7296         Int = DppCtrl::WAVE_SHR1;
7297       } else if (Prefix == "wave_ror" && 1 == Int) {
7298         Int = DppCtrl::WAVE_ROR1;
7299       } else if (Prefix == "row_bcast") {
7300         if (Int == 15) {
7301           Int = DppCtrl::BCAST15;
7302         } else if (Int == 31) {
7303           Int = DppCtrl::BCAST31;
7304         } else {
7305           return MatchOperand_ParseFail;
7306         }
7307       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7308         Int |= DppCtrl::ROW_SHARE_FIRST;
7309       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7310         Int |= DppCtrl::ROW_XMASK_FIRST;
7311       } else {
7312         return MatchOperand_ParseFail;
7313       }
7314     }
7315   }
7316 
7317   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7318   return MatchOperand_Success;
7319 }
7320 
7321 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7322   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7323 }
7324 
7325 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7326   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7327 }
7328 
7329 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7330   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7331 }
7332 
7333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7334   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7335 }
7336 
7337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7338   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7339 }
7340 
7341 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7342   OptionalImmIndexMap OptionalIdx;
7343 
7344   unsigned I = 1;
7345   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7346   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7347     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7348   }
7349 
7350   int Fi = 0;
7351   for (unsigned E = Operands.size(); I != E; ++I) {
7352     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7353                                             MCOI::TIED_TO);
7354     if (TiedTo != -1) {
7355       assert((unsigned)TiedTo < Inst.getNumOperands());
7356       // Handle the tied 'old' or 'src2' operand for MAC instructions.
7357       Inst.addOperand(Inst.getOperand(TiedTo));
7358     }
7359     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7360     // Add the register arguments
7361     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7362       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
7363       // Skip it.
7364       continue;
7365     }
7366 
7367     if (IsDPP8) {
7368       if (Op.isDPP8()) {
7369         Op.addImmOperands(Inst, 1);
7370       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7371         Op.addRegWithFPInputModsOperands(Inst, 2);
7372       } else if (Op.isFI()) {
7373         Fi = Op.getImm();
7374       } else if (Op.isReg()) {
7375         Op.addRegOperands(Inst, 1);
7376       } else {
7377         llvm_unreachable("Invalid operand type");
7378       }
7379     } else {
7380       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7381         Op.addRegWithFPInputModsOperands(Inst, 2);
7382       } else if (Op.isDPPCtrl()) {
7383         Op.addImmOperands(Inst, 1);
7384       } else if (Op.isImm()) {
7385         // Handle optional arguments
7386         OptionalIdx[Op.getImmTy()] = I;
7387       } else {
7388         llvm_unreachable("Invalid operand type");
7389       }
7390     }
7391   }
7392 
7393   if (IsDPP8) {
7394     using namespace llvm::AMDGPU::DPP;
7395     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7396   } else {
7397     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7398     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7399     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7400     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7401       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7402     }
7403   }
7404 }
7405 
7406 //===----------------------------------------------------------------------===//
7407 // sdwa
7408 //===----------------------------------------------------------------------===//
7409 
7410 OperandMatchResultTy
7411 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7412                               AMDGPUOperand::ImmTy Type) {
7413   using namespace llvm::AMDGPU::SDWA;
7414 
7415   SMLoc S = Parser.getTok().getLoc();
7416   StringRef Value;
7417   OperandMatchResultTy res;
7418 
7419   res = parseStringWithPrefix(Prefix, Value);
7420   if (res != MatchOperand_Success) {
7421     return res;
7422   }
7423 
7424   int64_t Int;
7425   Int = StringSwitch<int64_t>(Value)
7426         .Case("BYTE_0", SdwaSel::BYTE_0)
7427         .Case("BYTE_1", SdwaSel::BYTE_1)
7428         .Case("BYTE_2", SdwaSel::BYTE_2)
7429         .Case("BYTE_3", SdwaSel::BYTE_3)
7430         .Case("WORD_0", SdwaSel::WORD_0)
7431         .Case("WORD_1", SdwaSel::WORD_1)
7432         .Case("DWORD", SdwaSel::DWORD)
7433         .Default(0xffffffff);
7434   Parser.Lex(); // eat last token
7435 
7436   if (Int == 0xffffffff) {
7437     return MatchOperand_ParseFail;
7438   }
7439 
7440   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7441   return MatchOperand_Success;
7442 }
7443 
7444 OperandMatchResultTy
7445 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7446   using namespace llvm::AMDGPU::SDWA;
7447 
7448   SMLoc S = Parser.getTok().getLoc();
7449   StringRef Value;
7450   OperandMatchResultTy res;
7451 
7452   res = parseStringWithPrefix("dst_unused", Value);
7453   if (res != MatchOperand_Success) {
7454     return res;
7455   }
7456 
7457   int64_t Int;
7458   Int = StringSwitch<int64_t>(Value)
7459         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7460         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7461         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7462         .Default(0xffffffff);
7463   Parser.Lex(); // eat last token
7464 
7465   if (Int == 0xffffffff) {
7466     return MatchOperand_ParseFail;
7467   }
7468 
7469   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7470   return MatchOperand_Success;
7471 }
7472 
7473 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7474   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7475 }
7476 
7477 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7478   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7479 }
7480 
7481 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7482   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7483 }
7484 
7485 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7486   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7487 }
7488 
7489 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7490   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7491 }
7492 
7493 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7494                               uint64_t BasicInstType,
7495                               bool SkipDstVcc,
7496                               bool SkipSrcVcc) {
7497   using namespace llvm::AMDGPU::SDWA;
7498 
7499   OptionalImmIndexMap OptionalIdx;
7500   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7501   bool SkippedVcc = false;
7502 
7503   unsigned I = 1;
7504   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7505   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7506     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7507   }
7508 
7509   for (unsigned E = Operands.size(); I != E; ++I) {
7510     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7511     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7512         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7513       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
7514       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7515       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
7516       // Skip VCC only if we did not skip it on the previous iteration.
7517       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7518       if (BasicInstType == SIInstrFlags::VOP2 &&
7519           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7520            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7521         SkippedVcc = true;
7522         continue;
7523       } else if (BasicInstType == SIInstrFlags::VOPC &&
7524                  Inst.getNumOperands() == 0) {
7525         SkippedVcc = true;
7526         continue;
7527       }
7528     }
7529     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7530       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7531     } else if (Op.isImm()) {
7532       // Handle optional arguments
7533       OptionalIdx[Op.getImmTy()] = I;
7534     } else {
7535       llvm_unreachable("Invalid operand type");
7536     }
7537     SkippedVcc = false;
7538   }
7539 
7540   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7541       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7542       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7543     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
7544     switch (BasicInstType) {
7545     case SIInstrFlags::VOP1:
7546       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7547       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7548         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7549       }
7550       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7551       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7552       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7553       break;
7554 
7555     case SIInstrFlags::VOP2:
7556       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7557       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7558         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7559       }
7560       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7561       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7562       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7563       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7564       break;
7565 
7566     case SIInstrFlags::VOPC:
7567       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7568         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7569       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7570       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7571       break;
7572 
7573     default:
7574       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7575     }
7576   }
7577 
7578   // Special case for v_mac_{f16, f32}:
7579   // they have a src2 register operand that is tied to the dst operand.
7580   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7581       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7582     auto it = Inst.begin();
7583     std::advance(
7584       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7585     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7586   }
7587 }
7588 
7589 //===----------------------------------------------------------------------===//
7590 // mAI
7591 //===----------------------------------------------------------------------===//
7592 
7593 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7594   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7595 }
7596 
7597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7598   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7599 }
7600 
7601 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7602   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7603 }
7604 
7605 /// Force static initialization.
7606 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7607   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7608   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7609 }
7610 
7611 #define GET_REGISTER_MATCHER
7612 #define GET_MATCHER_IMPLEMENTATION
7613 #define GET_MNEMONIC_SPELL_CHECKER
7614 #define GET_MNEMONIC_CHECKER
7615 #include "AMDGPUGenAsmMatcher.inc"
7616 
7617 // This function must be defined after the auto-generated include so that the
7618 // MatchClassKind enum is defined.
7619 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7620                                                      unsigned Kind) {
7621   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7622   // But MatchInstructionImpl() expects to see a token and fails to validate the
7623   // operand. This method checks whether we were given an immediate operand where
7624   // the matcher expects the corresponding token.
7625   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7626   switch (Kind) {
7627   case MCK_addr64:
7628     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7629   case MCK_gds:
7630     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7631   case MCK_lds:
7632     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7633   case MCK_glc:
7634     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7635   case MCK_idxen:
7636     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7637   case MCK_offen:
7638     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7639   case MCK_SSrcB32:
7640     // When operands have expression values, they will return true for isToken,
7641     // because it is not possible to distinguish between a token and an
7642     // expression at parse time. MatchInstructionImpl() will always try to
7643     // match an operand as a token when isToken returns true, and when the
7644     // name of the expression is not a valid token the match will fail,
7645     // so we need to handle it here.
7646     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7647   case MCK_SSrcF32:
7648     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7649   case MCK_SoppBrTarget:
7650     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7651   case MCK_VReg32OrOff:
7652     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7653   case MCK_InterpSlot:
7654     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7655   case MCK_Attr:
7656     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7657   case MCK_AttrChan:
7658     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7659   case MCK_ImmSMEMOffset:
7660     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7661   case MCK_SReg_64:
7662   case MCK_SReg_64_XEXEC:
7663     // Null is defined as a 32-bit register, but
7664     // it should also be accepted for 64-bit operands.
7665     // The following code enables it for SReg_64 operands
7666     // used as source and destination. Remaining source
7667     // operands are handled in isInlinableImm.
7668     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7669   default:
7670     return Match_InvalidOperand;
7671   }
7672 }
7673 
7674 //===----------------------------------------------------------------------===//
7675 // endpgm
7676 //===----------------------------------------------------------------------===//
7677 
7678 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7679   SMLoc S = Parser.getTok().getLoc();
7680   int64_t Imm = 0;
7681 
7682   if (!parseExpr(Imm)) {
7683     // The operand is optional, if not present default to 0
7684     Imm = 0;
7685   }
7686 
7687   if (!isUInt<16>(Imm)) {
7688     Error(S, "expected a 16-bit value");
7689     return MatchOperand_ParseFail;
7690   }
7691 
7692   Operands.push_back(
7693       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7694   return MatchOperand_Success;
7695 }
7696 
7697 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7698