1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
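    // Example: a source operand written as "-|v0|" sets both Neg and Abs, so
    // getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS, while
    // "sext(v0)" sets Sext and yields SISrcMods::SEXT. FP and integer
    // modifiers are never combined on the same operand.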
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_256RegClassID) ||
282            isRegClass(AMDGPU::VReg_512RegClassID) ||
283            isRegClass(AMDGPU::VReg_1024RegClassID);
284   }
285 
286   bool isVReg32() const {
287     return isRegClass(AMDGPU::VGPR_32RegClassID);
288   }
289 
290   bool isVReg32OrOff() const {
291     return isOff() || isVReg32();
292   }
293 
294   bool isNull() const {
295     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
296   }
297 
298   bool isSDWAOperand(MVT type) const;
299   bool isSDWAFP16Operand() const;
300   bool isSDWAFP32Operand() const;
301   bool isSDWAInt16Operand() const;
302   bool isSDWAInt32Operand() const;
303 
304   bool isImmTy(ImmTy ImmT) const {
305     return isImm() && Imm.Type == ImmT;
306   }
307 
308   bool isImmModifier() const {
309     return isImm() && Imm.Type != ImmTyNone;
310   }
311 
312   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
313   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
314   bool isDMask() const { return isImmTy(ImmTyDMask); }
315   bool isDim() const { return isImmTy(ImmTyDim); }
316   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
317   bool isDA() const { return isImmTy(ImmTyDA); }
318   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
319   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
320   bool isLWE() const { return isImmTy(ImmTyLWE); }
321   bool isOff() const { return isImmTy(ImmTyOff); }
322   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
323   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
324   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
325   bool isOffen() const { return isImmTy(ImmTyOffen); }
326   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
327   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
328   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
329   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
330   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
331 
332   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
333   bool isGDS() const { return isImmTy(ImmTyGDS); }
334   bool isLDS() const { return isImmTy(ImmTyLDS); }
335   bool isDLC() const { return isImmTy(ImmTyDLC); }
336   bool isGLC() const { return isImmTy(ImmTyGLC); }
337   bool isSLC() const { return isImmTy(ImmTySLC); }
338   bool isSWZ() const { return isImmTy(ImmTySWZ); }
339   bool isTFE() const { return isImmTy(ImmTyTFE); }
340   bool isD16() const { return isImmTy(ImmTyD16); }
341   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
342   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
343   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
344   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
345   bool isFI() const { return isImmTy(ImmTyDppFi); }
346   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
347   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
348   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
349   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
350   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
351   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
352   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
353   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
354   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
355   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
356   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
357   bool isHigh() const { return isImmTy(ImmTyHigh); }
358 
359   bool isMod() const {
360     return isClampSI() || isOModSI();
361   }
362 
363   bool isRegOrImm() const {
364     return isReg() || isImm();
365   }
366 
367   bool isRegClass(unsigned RCID) const;
368 
369   bool isInlineValue() const;
370 
371   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
372     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
373   }
374 
375   bool isSCSrcB16() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
377   }
378 
379   bool isSCSrcV2B16() const {
380     return isSCSrcB16();
381   }
382 
383   bool isSCSrcB32() const {
384     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
385   }
386 
387   bool isSCSrcB64() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
389   }
390 
391   bool isBoolReg() const;
392 
393   bool isSCSrcF16() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
395   }
396 
397   bool isSCSrcV2F16() const {
398     return isSCSrcF16();
399   }
400 
401   bool isSCSrcF32() const {
402     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
403   }
404 
405   bool isSCSrcF64() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
407   }
408 
409   bool isSSrcB32() const {
410     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
411   }
412 
413   bool isSSrcB16() const {
414     return isSCSrcB16() || isLiteralImm(MVT::i16);
415   }
416 
417   bool isSSrcV2B16() const {
418     llvm_unreachable("cannot happen");
419     return isSSrcB16();
420   }
421 
422   bool isSSrcB64() const {
423     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
424     // See isVSrc64().
425     return isSCSrcB64() || isLiteralImm(MVT::i64);
426   }
427 
428   bool isSSrcF32() const {
429     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
430   }
431 
432   bool isSSrcF64() const {
433     return isSCSrcB64() || isLiteralImm(MVT::f64);
434   }
435 
436   bool isSSrcF16() const {
437     return isSCSrcB16() || isLiteralImm(MVT::f16);
438   }
439 
440   bool isSSrcV2F16() const {
441     llvm_unreachable("cannot happen");
442     return isSSrcF16();
443   }
444 
445   bool isSSrcOrLdsB32() const {
446     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
447            isLiteralImm(MVT::i32) || isExpr();
448   }
449 
450   bool isVCSrcB32() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
452   }
453 
454   bool isVCSrcB64() const {
455     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
456   }
457 
458   bool isVCSrcB16() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
460   }
461 
462   bool isVCSrcV2B16() const {
463     return isVCSrcB16();
464   }
465 
466   bool isVCSrcF32() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
468   }
469 
470   bool isVCSrcF64() const {
471     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
472   }
473 
474   bool isVCSrcF16() const {
475     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
476   }
477 
478   bool isVCSrcV2F16() const {
479     return isVCSrcF16();
480   }
481 
482   bool isVSrcB32() const {
483     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
484   }
485 
486   bool isVSrcB64() const {
487     return isVCSrcF64() || isLiteralImm(MVT::i64);
488   }
489 
490   bool isVSrcB16() const {
491     return isVCSrcF16() || isLiteralImm(MVT::i16);
492   }
493 
494   bool isVSrcV2B16() const {
495     return isVSrcB16() || isLiteralImm(MVT::v2i16);
496   }
497 
498   bool isVSrcF32() const {
499     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
500   }
501 
502   bool isVSrcF64() const {
503     return isVCSrcF64() || isLiteralImm(MVT::f64);
504   }
505 
506   bool isVSrcF16() const {
507     return isVCSrcF16() || isLiteralImm(MVT::f16);
508   }
509 
510   bool isVSrcV2F16() const {
511     return isVSrcF16() || isLiteralImm(MVT::v2f16);
512   }
513 
514   bool isVISrcB32() const {
515     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
516   }
517 
518   bool isVISrcB16() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
520   }
521 
522   bool isVISrcV2B16() const {
523     return isVISrcB16();
524   }
525 
526   bool isVISrcF32() const {
527     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
528   }
529 
530   bool isVISrcF16() const {
531     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
532   }
533 
534   bool isVISrcV2F16() const {
535     return isVISrcF16() || isVISrcB32();
536   }
537 
538   bool isAISrcB32() const {
539     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
540   }
541 
542   bool isAISrcB16() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
544   }
545 
546   bool isAISrcV2B16() const {
547     return isAISrcB16();
548   }
549 
550   bool isAISrcF32() const {
551     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
552   }
553 
554   bool isAISrcF16() const {
555     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
556   }
557 
558   bool isAISrcV2F16() const {
559     return isAISrcF16() || isAISrcB32();
560   }
561 
562   bool isAISrc_128B32() const {
563     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
564   }
565 
566   bool isAISrc_128B16() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
568   }
569 
570   bool isAISrc_128V2B16() const {
571     return isAISrc_128B16();
572   }
573 
574   bool isAISrc_128F32() const {
575     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
576   }
577 
578   bool isAISrc_128F16() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
580   }
581 
582   bool isAISrc_128V2F16() const {
583     return isAISrc_128F16() || isAISrc_128B32();
584   }
585 
586   bool isAISrc_512B32() const {
587     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
588   }
589 
590   bool isAISrc_512B16() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
592   }
593 
594   bool isAISrc_512V2B16() const {
595     return isAISrc_512B16();
596   }
597 
598   bool isAISrc_512F32() const {
599     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
600   }
601 
602   bool isAISrc_512F16() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
604   }
605 
606   bool isAISrc_512V2F16() const {
607     return isAISrc_512F16() || isAISrc_512B32();
608   }
609 
610   bool isAISrc_1024B32() const {
611     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
612   }
613 
614   bool isAISrc_1024B16() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
616   }
617 
618   bool isAISrc_1024V2B16() const {
619     return isAISrc_1024B16();
620   }
621 
622   bool isAISrc_1024F32() const {
623     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
624   }
625 
626   bool isAISrc_1024F16() const {
627     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
628   }
629 
630   bool isAISrc_1024V2F16() const {
631     return isAISrc_1024F16() || isAISrc_1024B32();
632   }
633 
634   bool isKImmFP32() const {
635     return isLiteralImm(MVT::f32);
636   }
637 
638   bool isKImmFP16() const {
639     return isLiteralImm(MVT::f16);
640   }
641 
642   bool isMem() const override {
643     return false;
644   }
645 
646   bool isExpr() const {
647     return Kind == Expression;
648   }
649 
650   bool isSoppBrTarget() const {
651     return isExpr() || isImm();
652   }
653 
654   bool isSWaitCnt() const;
655   bool isHwreg() const;
656   bool isSendMsg() const;
657   bool isSwizzle() const;
658   bool isSMRDOffset8() const;
659   bool isSMRDOffset20() const;
660   bool isSMRDLiteralOffset() const;
661   bool isDPP8() const;
662   bool isDPPCtrl() const;
663   bool isBLGP() const;
664   bool isCBSZ() const;
665   bool isABID() const;
666   bool isGPRIdxMode() const;
667   bool isS16Imm() const;
668   bool isU16Imm() const;
669   bool isEndpgm() const;
670 
671   StringRef getExpressionAsToken() const {
672     assert(isExpr());
673     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
674     return S->getSymbol().getName();
675   }
676 
677   StringRef getToken() const {
678     assert(isToken());
679 
680     if (Kind == Expression)
681       return getExpressionAsToken();
682 
683     return StringRef(Tok.Data, Tok.Length);
684   }
685 
686   int64_t getImm() const {
687     assert(isImm());
688     return Imm.Val;
689   }
690 
691   ImmTy getImmTy() const {
692     assert(isImm());
693     return Imm.Type;
694   }
695 
696   unsigned getReg() const override {
697     assert(isRegKind());
698     return Reg.RegNo;
699   }
700 
701   SMLoc getStartLoc() const override {
702     return StartLoc;
703   }
704 
705   SMLoc getEndLoc() const override {
706     return EndLoc;
707   }
708 
709   SMRange getLocRange() const {
710     return SMRange(StartLoc, EndLoc);
711   }
712 
713   Modifiers getModifiers() const {
714     assert(isRegKind() || isImmTy(ImmTyNone));
715     return isRegKind() ? Reg.Mods : Imm.Mods;
716   }
717 
718   void setModifiers(Modifiers Mods) {
719     assert(isRegKind() || isImmTy(ImmTyNone));
720     if (isRegKind())
721       Reg.Mods = Mods;
722     else
723       Imm.Mods = Mods;
724   }
725 
726   bool hasModifiers() const {
727     return getModifiers().hasModifiers();
728   }
729 
730   bool hasFPModifiers() const {
731     return getModifiers().hasFPModifiers();
732   }
733 
734   bool hasIntModifiers() const {
735     return getModifiers().hasIntModifiers();
736   }
737 
738   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
739 
740   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
741 
742   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
743 
744   template <unsigned Bitwidth>
745   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
746 
747   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
748     addKImmFPOperands<16>(Inst, N);
749   }
750 
751   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
752     addKImmFPOperands<32>(Inst, N);
753   }
754 
755   void addRegOperands(MCInst &Inst, unsigned N) const;
756 
757   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
758     addRegOperands(Inst, N);
759   }
760 
761   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
762     if (isRegKind())
763       addRegOperands(Inst, N);
764     else if (isExpr())
765       Inst.addOperand(MCOperand::createExpr(Expr));
766     else
767       addImmOperands(Inst, N);
768   }
769 
770   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
771     Modifiers Mods = getModifiers();
772     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
773     if (isRegKind()) {
774       addRegOperands(Inst, N);
775     } else {
776       addImmOperands(Inst, N, false);
777     }
778   }
779 
780   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
781     assert(!hasIntModifiers());
782     addRegOrImmWithInputModsOperands(Inst, N);
783   }
784 
785   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
786     assert(!hasFPModifiers());
787     addRegOrImmWithInputModsOperands(Inst, N);
788   }
789 
790   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
791     Modifiers Mods = getModifiers();
792     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
793     assert(isRegKind());
794     addRegOperands(Inst, N);
795   }
796 
797   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
798     assert(!hasIntModifiers());
799     addRegWithInputModsOperands(Inst, N);
800   }
801 
802   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
803     assert(!hasFPModifiers());
804     addRegWithInputModsOperands(Inst, N);
805   }
806 
807   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
808     if (isImm())
809       addImmOperands(Inst, N);
810     else {
811       assert(isExpr());
812       Inst.addOperand(MCOperand::createExpr(Expr));
813     }
814   }
815 
816   static void printImmTy(raw_ostream& OS, ImmTy Type) {
817     switch (Type) {
818     case ImmTyNone: OS << "None"; break;
819     case ImmTyGDS: OS << "GDS"; break;
820     case ImmTyLDS: OS << "LDS"; break;
821     case ImmTyOffen: OS << "Offen"; break;
822     case ImmTyIdxen: OS << "Idxen"; break;
823     case ImmTyAddr64: OS << "Addr64"; break;
824     case ImmTyOffset: OS << "Offset"; break;
825     case ImmTyInstOffset: OS << "InstOffset"; break;
826     case ImmTyOffset0: OS << "Offset0"; break;
827     case ImmTyOffset1: OS << "Offset1"; break;
828     case ImmTyDLC: OS << "DLC"; break;
829     case ImmTyGLC: OS << "GLC"; break;
830     case ImmTySLC: OS << "SLC"; break;
831     case ImmTySWZ: OS << "SWZ"; break;
832     case ImmTyTFE: OS << "TFE"; break;
833     case ImmTyD16: OS << "D16"; break;
834     case ImmTyFORMAT: OS << "FORMAT"; break;
835     case ImmTyClampSI: OS << "ClampSI"; break;
836     case ImmTyOModSI: OS << "OModSI"; break;
837     case ImmTyDPP8: OS << "DPP8"; break;
838     case ImmTyDppCtrl: OS << "DppCtrl"; break;
839     case ImmTyDppRowMask: OS << "DppRowMask"; break;
840     case ImmTyDppBankMask: OS << "DppBankMask"; break;
841     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
842     case ImmTyDppFi: OS << "FI"; break;
843     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
844     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
845     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
846     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
847     case ImmTyDMask: OS << "DMask"; break;
848     case ImmTyDim: OS << "Dim"; break;
849     case ImmTyUNorm: OS << "UNorm"; break;
850     case ImmTyDA: OS << "DA"; break;
851     case ImmTyR128A16: OS << "R128A16"; break;
852     case ImmTyA16: OS << "A16"; break;
853     case ImmTyLWE: OS << "LWE"; break;
854     case ImmTyOff: OS << "Off"; break;
855     case ImmTyExpTgt: OS << "ExpTgt"; break;
856     case ImmTyExpCompr: OS << "ExpCompr"; break;
857     case ImmTyExpVM: OS << "ExpVM"; break;
858     case ImmTyHwreg: OS << "Hwreg"; break;
859     case ImmTySendMsg: OS << "SendMsg"; break;
860     case ImmTyInterpSlot: OS << "InterpSlot"; break;
861     case ImmTyInterpAttr: OS << "InterpAttr"; break;
862     case ImmTyAttrChan: OS << "AttrChan"; break;
863     case ImmTyOpSel: OS << "OpSel"; break;
864     case ImmTyOpSelHi: OS << "OpSelHi"; break;
865     case ImmTyNegLo: OS << "NegLo"; break;
866     case ImmTyNegHi: OS << "NegHi"; break;
867     case ImmTySwizzle: OS << "Swizzle"; break;
868     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
869     case ImmTyHigh: OS << "High"; break;
870     case ImmTyBLGP: OS << "BLGP"; break;
871     case ImmTyCBSZ: OS << "CBSZ"; break;
872     case ImmTyABID: OS << "ABID"; break;
873     case ImmTyEndpgm: OS << "Endpgm"; break;
874     }
875   }
876 
877   void print(raw_ostream &OS) const override {
878     switch (Kind) {
879     case Register:
880       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
881       break;
882     case Immediate:
883       OS << '<' << getImm();
884       if (getImmTy() != ImmTyNone) {
885         OS << " type: "; printImmTy(OS, getImmTy());
886       }
887       OS << " mods: " << Imm.Mods << '>';
888       break;
889     case Token:
890       OS << '\'' << getToken() << '\'';
891       break;
892     case Expression:
893       OS << "<expr " << *Expr << '>';
894       break;
895     }
896   }
897 
898   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
899                                       int64_t Val, SMLoc Loc,
900                                       ImmTy Type = ImmTyNone,
901                                       bool IsFPImm = false) {
902     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
903     Op->Imm.Val = Val;
904     Op->Imm.IsFPImm = IsFPImm;
905     Op->Imm.Type = Type;
906     Op->Imm.Mods = Modifiers();
907     Op->StartLoc = Loc;
908     Op->EndLoc = Loc;
909     return Op;
910   }
911 
912   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
913                                         StringRef Str, SMLoc Loc,
914                                         bool HasExplicitEncodingSize = true) {
915     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
916     Res->Tok.Data = Str.data();
917     Res->Tok.Length = Str.size();
918     Res->StartLoc = Loc;
919     Res->EndLoc = Loc;
920     return Res;
921   }
922 
923   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
924                                       unsigned RegNo, SMLoc S,
925                                       SMLoc E) {
926     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
927     Op->Reg.RegNo = RegNo;
928     Op->Reg.Mods = Modifiers();
929     Op->StartLoc = S;
930     Op->EndLoc = E;
931     return Op;
932   }
933 
934   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
935                                        const class MCExpr *Expr, SMLoc S) {
936     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
937     Op->Expr = Expr;
938     Op->StartLoc = S;
939     Op->EndLoc = S;
940     return Op;
941   }
942 };
943 
944 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
945   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
946   return OS;
947 }
948 
949 //===----------------------------------------------------------------------===//
950 // AsmParser
951 //===----------------------------------------------------------------------===//
952 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
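// For example, after usesRegister(IS_VGPR, 0, 4) and usesRegister(IS_SGPR, 5, 1)
// the symbols .kernel.vgpr_count and .kernel.sgpr_count evaluate to 4 and 6,
// i.e. one past the highest register dword index referenced so far.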
956 class KernelScopeInfo {
957   int SgprIndexUnusedMin = -1;
958   int VgprIndexUnusedMin = -1;
959   MCContext *Ctx = nullptr;
960 
961   void usesSgprAt(int i) {
962     if (i >= SgprIndexUnusedMin) {
963       SgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971   void usesVgprAt(int i) {
972     if (i >= VgprIndexUnusedMin) {
973       VgprIndexUnusedMin = ++i;
974       if (Ctx) {
975         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
976         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
977       }
978     }
979   }
980 
981 public:
982   KernelScopeInfo() = default;
983 
984   void initialize(MCContext &Context) {
985     Ctx = &Context;
986     usesSgprAt(SgprIndexUnusedMin = -1);
987     usesVgprAt(VgprIndexUnusedMin = -1);
988   }
989 
990   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
991     switch (RegKind) {
992       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
993       case IS_AGPR: // fall through
994       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
995       default: break;
996     }
997   }
998 };
999 
1000 class AMDGPUAsmParser : public MCTargetAsmParser {
1001   MCAsmParser &Parser;
1002 
1003   // Number of extra operands parsed after the first optional operand.
1004   // This may be necessary to skip hardcoded mandatory operands.
1005   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1006 
1007   unsigned ForcedEncodingSize = 0;
1008   bool ForcedDPP = false;
1009   bool ForcedSDWA = false;
1010   KernelScopeInfo KernelScope;
1011 
1012   /// @name Auto-generated Match Functions
1013   /// {
1014 
1015 #define GET_ASSEMBLER_HEADER
1016 #include "AMDGPUGenAsmMatcher.inc"
1017 
1018   /// }
1019 
1020 private:
1021   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1022   bool OutOfRangeError(SMRange Range);
1023   /// Calculate VGPR/SGPR blocks required for given target, reserved
1024   /// registers, and user-specified NextFreeXGPR values.
1025   ///
1026   /// \param Features [in] Target features, used for bug corrections.
1027   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1028   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1029   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1030   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1031   /// descriptor field, if valid.
1032   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1033   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1034   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1035   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1036   /// \param VGPRBlocks [out] Result VGPR block count.
1037   /// \param SGPRBlocks [out] Result SGPR block count.
1038   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1039                           bool FlatScrUsed, bool XNACKUsed,
1040                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1041                           SMRange VGPRRange, unsigned NextFreeSGPR,
1042                           SMRange SGPRRange, unsigned &VGPRBlocks,
1043                           unsigned &SGPRBlocks);
1044   bool ParseDirectiveAMDGCNTarget();
1045   bool ParseDirectiveAMDHSAKernel();
1046   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1047   bool ParseDirectiveHSACodeObjectVersion();
1048   bool ParseDirectiveHSACodeObjectISA();
1049   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1050   bool ParseDirectiveAMDKernelCodeT();
1051   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1052   bool ParseDirectiveAMDGPUHsaKernel();
1053 
1054   bool ParseDirectiveISAVersion();
1055   bool ParseDirectiveHSAMetadata();
1056   bool ParseDirectivePALMetadataBegin();
1057   bool ParseDirectivePALMetadata();
1058   bool ParseDirectiveAMDGPULDS();
1059 
1060   /// Common code to parse out a block of text (typically YAML) between start and
1061   /// end directives.
1062   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1063                            const char *AssemblerDirectiveEnd,
1064                            std::string &CollectString);
1065 
1066   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1067                              RegisterKind RegKind, unsigned Reg1);
1068   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1069                            unsigned &RegNum, unsigned &RegWidth,
1070                            bool RestoreOnFailure = false);
1071   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1072                            unsigned &RegNum, unsigned &RegWidth,
1073                            SmallVectorImpl<AsmToken> &Tokens);
1074   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1075                            unsigned &RegWidth,
1076                            SmallVectorImpl<AsmToken> &Tokens);
1077   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1078                            unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1081                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1082   bool ParseRegRange(unsigned& Num, unsigned& Width);
1083   unsigned getRegularReg(RegisterKind RegKind,
1084                          unsigned RegNum,
1085                          unsigned RegWidth);
1086 
1087   bool isRegister();
1088   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1089   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1090   void initializeGprCountSymbol(RegisterKind RegKind);
1091   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1092                              unsigned RegWidth);
1093   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1094                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1095   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1096                  bool IsGdsHardcoded);
1097 
1098 public:
1099   enum AMDGPUMatchResultTy {
1100     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1101   };
1102   enum OperandMode {
1103     OperandMode_Default,
1104     OperandMode_NSA,
1105   };
1106 
1107   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1108 
1109   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1110                const MCInstrInfo &MII,
1111                const MCTargetOptions &Options)
1112       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1113     MCAsmParserExtension::Initialize(Parser);
1114 
1115     if (getFeatureBits().none()) {
1116       // Set default features.
1117       copySTI().ToggleFeature("southern-islands");
1118     }
1119 
1120     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1121 
1122     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
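      // For gfx900, for example, the generation symbols defined below evaluate
      // to 9, 0 and 0, respectively.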
1127       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1128       MCContext &Ctx = getContext();
1129       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1130         MCSymbol *Sym =
1131             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1132         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1133         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1134         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1135         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1136         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1137       } else {
1138         MCSymbol *Sym =
1139             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1140         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1141         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1142         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1143         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1144         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1145       }
1146       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1147         initializeGprCountSymbol(IS_VGPR);
1148         initializeGprCountSymbol(IS_SGPR);
1149       } else
1150         KernelScope.initialize(getContext());
1151     }
1152   }
1153 
1154   bool hasXNACK() const {
1155     return AMDGPU::hasXNACK(getSTI());
1156   }
1157 
1158   bool hasMIMG_R128() const {
1159     return AMDGPU::hasMIMG_R128(getSTI());
1160   }
1161 
1162   bool hasPackedD16() const {
1163     return AMDGPU::hasPackedD16(getSTI());
1164   }
1165 
1166   bool hasGFX10A16() const {
1167     return AMDGPU::hasGFX10A16(getSTI());
1168   }
1169 
1170   bool isSI() const {
1171     return AMDGPU::isSI(getSTI());
1172   }
1173 
1174   bool isCI() const {
1175     return AMDGPU::isCI(getSTI());
1176   }
1177 
1178   bool isVI() const {
1179     return AMDGPU::isVI(getSTI());
1180   }
1181 
1182   bool isGFX9() const {
1183     return AMDGPU::isGFX9(getSTI());
1184   }
1185 
1186   bool isGFX10() const {
1187     return AMDGPU::isGFX10(getSTI());
1188   }
1189 
1190   bool hasInv2PiInlineImm() const {
1191     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1192   }
1193 
1194   bool hasFlatOffsets() const {
1195     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1196   }
1197 
1198   bool hasSGPR102_SGPR103() const {
1199     return !isVI() && !isGFX9();
1200   }
1201 
1202   bool hasSGPR104_SGPR105() const {
1203     return isGFX10();
1204   }
1205 
1206   bool hasIntClamp() const {
1207     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1208   }
1209 
1210   AMDGPUTargetStreamer &getTargetStreamer() {
1211     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1212     return static_cast<AMDGPUTargetStreamer &>(TS);
1213   }
1214 
1215   const MCRegisterInfo *getMRI() const {
1216     // We need this const_cast because for some reason getContext() is not const
1217     // in MCAsmParser.
1218     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1219   }
1220 
1221   const MCInstrInfo *getMII() const {
1222     return &MII;
1223   }
1224 
1225   const FeatureBitset &getFeatureBits() const {
1226     return getSTI().getFeatureBits();
1227   }
1228 
1229   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1230   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1231   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1232 
1233   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1234   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1235   bool isForcedDPP() const { return ForcedDPP; }
1236   bool isForcedSDWA() const { return ForcedSDWA; }
1237   ArrayRef<unsigned> getMatchedVariants() const;
1238 
1239   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1240   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1241                      bool RestoreOnFailure);
1242   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1243   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1244                                         SMLoc &EndLoc) override;
1245   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1246   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1247                                       unsigned Kind) override;
1248   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1249                                OperandVector &Operands, MCStreamer &Out,
1250                                uint64_t &ErrorInfo,
1251                                bool MatchingInlineAsm) override;
1252   bool ParseDirective(AsmToken DirectiveID) override;
1253   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1254                                     OperandMode Mode = OperandMode_Default);
1255   StringRef parseMnemonicSuffix(StringRef Name);
1256   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1257                         SMLoc NameLoc, OperandVector &Operands) override;
1258   //bool ProcessInstruction(MCInst &Inst);
1259 
1260   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1261 
1262   OperandMatchResultTy
1263   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1264                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1265                      bool (*ConvertResult)(int64_t &) = nullptr);
1266 
1267   OperandMatchResultTy
1268   parseOperandArrayWithPrefix(const char *Prefix,
1269                               OperandVector &Operands,
1270                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1271                               bool (*ConvertResult)(int64_t&) = nullptr);
1272 
1273   OperandMatchResultTy
1274   parseNamedBit(const char *Name, OperandVector &Operands,
1275                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1276   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1277                                              StringRef &Value);
1278 
1279   bool isModifier();
1280   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1281   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1282   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1283   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1284   bool parseSP3NegModifier();
1285   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1286   OperandMatchResultTy parseReg(OperandVector &Operands);
1287   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1288   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1289   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1290   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1291   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1292   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1293   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1294 
1295   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1296   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1297   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1298   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1299 
1300   bool parseCnt(int64_t &IntVal);
1301   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1302   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1303 
1304 private:
1305   struct OperandInfoTy {
1306     int64_t Id;
1307     bool IsSymbolic = false;
1308     bool IsDefined = false;
1309 
1310     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1311   };
1312 
1313   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1314   bool validateSendMsg(const OperandInfoTy &Msg,
1315                        const OperandInfoTy &Op,
1316                        const OperandInfoTy &Stream,
1317                        const SMLoc Loc);
1318 
1319   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1320   bool validateHwreg(const OperandInfoTy &HwReg,
1321                      const int64_t Offset,
1322                      const int64_t Width,
1323                      const SMLoc Loc);
1324 
1325   void errorExpTgt();
1326   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1327   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1328 
1329   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1330   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1331   bool validateSOPLiteral(const MCInst &Inst) const;
1332   bool validateConstantBusLimitations(const MCInst &Inst);
1333   bool validateEarlyClobberLimitations(const MCInst &Inst);
1334   bool validateIntClampSupported(const MCInst &Inst);
1335   bool validateMIMGAtomicDMask(const MCInst &Inst);
1336   bool validateMIMGGatherDMask(const MCInst &Inst);
1337   bool validateMovrels(const MCInst &Inst);
1338   bool validateMIMGDataSize(const MCInst &Inst);
1339   bool validateMIMGAddrSize(const MCInst &Inst);
1340   bool validateMIMGD16(const MCInst &Inst);
1341   bool validateMIMGDim(const MCInst &Inst);
1342   bool validateLdsDirect(const MCInst &Inst);
1343   bool validateOpSel(const MCInst &Inst);
1344   bool validateVccOperand(unsigned Reg) const;
1345   bool validateVOP3Literal(const MCInst &Inst) const;
1346   unsigned getConstantBusLimit(unsigned Opcode) const;
1347   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1348   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1349   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1350 
1351   bool isId(const StringRef Id) const;
1352   bool isId(const AsmToken &Token, const StringRef Id) const;
1353   bool isToken(const AsmToken::TokenKind Kind) const;
1354   bool trySkipId(const StringRef Id);
1355   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1356   bool trySkipToken(const AsmToken::TokenKind Kind);
1357   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1358   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1359   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1360   AsmToken::TokenKind getTokenKind() const;
1361   bool parseExpr(int64_t &Imm);
1362   bool parseExpr(OperandVector &Operands);
1363   StringRef getTokenStr() const;
1364   AsmToken peekToken();
1365   AsmToken getToken() const;
1366   SMLoc getLoc() const;
1367   void lex();
1368 
1369 public:
1370   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1371   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1372 
1373   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1374   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1375   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1376   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1377   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1378   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1379 
1380   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1381                             const unsigned MinVal,
1382                             const unsigned MaxVal,
1383                             const StringRef ErrMsg);
1384   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1385   bool parseSwizzleOffset(int64_t &Imm);
1386   bool parseSwizzleMacro(int64_t &Imm);
1387   bool parseSwizzleQuadPerm(int64_t &Imm);
1388   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1389   bool parseSwizzleBroadcast(int64_t &Imm);
1390   bool parseSwizzleSwap(int64_t &Imm);
1391   bool parseSwizzleReverse(int64_t &Imm);
1392 
1393   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1394   int64_t parseGPRIdxMacro();
1395 
1396   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1397   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1398   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1399   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1400   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1401 
1402   AMDGPUOperand::Ptr defaultDLC() const;
1403   AMDGPUOperand::Ptr defaultGLC() const;
1404   AMDGPUOperand::Ptr defaultSLC() const;
1405 
1406   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1407   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1408   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1409   AMDGPUOperand::Ptr defaultFlatOffset() const;
1410 
1411   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1412 
1413   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1414                OptionalImmIndexMap &OptionalIdx);
1415   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1416   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1417   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1418 
1419   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1420 
1421   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1422                bool IsAtomic = false);
1423   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1424 
1425   OperandMatchResultTy parseDim(OperandVector &Operands);
1426   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1427   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1428   AMDGPUOperand::Ptr defaultRowMask() const;
1429   AMDGPUOperand::Ptr defaultBankMask() const;
1430   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1431   AMDGPUOperand::Ptr defaultFI() const;
1432   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1433   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1434 
1435   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1436                                     AMDGPUOperand::ImmTy Type);
1437   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1438   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1439   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1440   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1441   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1442   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1443   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1444                uint64_t BasicInstType,
1445                bool SkipDstVcc = false,
1446                bool SkipSrcVcc = false);
1447 
1448   AMDGPUOperand::Ptr defaultBLGP() const;
1449   AMDGPUOperand::Ptr defaultCBSZ() const;
1450   AMDGPUOperand::Ptr defaultABID() const;
1451 
1452   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1453   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1454 };
1455 
1456 struct OptionalOperand {
1457   const char *Name;
1458   AMDGPUOperand::ImmTy Type;
1459   bool IsBit;
1460   bool (*ConvertResult)(int64_t&);
1461 };
1462 
1463 } // end anonymous namespace
1464 
// May be called with an integer type of equivalent bitwidth.
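// For example, a 2-byte operand (f16 or i16) maps to APFloat::IEEEhalf(), since
// only the operand's bit width matters here.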
1466 static const fltSemantics *getFltSemantics(unsigned Size) {
1467   switch (Size) {
1468   case 4:
1469     return &APFloat::IEEEsingle();
1470   case 8:
1471     return &APFloat::IEEEdouble();
1472   case 2:
1473     return &APFloat::IEEEhalf();
1474   default:
1475     llvm_unreachable("unsupported fp type");
1476   }
1477 }
1478 
1479 static const fltSemantics *getFltSemantics(MVT VT) {
1480   return getFltSemantics(VT.getSizeInBits() / 8);
1481 }
1482 
1483 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1484   switch (OperandType) {
1485   case AMDGPU::OPERAND_REG_IMM_INT32:
1486   case AMDGPU::OPERAND_REG_IMM_FP32:
1487   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1488   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1489   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1490   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1491     return &APFloat::IEEEsingle();
1492   case AMDGPU::OPERAND_REG_IMM_INT64:
1493   case AMDGPU::OPERAND_REG_IMM_FP64:
1494   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1495   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1496     return &APFloat::IEEEdouble();
1497   case AMDGPU::OPERAND_REG_IMM_INT16:
1498   case AMDGPU::OPERAND_REG_IMM_FP16:
1499   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1500   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1501   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1502   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1503   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1504   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1505   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1506   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1507   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1508   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1509     return &APFloat::IEEEhalf();
1510   default:
1511     llvm_unreachable("unsupported fp type");
1512   }
1513 }
1514 
1515 //===----------------------------------------------------------------------===//
1516 // Operand
1517 //===----------------------------------------------------------------------===//
1518 
1519 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1520   bool Lost;
1521 
  // Convert the literal to the operand's floating-point semantics.
1523   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1524                                                APFloat::rmNearestTiesToEven,
1525                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1527   if (Status != APFloat::opOK &&
1528       Lost &&
1529       ((Status & APFloat::opOverflow)  != 0 ||
1530        (Status & APFloat::opUnderflow) != 0)) {
1531     return false;
1532   }
1533 
1534   return true;
1535 }
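// For example, converting 1.0e10 to f16 overflows and is rejected above, while
// 1.0/3.0 merely loses precision and is still accepted.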
1536 
1537 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1538   return isUIntN(Size, Val) || isIntN(Size, Val);
1539 }
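// For example, with Size == 16 both 0xFFFF (unsigned) and -1 (signed) are safe
// truncations, while 0x1FFFF is representable in neither form.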
1540 
1541 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1542 
1543   // This is a hack to enable named inline values like
1544   // shared_base with both 32-bit and 64-bit operands.
1545   // Note that these values are defined as
1546   // 32-bit operands only.
1547   if (isInlineValue()) {
1548     return true;
1549   }
1550 
1551   if (!isImmTy(ImmTyNone)) {
1552     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1553     return false;
1554   }
1555   // TODO: We should avoid using host float here. It would be better to
1556   // check the float bit values which is what a few other places do.
1557   // We've had bot failures before due to weird NaN support on mips hosts.
1558 
1559   APInt Literal(64, Imm.Val);
1560 
1561   if (Imm.IsFPImm) { // We got an fp literal token
1562     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1563       return AMDGPU::isInlinableLiteral64(Imm.Val,
1564                                           AsmParser->hasInv2PiInlineImm());
1565     }
1566 
1567     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1568     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1569       return false;
1570 
1571     if (type.getScalarSizeInBits() == 16) {
1572       return AMDGPU::isInlinableLiteral16(
1573         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1574         AsmParser->hasInv2PiInlineImm());
1575     }
1576 
1577     // Check if single precision literal is inlinable
1578     return AMDGPU::isInlinableLiteral32(
1579       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1580       AsmParser->hasInv2PiInlineImm());
1581   }
1582 
1583   // We got an int literal token.
1584   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1585     return AMDGPU::isInlinableLiteral64(Imm.Val,
1586                                         AsmParser->hasInv2PiInlineImm());
1587   }
1588 
1589   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1590     return false;
1591   }
1592 
1593   if (type.getScalarSizeInBits() == 16) {
1594     return AMDGPU::isInlinableLiteral16(
1595       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1596       AsmParser->hasInv2PiInlineImm());
1597   }
1598 
1599   return AMDGPU::isInlinableLiteral32(
1600     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1601     AsmParser->hasInv2PiInlineImm());
1602 }
1603 
1604 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1605   // Check that this immediate can be added as literal
1606   if (!isImmTy(ImmTyNone)) {
1607     return false;
1608   }
1609 
1610   if (!Imm.IsFPImm) {
1611     // We got int literal token.
1612 
1613     if (type == MVT::f64 && hasFPModifiers()) {
1614       // Cannot apply fp modifiers to int literals while preserving the same
1615       // semantics for VOP1/2/C and VOP3, because of integer truncation.
1616       // To avoid ambiguity, disable these cases.
1617       return false;
1618     }
1619 
1620     unsigned Size = type.getSizeInBits();
1621     if (Size == 64)
1622       Size = 32;
1623 
1624     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1625     // types.
1626     return isSafeTruncation(Imm.Val, Size);
1627   }
1628 
1629   // We got an fp literal token
1630   if (type == MVT::f64) { // Expected 64-bit fp operand
1631     // The low 32 bits of such literals would be zeroed, but we accept them.
1632     return true;
1633   }
1634 
1635   if (type == MVT::i64) { // Expected 64-bit int operand
1636     // We don't allow fp literals in 64-bit integer instructions. It is
1637     // unclear how we should encode them.
1638     return false;
1639   }
1640 
1641   // We allow fp literals with f16x2 operands assuming that the specified
1642   // literal goes into the lower half and the upper half is zero. We also
1643   // require that the literal can be losslessly converted to f16.
1644   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1645                      (type == MVT::v2i16)? MVT::i16 : type;
1646 
1647   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1648   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1649 }
1650 
1651 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1652   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1653 }
1654 
1655 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1656   if (AsmParser->isVI())
1657     return isVReg32();
1658   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1659     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1660   else
1661     return false;
1662 }
1663 
1664 bool AMDGPUOperand::isSDWAFP16Operand() const {
1665   return isSDWAOperand(MVT::f16);
1666 }
1667 
1668 bool AMDGPUOperand::isSDWAFP32Operand() const {
1669   return isSDWAOperand(MVT::f32);
1670 }
1671 
1672 bool AMDGPUOperand::isSDWAInt16Operand() const {
1673   return isSDWAOperand(MVT::i16);
1674 }
1675 
1676 bool AMDGPUOperand::isSDWAInt32Operand() const {
1677   return isSDWAOperand(MVT::i32);
1678 }
1679 
1680 bool AMDGPUOperand::isBoolReg() const {
1681   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1682          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1683 }
1684 
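// 'abs' and 'neg' are applied directly to the sign bit of the raw fp
// encoding. For example, with Size == 4 the sign mask is 0x80000000, so
// applying abs to -2.0f (0xC0000000) yields 2.0f (0x40000000), and applying
// neg to 2.0f yields -2.0f.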
1685 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1686 {
1687   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1688   assert(Size == 2 || Size == 4 || Size == 8);
1689 
1690   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1691 
1692   if (Imm.Mods.Abs) {
1693     Val &= ~FpSignMask;
1694   }
1695   if (Imm.Mods.Neg) {
1696     Val ^= FpSignMask;
1697   }
1698 
1699   return Val;
1700 }
1701 
1702 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1703   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1704                              Inst.getNumOperands())) {
1705     addLiteralImmOperand(Inst, Imm.Val,
1706                          ApplyModifiers &&
1707                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1708   } else {
1709     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1710     Inst.addOperand(MCOperand::createImm(Imm.Val));
1711   }
1712 }
1713 
1714 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1715   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1716   auto OpNum = Inst.getNumOperands();
1717   // Check that this operand accepts literals
1718   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1719 
1720   if (ApplyModifiers) {
1721     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1722     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1723     Val = applyInputFPModifiers(Val, Size);
1724   }
1725 
1726   APInt Literal(64, Val);
1727   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1728 
1729   if (Imm.IsFPImm) { // We got an fp literal token
1730     switch (OpTy) {
1731     case AMDGPU::OPERAND_REG_IMM_INT64:
1732     case AMDGPU::OPERAND_REG_IMM_FP64:
1733     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1734     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1735       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1736                                        AsmParser->hasInv2PiInlineImm())) {
1737         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1738         return;
1739       }
1740 
1741       // Non-inlineable
1742       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1743         // For fp operands we check if low 32 bits are zeros
1744         if (Literal.getLoBits(32) != 0) {
1745           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1746           "Can't encode literal as exact 64-bit floating-point operand. "
1747           "Low 32-bits will be set to zero");
1748         }
1749 
1750         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1751         return;
1752       }
1753 
1754       // We don't allow fp literals in 64-bit integer instructions. It is
1755       // unclear how we should encode them. This case should be checked earlier
1756       // in predicate methods (isLiteralImm())
1757       llvm_unreachable("fp literal in 64-bit integer instruction.");
1758 
1759     case AMDGPU::OPERAND_REG_IMM_INT32:
1760     case AMDGPU::OPERAND_REG_IMM_FP32:
1761     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1762     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1763     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1764     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1765     case AMDGPU::OPERAND_REG_IMM_INT16:
1766     case AMDGPU::OPERAND_REG_IMM_FP16:
1767     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1768     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1769     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1770     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1771     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1772     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1773     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1774     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1775     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1776     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1777       bool lost;
1778       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1779       // Convert literal to the fp semantics of the target operand
1780       FPLiteral.convert(*getOpFltSemantics(OpTy),
1781                         APFloat::rmNearestTiesToEven, &lost);
1782       // We allow precision loss but not overflow or underflow. This should be
1783       // checked earlier in isLiteralImm().
1784 
1785       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1786       Inst.addOperand(MCOperand::createImm(ImmVal));
1787       return;
1788     }
1789     default:
1790       llvm_unreachable("invalid operand size");
1791     }
1792 
1793     return;
1794   }
1795 
1796   // We got an int literal token.
1797   // Only sign extend inline immediates.
1798   switch (OpTy) {
1799   case AMDGPU::OPERAND_REG_IMM_INT32:
1800   case AMDGPU::OPERAND_REG_IMM_FP32:
1801   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1802   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1803   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1805   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1806   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1807     if (isSafeTruncation(Val, 32) &&
1808         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1809                                      AsmParser->hasInv2PiInlineImm())) {
1810       Inst.addOperand(MCOperand::createImm(Val));
1811       return;
1812     }
1813 
1814     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1815     return;
1816 
1817   case AMDGPU::OPERAND_REG_IMM_INT64:
1818   case AMDGPU::OPERAND_REG_IMM_FP64:
1819   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1820   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1821     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1822       Inst.addOperand(MCOperand::createImm(Val));
1823       return;
1824     }
1825 
1826     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1827     return;
1828 
1829   case AMDGPU::OPERAND_REG_IMM_INT16:
1830   case AMDGPU::OPERAND_REG_IMM_FP16:
1831   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1832   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1833   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1834   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1835     if (isSafeTruncation(Val, 16) &&
1836         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1837                                      AsmParser->hasInv2PiInlineImm())) {
1838       Inst.addOperand(MCOperand::createImm(Val));
1839       return;
1840     }
1841 
1842     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1843     return;
1844 
1845   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1846   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1847   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1848   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1849     assert(isSafeTruncation(Val, 16));
1850     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1851                                         AsmParser->hasInv2PiInlineImm()));
1852 
1853     Inst.addOperand(MCOperand::createImm(Val));
1854     return;
1855   }
1856   default:
1857     llvm_unreachable("invalid operand size");
1858   }
1859 }
1860 
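// A sketch of the encoding done below: an integer token is truncated to
// Bitwidth, while an fp token is first rounded to the target width. For
// example, the token 1.0 would be emitted as 0x3C00 when Bitwidth == 16 and
// as 0x3F800000 when Bitwidth == 32.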
1861 template <unsigned Bitwidth>
1862 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1863   APInt Literal(64, Imm.Val);
1864 
1865   if (!Imm.IsFPImm) {
1866     // We got int literal token.
1867     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1868     return;
1869   }
1870 
1871   bool Lost;
1872   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1873   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1874                     APFloat::rmNearestTiesToEven, &Lost);
1875   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1876 }
1877 
1878 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1879   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1880 }
1881 
1882 static bool isInlineValue(unsigned Reg) {
1883   switch (Reg) {
1884   case AMDGPU::SRC_SHARED_BASE:
1885   case AMDGPU::SRC_SHARED_LIMIT:
1886   case AMDGPU::SRC_PRIVATE_BASE:
1887   case AMDGPU::SRC_PRIVATE_LIMIT:
1888   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1889     return true;
1890   case AMDGPU::SRC_VCCZ:
1891   case AMDGPU::SRC_EXECZ:
1892   case AMDGPU::SRC_SCC:
1893     return true;
1894   case AMDGPU::SGPR_NULL:
1895     return true;
1896   default:
1897     return false;
1898   }
1899 }
1900 
1901 bool AMDGPUOperand::isInlineValue() const {
1902   return isRegKind() && ::isInlineValue(getReg());
1903 }
1904 
1905 //===----------------------------------------------------------------------===//
1906 // AsmParser
1907 //===----------------------------------------------------------------------===//
1908 
1909 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1910   if (Is == IS_VGPR) {
1911     switch (RegWidth) {
1912       default: return -1;
1913       case 1: return AMDGPU::VGPR_32RegClassID;
1914       case 2: return AMDGPU::VReg_64RegClassID;
1915       case 3: return AMDGPU::VReg_96RegClassID;
1916       case 4: return AMDGPU::VReg_128RegClassID;
1917       case 5: return AMDGPU::VReg_160RegClassID;
1918       case 8: return AMDGPU::VReg_256RegClassID;
1919       case 16: return AMDGPU::VReg_512RegClassID;
1920       case 32: return AMDGPU::VReg_1024RegClassID;
1921     }
1922   } else if (Is == IS_TTMP) {
1923     switch (RegWidth) {
1924       default: return -1;
1925       case 1: return AMDGPU::TTMP_32RegClassID;
1926       case 2: return AMDGPU::TTMP_64RegClassID;
1927       case 4: return AMDGPU::TTMP_128RegClassID;
1928       case 8: return AMDGPU::TTMP_256RegClassID;
1929       case 16: return AMDGPU::TTMP_512RegClassID;
1930     }
1931   } else if (Is == IS_SGPR) {
1932     switch (RegWidth) {
1933       default: return -1;
1934       case 1: return AMDGPU::SGPR_32RegClassID;
1935       case 2: return AMDGPU::SGPR_64RegClassID;
1936       case 4: return AMDGPU::SGPR_128RegClassID;
1937       case 8: return AMDGPU::SGPR_256RegClassID;
1938       case 16: return AMDGPU::SGPR_512RegClassID;
1939     }
1940   } else if (Is == IS_AGPR) {
1941     switch (RegWidth) {
1942       default: return -1;
1943       case 1: return AMDGPU::AGPR_32RegClassID;
1944       case 2: return AMDGPU::AReg_64RegClassID;
1945       case 4: return AMDGPU::AReg_128RegClassID;
1946       case 16: return AMDGPU::AReg_512RegClassID;
1947       case 32: return AMDGPU::AReg_1024RegClassID;
1948     }
1949   }
1950   return -1;
1951 }
1952 
1953 static unsigned getSpecialRegForName(StringRef RegName) {
1954   return StringSwitch<unsigned>(RegName)
1955     .Case("exec", AMDGPU::EXEC)
1956     .Case("vcc", AMDGPU::VCC)
1957     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1958     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1959     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1960     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1961     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1962     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1963     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1964     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1965     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1966     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1967     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1968     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1969     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1970     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1971     .Case("m0", AMDGPU::M0)
1972     .Case("vccz", AMDGPU::SRC_VCCZ)
1973     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1974     .Case("execz", AMDGPU::SRC_EXECZ)
1975     .Case("src_execz", AMDGPU::SRC_EXECZ)
1976     .Case("scc", AMDGPU::SRC_SCC)
1977     .Case("src_scc", AMDGPU::SRC_SCC)
1978     .Case("tba", AMDGPU::TBA)
1979     .Case("tma", AMDGPU::TMA)
1980     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1981     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1982     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1983     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1984     .Case("vcc_lo", AMDGPU::VCC_LO)
1985     .Case("vcc_hi", AMDGPU::VCC_HI)
1986     .Case("exec_lo", AMDGPU::EXEC_LO)
1987     .Case("exec_hi", AMDGPU::EXEC_HI)
1988     .Case("tma_lo", AMDGPU::TMA_LO)
1989     .Case("tma_hi", AMDGPU::TMA_HI)
1990     .Case("tba_lo", AMDGPU::TBA_LO)
1991     .Case("tba_hi", AMDGPU::TBA_HI)
1992     .Case("null", AMDGPU::SGPR_NULL)
1993     .Default(AMDGPU::NoRegister);
1994 }
1995 
1996 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1997                                     SMLoc &EndLoc, bool RestoreOnFailure) {
1998   auto R = parseRegister();
1999   if (!R) return true;
2000   assert(R->isReg());
2001   RegNo = R->getReg();
2002   StartLoc = R->getStartLoc();
2003   EndLoc = R->getEndLoc();
2004   return false;
2005 }
2006 
2007 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2008                                     SMLoc &EndLoc) {
2009   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2010 }
2011 
2012 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2013                                                        SMLoc &StartLoc,
2014                                                        SMLoc &EndLoc) {
2015   bool Result =
2016       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2017   bool PendingErrors = getParser().hasPendingError();
2018   getParser().clearPendingErrors();
2019   if (PendingErrors)
2020     return MatchOperand_ParseFail;
2021   if (Result)
2022     return MatchOperand_NoMatch;
2023   return MatchOperand_Success;
2024 }
2025 
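// Grows a register span while a register list such as [s0,s1,s2,s3] is being
// parsed: each new register must be the next consecutive register (Reg +
// RegWidth), in which case RegWidth is bumped. Special register pairs, e.g.
// vcc_lo followed by vcc_hi, are instead collapsed into the 64-bit register.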
2026 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2027                                             RegisterKind RegKind, unsigned Reg1) {
2028   switch (RegKind) {
2029   case IS_SPECIAL:
2030     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2031       Reg = AMDGPU::EXEC;
2032       RegWidth = 2;
2033       return true;
2034     }
2035     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2036       Reg = AMDGPU::FLAT_SCR;
2037       RegWidth = 2;
2038       return true;
2039     }
2040     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2041       Reg = AMDGPU::XNACK_MASK;
2042       RegWidth = 2;
2043       return true;
2044     }
2045     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2046       Reg = AMDGPU::VCC;
2047       RegWidth = 2;
2048       return true;
2049     }
2050     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2051       Reg = AMDGPU::TBA;
2052       RegWidth = 2;
2053       return true;
2054     }
2055     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2056       Reg = AMDGPU::TMA;
2057       RegWidth = 2;
2058       return true;
2059     }
2060     return false;
2061   case IS_VGPR:
2062   case IS_SGPR:
2063   case IS_AGPR:
2064   case IS_TTMP:
2065     if (Reg1 != Reg + RegWidth) {
2066       return false;
2067     }
2068     RegWidth++;
2069     return true;
2070   default:
2071     llvm_unreachable("unexpected register kind");
2072   }
2073 }
2074 
2075 struct RegInfo {
2076   StringLiteral Name;
2077   RegisterKind Kind;
2078 };
2079 
2080 static constexpr RegInfo RegularRegisters[] = {
2081   {{"v"},    IS_VGPR},
2082   {{"s"},    IS_SGPR},
2083   {{"ttmp"}, IS_TTMP},
2084   {{"acc"},  IS_AGPR},
2085   {{"a"},    IS_AGPR},
2086 };
2087 
2088 static bool isRegularReg(RegisterKind Kind) {
2089   return Kind == IS_VGPR ||
2090          Kind == IS_SGPR ||
2091          Kind == IS_TTMP ||
2092          Kind == IS_AGPR;
2093 }
2094 
2095 static const RegInfo* getRegularRegInfo(StringRef Str) {
2096   for (const RegInfo &Reg : RegularRegisters)
2097     if (Str.startswith(Reg.Name))
2098       return &Reg;
2099   return nullptr;
2100 }
2101 
2102 static bool getRegNum(StringRef Str, unsigned& Num) {
2103   return !Str.getAsInteger(10, Num);
2104 }
2105 
2106 bool
2107 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2108                             const AsmToken &NextToken) const {
2109 
2110   // A list of consecutive registers: [s0,s1,s2,s3]
2111   if (Token.is(AsmToken::LBrac))
2112     return true;
2113 
2114   if (!Token.is(AsmToken::Identifier))
2115     return false;
2116 
2117   // A single register like s0 or a range of registers like s[0:1]
2118 
2119   StringRef Str = Token.getString();
2120   const RegInfo *Reg = getRegularRegInfo(Str);
2121   if (Reg) {
2122     StringRef RegName = Reg->Name;
2123     StringRef RegSuffix = Str.substr(RegName.size());
2124     if (!RegSuffix.empty()) {
2125       unsigned Num;
2126       // A single register with an index: rXX
2127       if (getRegNum(RegSuffix, Num))
2128         return true;
2129     } else {
2130       // A range of registers: r[XX:YY].
2131       if (NextToken.is(AsmToken::LBrac))
2132         return true;
2133     }
2134   }
2135 
2136   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2137 }
2138 
2139 bool
2140 AMDGPUAsmParser::isRegister()
2141 {
2142   return isRegister(getToken(), peekToken());
2143 }
2144 
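// Maps a parsed register kind, index and width onto a concrete register.
// A worked example of the alignment rule below: SGPR/TTMP ranges must be
// aligned to min(RegWidth, 4), so s[2:3] is accepted (2 % 2 == 0) while
// s[1:2] (1 % 2 != 0) and s[2:5] (2 % 4 != 0) are rejected.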
2145 unsigned
2146 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2147                                unsigned RegNum,
2148                                unsigned RegWidth) {
2149 
2150   assert(isRegularReg(RegKind));
2151 
2152   unsigned AlignSize = 1;
2153   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2154     // SGPR and TTMP registers must be aligned.
2155     // Max required alignment is 4 dwords.
2156     AlignSize = std::min(RegWidth, 4u);
2157   }
2158 
2159   if (RegNum % AlignSize != 0)
2160     return AMDGPU::NoRegister;
2161 
2162   unsigned RegIdx = RegNum / AlignSize;
2163   int RCID = getRegClass(RegKind, RegWidth);
2164   if (RCID == -1)
2165     return AMDGPU::NoRegister;
2166 
2167   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2168   const MCRegisterClass RC = TRI->getRegClass(RCID);
2169   if (RegIdx >= RC.getNumRegs())
2170     return AMDGPU::NoRegister;
2171 
2172   return RC.getRegister(RegIdx);
2173 }
2174 
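// Parses the "[lo:hi]" (or "[index]") part of a register range. For example,
// for "v[4:7]" the caller has already consumed "v" and this routine yields
// Num == 4 and Width == 4; for "s[9]" it yields Num == 9 and Width == 1.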
2175 bool
2176 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2177   int64_t RegLo, RegHi;
2178   if (!trySkipToken(AsmToken::LBrac))
2179     return false;
2180 
2181   if (!parseExpr(RegLo))
2182     return false;
2183 
2184   if (trySkipToken(AsmToken::Colon)) {
2185     if (!parseExpr(RegHi))
2186       return false;
2187   } else {
2188     RegHi = RegLo;
2189   }
2190 
2191   if (!trySkipToken(AsmToken::RBrac))
2192     return false;
2193 
2194   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2195     return false;
2196 
2197   Num = static_cast<unsigned>(RegLo);
2198   Width = (RegHi - RegLo) + 1;
2199   return true;
2200 }
2201 
2202 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2203                                           unsigned &RegNum, unsigned &RegWidth,
2204                                           SmallVectorImpl<AsmToken> &Tokens) {
2205   assert(isToken(AsmToken::Identifier));
2206   unsigned Reg = getSpecialRegForName(getTokenStr());
2207   if (Reg) {
2208     RegNum = 0;
2209     RegWidth = 1;
2210     RegKind = IS_SPECIAL;
2211     Tokens.push_back(getToken());
2212     lex(); // skip register name
2213   }
2214   return Reg;
2215 }
2216 
2217 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2218                                           unsigned &RegNum, unsigned &RegWidth,
2219                                           SmallVectorImpl<AsmToken> &Tokens) {
2220   assert(isToken(AsmToken::Identifier));
2221   StringRef RegName = getTokenStr();
2222 
2223   const RegInfo *RI = getRegularRegInfo(RegName);
2224   if (!RI)
2225     return AMDGPU::NoRegister;
2226   Tokens.push_back(getToken());
2227   lex(); // skip register name
2228 
2229   RegKind = RI->Kind;
2230   StringRef RegSuffix = RegName.substr(RI->Name.size());
2231   if (!RegSuffix.empty()) {
2232     // Single 32-bit register: vXX.
2233     if (!getRegNum(RegSuffix, RegNum))
2234       return AMDGPU::NoRegister;
2235     RegWidth = 1;
2236   } else {
2237     // Range of registers: v[XX:YY]. ":YY" is optional.
2238     if (!ParseRegRange(RegNum, RegWidth))
2239       return AMDGPU::NoRegister;
2240   }
2241 
2242   return getRegularReg(RegKind, RegNum, RegWidth);
2243 }
2244 
2245 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2246                                        unsigned &RegWidth,
2247                                        SmallVectorImpl<AsmToken> &Tokens) {
2248   unsigned Reg = AMDGPU::NoRegister;
2249 
2250   if (!trySkipToken(AsmToken::LBrac))
2251     return AMDGPU::NoRegister;
2252 
2253   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2254 
2255   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2256     return AMDGPU::NoRegister;
2257   if (RegWidth != 1)
2258     return AMDGPU::NoRegister;
2259 
2260   for (; trySkipToken(AsmToken::Comma); ) {
2261     RegisterKind NextRegKind;
2262     unsigned NextReg, NextRegNum, NextRegWidth;
2263 
2264     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2265                              Tokens))
2266       return AMDGPU::NoRegister;
2267     if (NextRegWidth != 1)
2268       return AMDGPU::NoRegister;
2269     if (NextRegKind != RegKind)
2270       return AMDGPU::NoRegister;
2271     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2272       return AMDGPU::NoRegister;
2273   }
2274 
2275   if (!trySkipToken(AsmToken::RBrac))
2276     return AMDGPU::NoRegister;
2277 
2278   if (isRegularReg(RegKind))
2279     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2280 
2281   return Reg;
2282 }
2283 
2284 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2285                                           unsigned &RegNum, unsigned &RegWidth,
2286                                           SmallVectorImpl<AsmToken> &Tokens) {
2287   Reg = AMDGPU::NoRegister;
2288 
2289   if (isToken(AsmToken::Identifier)) {
2290     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2291     if (Reg == AMDGPU::NoRegister)
2292       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2293   } else {
2294     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2295   }
2296 
2297   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2298   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2299 }
2300 
2301 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2302                                           unsigned &RegNum, unsigned &RegWidth,
2303                                           bool RestoreOnFailure) {
2304   Reg = AMDGPU::NoRegister;
2305 
2306   SmallVector<AsmToken, 1> Tokens;
2307   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2308     if (RestoreOnFailure) {
2309       while (!Tokens.empty()) {
2310         getLexer().UnLex(Tokens.pop_back_val());
2311       }
2312     }
2313     return true;
2314   }
2315   return false;
2316 }
2317 
2318 Optional<StringRef>
2319 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2320   switch (RegKind) {
2321   case IS_VGPR:
2322     return StringRef(".amdgcn.next_free_vgpr");
2323   case IS_SGPR:
2324     return StringRef(".amdgcn.next_free_sgpr");
2325   default:
2326     return None;
2327   }
2328 }
2329 
2330 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2331   auto SymbolName = getGprCountSymbolName(RegKind);
2332   assert(SymbolName && "initializing invalid register kind");
2333   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2334   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2335 }
2336 
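// Keeps the .amdgcn.next_free_{v,s}gpr symbols in sync with the highest
// register index used so far. For example, after parsing v[8:11] the value of
// .amdgcn.next_free_vgpr is raised to at least 12 (NewMax + 1).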
2337 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2338                                             unsigned DwordRegIndex,
2339                                             unsigned RegWidth) {
2340   // Symbols are only defined for GCN targets
2341   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2342     return true;
2343 
2344   auto SymbolName = getGprCountSymbolName(RegKind);
2345   if (!SymbolName)
2346     return true;
2347   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2348 
2349   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2350   int64_t OldCount;
2351 
2352   if (!Sym->isVariable())
2353     return !Error(getParser().getTok().getLoc(),
2354                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2355   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2356     return !Error(
2357         getParser().getTok().getLoc(),
2358         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2359 
2360   if (OldCount <= NewMax)
2361     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2362 
2363   return true;
2364 }
2365 
2366 std::unique_ptr<AMDGPUOperand>
2367 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2368   const auto &Tok = Parser.getTok();
2369   SMLoc StartLoc = Tok.getLoc();
2370   SMLoc EndLoc = Tok.getEndLoc();
2371   RegisterKind RegKind;
2372   unsigned Reg, RegNum, RegWidth;
2373 
2374   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2375     //FIXME: improve error messages (bug 41303).
2376     Error(StartLoc, "not a valid operand.");
2377     return nullptr;
2378   }
2379   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2380     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2381       return nullptr;
2382   } else
2383     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2384   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2385 }
2386 
2387 OperandMatchResultTy
2388 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2389   // TODO: add syntactic sugar for 1/(2*PI)
2390 
2391   assert(!isRegister());
2392   assert(!isModifier());
2393 
2394   const auto& Tok = getToken();
2395   const auto& NextTok = peekToken();
2396   bool IsReal = Tok.is(AsmToken::Real);
2397   SMLoc S = getLoc();
2398   bool Negate = false;
2399 
2400   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2401     lex();
2402     IsReal = true;
2403     Negate = true;
2404   }
2405 
2406   if (IsReal) {
2407     // Floating-point expressions are not supported.
2408     // We can only allow floating-point literals with an
2409     // optional sign.
2410 
2411     StringRef Num = getTokenStr();
2412     lex();
2413 
2414     APFloat RealVal(APFloat::IEEEdouble());
2415     auto roundMode = APFloat::rmNearestTiesToEven;
2416     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2417       return MatchOperand_ParseFail;
2418     }
2419     if (Negate)
2420       RealVal.changeSign();
2421 
2422     Operands.push_back(
2423       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2424                                AMDGPUOperand::ImmTyNone, true));
2425 
2426     return MatchOperand_Success;
2427 
2428   } else {
2429     int64_t IntVal;
2430     const MCExpr *Expr;
2431     SMLoc S = getLoc();
2432 
2433     if (HasSP3AbsModifier) {
2434       // This is a workaround for handling expressions
2435       // as arguments of SP3 'abs' modifier, for example:
2436       //     |1.0|
2437       //     |-1|
2438       //     |1+x|
2439       // This syntax is not compatible with syntax of standard
2440       // MC expressions (due to the trailing '|').
2441       SMLoc EndLoc;
2442       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2443         return MatchOperand_ParseFail;
2444     } else {
2445       if (Parser.parseExpression(Expr))
2446         return MatchOperand_ParseFail;
2447     }
2448 
2449     if (Expr->evaluateAsAbsolute(IntVal)) {
2450       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2451     } else {
2452       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2453     }
2454 
2455     return MatchOperand_Success;
2456   }
2457 
2458   return MatchOperand_NoMatch;
2459 }
2460 
2461 OperandMatchResultTy
2462 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2463   if (!isRegister())
2464     return MatchOperand_NoMatch;
2465 
2466   if (auto R = parseRegister()) {
2467     assert(R->isReg());
2468     Operands.push_back(std::move(R));
2469     return MatchOperand_Success;
2470   }
2471   return MatchOperand_ParseFail;
2472 }
2473 
2474 OperandMatchResultTy
2475 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2476   auto res = parseReg(Operands);
2477   if (res != MatchOperand_NoMatch) {
2478     return res;
2479   } else if (isModifier()) {
2480     return MatchOperand_NoMatch;
2481   } else {
2482     return parseImm(Operands, HasSP3AbsMod);
2483   }
2484 }
2485 
2486 bool
2487 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2488   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2489     const auto &str = Token.getString();
2490     return str == "abs" || str == "neg" || str == "sext";
2491   }
2492   return false;
2493 }
2494 
2495 bool
2496 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2497   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2498 }
2499 
2500 bool
2501 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2502   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2503 }
2504 
2505 bool
2506 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2507   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2508 }
2509 
2510 // Check if this is an operand modifier or an opcode modifier
2511 // which may look like an expression but is not. We should
2512 // avoid parsing these modifiers as expressions. Currently
2513 // recognized sequences are:
2514 //   |...|
2515 //   abs(...)
2516 //   neg(...)
2517 //   sext(...)
2518 //   -reg
2519 //   -|...|
2520 //   -abs(...)
2521 //   name:...
2522 // Note that simple opcode modifiers like 'gds' may be parsed as
2523 // expressions; this is a special case. See getExpressionAsToken.
2524 //
2525 bool
2526 AMDGPUAsmParser::isModifier() {
2527 
2528   AsmToken Tok = getToken();
2529   AsmToken NextToken[2];
2530   peekTokens(NextToken);
2531 
2532   return isOperandModifier(Tok, NextToken[0]) ||
2533          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2534          isOpcodeModifierWithVal(Tok, NextToken[0]);
2535 }
2536 
2537 // Check if the current token is an SP3 'neg' modifier.
2538 // Currently this modifier is allowed in the following context:
2539 //
2540 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2541 // 2. Before an 'abs' modifier: -abs(...)
2542 // 3. Before an SP3 'abs' modifier: -|...|
2543 //
2544 // In all other cases "-" is handled as part
2545 // of an expression that follows the sign.
2546 //
2547 // Note: When "-" is followed by an integer literal,
2548 // this is interpreted as integer negation rather
2549 // than a floating-point NEG modifier applied to N.
2550 // Besides being counter-intuitive, such use of a floating-point
2551 // NEG modifier would have resulted in a different meaning
2552 // of integer literals used with VOP1/2/C and VOP3,
2553 // for example:
2554 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2555 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2556 // Negative fp literals with a preceding "-" are
2557 // handled likewise for uniformity.
2558 //
2559 bool
2560 AMDGPUAsmParser::parseSP3NegModifier() {
2561 
2562   AsmToken NextToken[2];
2563   peekTokens(NextToken);
2564 
2565   if (isToken(AsmToken::Minus) &&
2566       (isRegister(NextToken[0], NextToken[1]) ||
2567        NextToken[0].is(AsmToken::Pipe) ||
2568        isId(NextToken[0], "abs"))) {
2569     lex();
2570     return true;
2571   }
2572 
2573   return false;
2574 }
2575 
2576 OperandMatchResultTy
2577 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2578                                               bool AllowImm) {
2579   bool Neg, SP3Neg;
2580   bool Abs, SP3Abs;
2581   SMLoc Loc;
2582 
2583   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2584   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2585     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2586     return MatchOperand_ParseFail;
2587   }
2588 
2589   SP3Neg = parseSP3NegModifier();
2590 
2591   Loc = getLoc();
2592   Neg = trySkipId("neg");
2593   if (Neg && SP3Neg) {
2594     Error(Loc, "expected register or immediate");
2595     return MatchOperand_ParseFail;
2596   }
2597   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2598     return MatchOperand_ParseFail;
2599 
2600   Abs = trySkipId("abs");
2601   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2602     return MatchOperand_ParseFail;
2603 
2604   Loc = getLoc();
2605   SP3Abs = trySkipToken(AsmToken::Pipe);
2606   if (Abs && SP3Abs) {
2607     Error(Loc, "expected register or immediate");
2608     return MatchOperand_ParseFail;
2609   }
2610 
2611   OperandMatchResultTy Res;
2612   if (AllowImm) {
2613     Res = parseRegOrImm(Operands, SP3Abs);
2614   } else {
2615     Res = parseReg(Operands);
2616   }
2617   if (Res != MatchOperand_Success) {
2618     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2619   }
2620 
2621   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2622     return MatchOperand_ParseFail;
2623   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2624     return MatchOperand_ParseFail;
2625   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2626     return MatchOperand_ParseFail;
2627 
2628   AMDGPUOperand::Modifiers Mods;
2629   Mods.Abs = Abs || SP3Abs;
2630   Mods.Neg = Neg || SP3Neg;
2631 
2632   if (Mods.hasFPModifiers()) {
2633     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2634     if (Op.isExpr()) {
2635       Error(Op.getStartLoc(), "expected an absolute expression");
2636       return MatchOperand_ParseFail;
2637     }
2638     Op.setModifiers(Mods);
2639   }
2640   return MatchOperand_Success;
2641 }
2642 
2643 OperandMatchResultTy
2644 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2645                                                bool AllowImm) {
2646   bool Sext = trySkipId("sext");
2647   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2648     return MatchOperand_ParseFail;
2649 
2650   OperandMatchResultTy Res;
2651   if (AllowImm) {
2652     Res = parseRegOrImm(Operands);
2653   } else {
2654     Res = parseReg(Operands);
2655   }
2656   if (Res != MatchOperand_Success) {
2657     return Sext? MatchOperand_ParseFail : Res;
2658   }
2659 
2660   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2661     return MatchOperand_ParseFail;
2662 
2663   AMDGPUOperand::Modifiers Mods;
2664   Mods.Sext = Sext;
2665 
2666   if (Mods.hasIntModifiers()) {
2667     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2668     if (Op.isExpr()) {
2669       Error(Op.getStartLoc(), "expected an absolute expression");
2670       return MatchOperand_ParseFail;
2671     }
2672     Op.setModifiers(Mods);
2673   }
2674 
2675   return MatchOperand_Success;
2676 }
2677 
2678 OperandMatchResultTy
2679 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2680   return parseRegOrImmWithFPInputMods(Operands, false);
2681 }
2682 
2683 OperandMatchResultTy
2684 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2685   return parseRegOrImmWithIntInputMods(Operands, false);
2686 }
2687 
2688 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2689   auto Loc = getLoc();
2690   if (trySkipId("off")) {
2691     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2692                                                 AMDGPUOperand::ImmTyOff, false));
2693     return MatchOperand_Success;
2694   }
2695 
2696   if (!isRegister())
2697     return MatchOperand_NoMatch;
2698 
2699   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2700   if (Reg) {
2701     Operands.push_back(std::move(Reg));
2702     return MatchOperand_Success;
2703   }
2704 
2705   return MatchOperand_ParseFail;
2706 
2707 }
2708 
2709 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2710   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2711 
2712   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2713       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2714       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2715       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2716     return Match_InvalidOperand;
2717 
2718   if ((TSFlags & SIInstrFlags::VOP3) &&
2719       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2720       getForcedEncodingSize() != 64)
2721     return Match_PreferE32;
2722 
2723   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2724       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2725     // v_mac_f32/16 allow only dst_sel == DWORD.
2726     auto OpNum =
2727         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2728     const auto &Op = Inst.getOperand(OpNum);
2729     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2730       return Match_InvalidOperand;
2731     }
2732   }
2733 
2734   return Match_Success;
2735 }
2736 
2737 // What asm variants we should check
2738 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2739   if (getForcedEncodingSize() == 32) {
2740     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2741     return makeArrayRef(Variants);
2742   }
2743 
2744   if (isForcedVOP3()) {
2745     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2746     return makeArrayRef(Variants);
2747   }
2748 
2749   if (isForcedSDWA()) {
2750     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2751                                         AMDGPUAsmVariants::SDWA9};
2752     return makeArrayRef(Variants);
2753   }
2754 
2755   if (isForcedDPP()) {
2756     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2757     return makeArrayRef(Variants);
2758   }
2759 
2760   static const unsigned Variants[] = {
2761     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2762     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2763   };
2764 
2765   return makeArrayRef(Variants);
2766 }
2767 
2768 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2769   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2770   const unsigned Num = Desc.getNumImplicitUses();
2771   for (unsigned i = 0; i < Num; ++i) {
2772     unsigned Reg = Desc.ImplicitUses[i];
2773     switch (Reg) {
2774     case AMDGPU::FLAT_SCR:
2775     case AMDGPU::VCC:
2776     case AMDGPU::VCC_LO:
2777     case AMDGPU::VCC_HI:
2778     case AMDGPU::M0:
2779       return Reg;
2780     default:
2781       break;
2782     }
2783   }
2784   return AMDGPU::NoRegister;
2785 }
2786 
2787 // NB: This code is correct only when used to check constant
2788 // bus limitations because GFX7 has no f16 inline constants.
2789 // Note that there are no cases when a GFX7 opcode violates
2790 // constant bus limitations due to the use of an f16 constant.
2791 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2792                                        unsigned OpIdx) const {
2793   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2794 
2795   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2796     return false;
2797   }
2798 
2799   const MCOperand &MO = Inst.getOperand(OpIdx);
2800 
2801   int64_t Val = MO.getImm();
2802   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2803 
2804   switch (OpSize) { // expected operand size
2805   case 8:
2806     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2807   case 4:
2808     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2809   case 2: {
2810     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2811     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2812         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2813         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2814         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2815         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2816         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2817       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2818     } else {
2819       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2820     }
2821   }
2822   default:
2823     llvm_unreachable("invalid operand size");
2824   }
2825 }
2826 
2827 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2828   if (!isGFX10())
2829     return 1;
2830 
2831   switch (Opcode) {
2832   // 64-bit shift instructions can use only one scalar value input
2833   case AMDGPU::V_LSHLREV_B64:
2834   case AMDGPU::V_LSHLREV_B64_gfx10:
2835   case AMDGPU::V_LSHL_B64:
2836   case AMDGPU::V_LSHRREV_B64:
2837   case AMDGPU::V_LSHRREV_B64_gfx10:
2838   case AMDGPU::V_LSHR_B64:
2839   case AMDGPU::V_ASHRREV_I64:
2840   case AMDGPU::V_ASHRREV_I64_gfx10:
2841   case AMDGPU::V_ASHR_I64:
2842     return 1;
2843   default:
2844     return 2;
2845   }
2846 }
2847 
2848 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2849   const MCOperand &MO = Inst.getOperand(OpIdx);
2850   if (MO.isImm()) {
2851     return !isInlineConstant(Inst, OpIdx);
2852   } else if (MO.isReg()) {
2853     auto Reg = MO.getReg();
2854     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2855     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2856   } else {
2857     return true;
2858   }
2859 }
2860 
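// Enforces the constant bus limit: one scalar value per instruction before
// GFX10, two on GFX10 for most opcodes. As an illustrative example,
// "v_add_f32 v0, s0, s1" reads two different SGPRs, so it would be rejected
// on pre-GFX10 targets but accepted on GFX10.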
2861 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2862   const unsigned Opcode = Inst.getOpcode();
2863   const MCInstrDesc &Desc = MII.get(Opcode);
2864   unsigned ConstantBusUseCount = 0;
2865   unsigned NumLiterals = 0;
2866   unsigned LiteralSize;
2867 
2868   if (Desc.TSFlags &
2869       (SIInstrFlags::VOPC |
2870        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2871        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2872        SIInstrFlags::SDWA)) {
2873     // Check special imm operands (used by madmk, etc)
2874     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2875       ++ConstantBusUseCount;
2876     }
2877 
2878     SmallDenseSet<unsigned> SGPRsUsed;
2879     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2880     if (SGPRUsed != AMDGPU::NoRegister) {
2881       SGPRsUsed.insert(SGPRUsed);
2882       ++ConstantBusUseCount;
2883     }
2884 
2885     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2886     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2887     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2888 
2889     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2890 
2891     for (int OpIdx : OpIndices) {
2892       if (OpIdx == -1) break;
2893 
2894       const MCOperand &MO = Inst.getOperand(OpIdx);
2895       if (usesConstantBus(Inst, OpIdx)) {
2896         if (MO.isReg()) {
2897           const unsigned Reg = mc2PseudoReg(MO.getReg());
2898           // Pairs of registers with a partial intersection like these
2899           //   s0, s[0:1]
2900           //   flat_scratch_lo, flat_scratch
2901           //   flat_scratch_lo, flat_scratch_hi
2902           // are theoretically valid but they are disabled anyway.
2903           // Note that this code mimics SIInstrInfo::verifyInstruction
2904           if (!SGPRsUsed.count(Reg)) {
2905             SGPRsUsed.insert(Reg);
2906             ++ConstantBusUseCount;
2907           }
2908         } else { // Expression or a literal
2909 
2910           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2911             continue; // special operand like VINTERP attr_chan
2912 
2913           // An instruction may use only one literal.
2914           // This has been validated on the previous step.
2915           // See validateVOP3Literal.
2916           // This literal may be used as more than one operand.
2917           // If all these operands are of the same size,
2918           // this literal counts as one scalar value.
2919           // Otherwise it counts as 2 scalar values.
2920           // See "GFX10 Shader Programming", section 3.6.2.3.
2921 
2922           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2923           if (Size < 4) Size = 4;
2924 
2925           if (NumLiterals == 0) {
2926             NumLiterals = 1;
2927             LiteralSize = Size;
2928           } else if (LiteralSize != Size) {
2929             NumLiterals = 2;
2930           }
2931         }
2932       }
2933     }
2934   }
2935   ConstantBusUseCount += NumLiterals;
2936 
2937   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2938 }
2939 
2940 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2941   const unsigned Opcode = Inst.getOpcode();
2942   const MCInstrDesc &Desc = MII.get(Opcode);
2943 
2944   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2945   if (DstIdx == -1 ||
2946       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2947     return true;
2948   }
2949 
2950   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2951 
2952   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2953   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2954   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2955 
2956   assert(DstIdx != -1);
2957   const MCOperand &Dst = Inst.getOperand(DstIdx);
2958   assert(Dst.isReg());
2959   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2960 
2961   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2962 
2963   for (int SrcIdx : SrcIndices) {
2964     if (SrcIdx == -1) break;
2965     const MCOperand &Src = Inst.getOperand(SrcIdx);
2966     if (Src.isReg()) {
2967       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2968       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2969         return false;
2970       }
2971     }
2972   }
2973 
2974   return true;
2975 }
2976 
2977 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2978 
2979   const unsigned Opc = Inst.getOpcode();
2980   const MCInstrDesc &Desc = MII.get(Opc);
2981 
2982   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2983     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2984     assert(ClampIdx != -1);
2985     return Inst.getOperand(ClampIdx).getImm() == 0;
2986   }
2987 
2988   return true;
2989 }
2990 
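// Checks that the vdata register width matches what dmask/tfe/d16 imply.
// For example (illustrative), an image load with dmask == 0x7 produces three
// dwords and tfe adds one more, so vdata must be a 4-dword register such as
// v[0:3]; with packed d16 those three components would need only two dwords.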
2991 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2992 
2993   const unsigned Opc = Inst.getOpcode();
2994   const MCInstrDesc &Desc = MII.get(Opc);
2995 
2996   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2997     return true;
2998 
2999   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3000   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3001   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3002 
3003   assert(VDataIdx != -1);
3004   assert(DMaskIdx != -1);
3005   assert(TFEIdx != -1);
3006 
3007   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3008   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3009   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3010   if (DMask == 0)
3011     DMask = 1;
3012 
3013   unsigned DataSize =
3014     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3015   if (hasPackedD16()) {
3016     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3017     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3018       DataSize = (DataSize + 1) / 2;
3019   }
3020 
3021   return (VDataSize / 4) == DataSize + TFESize;
3022 }
3023 
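// GFX10 only: checks that the address operands cover the required number of
// dwords (extra args + gradients + coordinates + lod/clamp/mip). In non-NSA
// encodings the requirement is rounded up, so a computed size of 5..8 dwords
// must be supplied as an 8-dword vaddr, and anything larger as 16 dwords.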
3024 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3025   const unsigned Opc = Inst.getOpcode();
3026   const MCInstrDesc &Desc = MII.get(Opc);
3027 
3028   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3029     return true;
3030 
3031   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3032   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3033       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3034   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3035   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3036   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3037 
3038   assert(VAddr0Idx != -1);
3039   assert(SrsrcIdx != -1);
3040   assert(DimIdx != -1);
3041   assert(SrsrcIdx > VAddr0Idx);
3042 
3043   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3044   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3045   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3046   unsigned VAddrSize =
3047       IsNSA ? SrsrcIdx - VAddr0Idx
3048             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3049 
3050   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3051                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3052                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3053                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3054   if (!IsNSA) {
3055     if (AddrSize > 8)
3056       AddrSize = 16;
3057     else if (AddrSize > 4)
3058       AddrSize = 8;
3059   }
3060 
3061   return VAddrSize == AddrSize;
3062 }
3063 
3064 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3065 
3066   const unsigned Opc = Inst.getOpcode();
3067   const MCInstrDesc &Desc = MII.get(Opc);
3068 
3069   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3070     return true;
3071   if (!Desc.mayLoad() || !Desc.mayStore())
3072     return true; // Not atomic
3073 
3074   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3075   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3076 
3077   // This is an incomplete check because image_atomic_cmpswap
3078   // may only use 0x3 and 0xf while other atomic operations
3079   // may use 0x1 and 0x3. However, these limitations are
3080   // verified when we check that dmask matches dst size.
3081   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3082 }
3083 
3084 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3085 
3086   const unsigned Opc = Inst.getOpcode();
3087   const MCInstrDesc &Desc = MII.get(Opc);
3088 
3089   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3090     return true;
3091 
3092   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3093   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3094 
3095   // GATHER4 instructions use dmask in a different fashion compared to
3096   // other MIMG instructions. The only useful DMASK values are
3097   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3098   // (red,red,red,red) etc.) The ISA document doesn't mention
3099   // this.
3100   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3101 }
3102 
static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3105   switch (Opcode) {
3106   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3107   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3108   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3109     return true;
3110   default:
3111     return false;
3112   }
3113 }
3114 
// movrels* opcodes should only allow VGPRs as src0.
3116 // This is specified in .td description for vop1/vop3,
3117 // but sdwa is handled differently. See isSDWAOperand.
3118 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3120   const unsigned Opc = Inst.getOpcode();
3121   const MCInstrDesc &Desc = MII.get(Opc);
3122 
3123   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3124     return true;
3125 
3126   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3127   assert(Src0Idx != -1);
3128 
3129   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3130   if (!Src0.isReg())
3131     return false;
3132 
3133   auto Reg = Src0.getReg();
3134   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3135   return !isSGPR(mc2PseudoReg(Reg), TRI);
3136 }
3137 
3138 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3140   const unsigned Opc = Inst.getOpcode();
3141   const MCInstrDesc &Desc = MII.get(Opc);
3142 
3143   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3144     return true;
3145 
3146   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3147   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3148     if (isCI() || isSI())
3149       return false;
3150   }
3151 
3152   return true;
3153 }
3154 
3155 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3156   const unsigned Opc = Inst.getOpcode();
3157   const MCInstrDesc &Desc = MII.get(Opc);
3158 
3159   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3160     return true;
3161 
3162   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3163   if (DimIdx < 0)
3164     return true;
3165 
  int64_t Imm = Inst.getOperand(DimIdx).getImm();
3167   if (Imm < 0 || Imm >= 8)
3168     return false;
3169 
3170   return true;
3171 }
3172 
static bool IsRevOpcode(const unsigned Opcode) {
3175   switch (Opcode) {
3176   case AMDGPU::V_SUBREV_F32_e32:
3177   case AMDGPU::V_SUBREV_F32_e64:
3178   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3179   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3180   case AMDGPU::V_SUBREV_F32_e32_vi:
3181   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3182   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3183   case AMDGPU::V_SUBREV_F32_e64_vi:
3184 
3185   case AMDGPU::V_SUBREV_I32_e32:
3186   case AMDGPU::V_SUBREV_I32_e64:
3187   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3188   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3189 
3190   case AMDGPU::V_SUBBREV_U32_e32:
3191   case AMDGPU::V_SUBBREV_U32_e64:
3192   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3193   case AMDGPU::V_SUBBREV_U32_e32_vi:
3194   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3195   case AMDGPU::V_SUBBREV_U32_e64_vi:
3196 
3197   case AMDGPU::V_SUBREV_U32_e32:
3198   case AMDGPU::V_SUBREV_U32_e64:
3199   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3200   case AMDGPU::V_SUBREV_U32_e32_vi:
3201   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3202   case AMDGPU::V_SUBREV_U32_e64_vi:
3203 
3204   case AMDGPU::V_SUBREV_F16_e32:
3205   case AMDGPU::V_SUBREV_F16_e64:
3206   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3207   case AMDGPU::V_SUBREV_F16_e32_vi:
3208   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3209   case AMDGPU::V_SUBREV_F16_e64_vi:
3210 
3211   case AMDGPU::V_SUBREV_U16_e32:
3212   case AMDGPU::V_SUBREV_U16_e64:
3213   case AMDGPU::V_SUBREV_U16_e32_vi:
3214   case AMDGPU::V_SUBREV_U16_e64_vi:
3215 
3216   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3217   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3218   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3219 
3220   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3221   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3222 
3223   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3224   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3225 
3226   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3227   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3228 
3229   case AMDGPU::V_LSHRREV_B32_e32:
3230   case AMDGPU::V_LSHRREV_B32_e64:
3231   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3232   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3233   case AMDGPU::V_LSHRREV_B32_e32_vi:
3234   case AMDGPU::V_LSHRREV_B32_e64_vi:
3235   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3236   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3237 
3238   case AMDGPU::V_ASHRREV_I32_e32:
3239   case AMDGPU::V_ASHRREV_I32_e64:
3240   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3241   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3242   case AMDGPU::V_ASHRREV_I32_e32_vi:
3243   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3244   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3245   case AMDGPU::V_ASHRREV_I32_e64_vi:
3246 
3247   case AMDGPU::V_LSHLREV_B32_e32:
3248   case AMDGPU::V_LSHLREV_B32_e64:
3249   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3250   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3251   case AMDGPU::V_LSHLREV_B32_e32_vi:
3252   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3253   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3254   case AMDGPU::V_LSHLREV_B32_e64_vi:
3255 
3256   case AMDGPU::V_LSHLREV_B16_e32:
3257   case AMDGPU::V_LSHLREV_B16_e64:
3258   case AMDGPU::V_LSHLREV_B16_e32_vi:
3259   case AMDGPU::V_LSHLREV_B16_e64_vi:
3260   case AMDGPU::V_LSHLREV_B16_gfx10:
3261 
3262   case AMDGPU::V_LSHRREV_B16_e32:
3263   case AMDGPU::V_LSHRREV_B16_e64:
3264   case AMDGPU::V_LSHRREV_B16_e32_vi:
3265   case AMDGPU::V_LSHRREV_B16_e64_vi:
3266   case AMDGPU::V_LSHRREV_B16_gfx10:
3267 
3268   case AMDGPU::V_ASHRREV_I16_e32:
3269   case AMDGPU::V_ASHRREV_I16_e64:
3270   case AMDGPU::V_ASHRREV_I16_e32_vi:
3271   case AMDGPU::V_ASHRREV_I16_e64_vi:
3272   case AMDGPU::V_ASHRREV_I16_gfx10:
3273 
3274   case AMDGPU::V_LSHLREV_B64:
3275   case AMDGPU::V_LSHLREV_B64_gfx10:
3276   case AMDGPU::V_LSHLREV_B64_vi:
3277 
3278   case AMDGPU::V_LSHRREV_B64:
3279   case AMDGPU::V_LSHRREV_B64_gfx10:
3280   case AMDGPU::V_LSHRREV_B64_vi:
3281 
3282   case AMDGPU::V_ASHRREV_I64:
3283   case AMDGPU::V_ASHRREV_I64_gfx10:
3284   case AMDGPU::V_ASHRREV_I64_vi:
3285 
3286   case AMDGPU::V_PK_LSHLREV_B16:
3287   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3288   case AMDGPU::V_PK_LSHLREV_B16_vi:
3289 
3290   case AMDGPU::V_PK_LSHRREV_B16:
3291   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3292   case AMDGPU::V_PK_LSHRREV_B16_vi:
3293   case AMDGPU::V_PK_ASHRREV_I16:
3294   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3295   case AMDGPU::V_PK_ASHRREV_I16_vi:
3296     return true;
3297   default:
3298     return false;
3299   }
3300 }
3301 
3302 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3304   using namespace SIInstrFlags;
3305   const unsigned Opcode = Inst.getOpcode();
3306   const MCInstrDesc &Desc = MII.get(Opcode);
3307 
3308   // lds_direct register is defined so that it can be used
3309   // with 9-bit operands only. Ignore encodings which do not accept these.
3310   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3311     return true;
3312 
3313   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3314   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3315   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3316 
3317   const int SrcIndices[] = { Src1Idx, Src2Idx };
3318 
3319   // lds_direct cannot be specified as either src1 or src2.
3320   for (int SrcIdx : SrcIndices) {
3321     if (SrcIdx == -1) break;
3322     const MCOperand &Src = Inst.getOperand(SrcIdx);
3323     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3324       return false;
3325     }
3326   }
3327 
3328   if (Src0Idx == -1)
3329     return true;
3330 
3331   const MCOperand &Src = Inst.getOperand(Src0Idx);
3332   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3333     return true;
3334 
3335   // lds_direct is specified as src0. Check additional limitations.
3336   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3337 }
3338 
3339 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3340   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3341     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3342     if (Op.isFlatOffset())
3343       return Op.getStartLoc();
3344   }
3345   return getLoc();
3346 }
3347 
3348 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3349                                          const OperandVector &Operands) {
3350   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3351   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3352     return true;
3353 
3354   auto Opcode = Inst.getOpcode();
3355   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3356   assert(OpNum != -1);
3357 
3358   const auto &Op = Inst.getOperand(OpNum);
3359   if (!hasFlatOffsets() && Op.getImm() != 0) {
3360     Error(getFlatOffsetLoc(Operands),
3361           "flat offset modifier is not supported on this GPU");
3362     return false;
3363   }
3364 
  // The address offset is 13-bit signed for GFX9 and 12-bit signed for GFX10.
  // For the FLAT segment the offset must be non-negative;
  // the MSB is ignored and forced to zero.
3368   unsigned OffsetSize = isGFX9() ? 13 : 12;
3369   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3370     if (!isIntN(OffsetSize, Op.getImm())) {
3371       Error(getFlatOffsetLoc(Operands),
3372             isGFX9() ? "expected a 13-bit signed offset" :
3373                        "expected a 12-bit signed offset");
3374       return false;
3375     }
3376   } else {
3377     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3378       Error(getFlatOffsetLoc(Operands),
3379             isGFX9() ? "expected a 12-bit unsigned offset" :
3380                        "expected an 11-bit unsigned offset");
3381       return false;
3382     }
3383   }
3384 
3385   return true;
3386 }
3387 
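// Only one unique literal (or expression) may be used across the source
// operands of a SOP2/SOPC instruction; identical literal values are counted
// once.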
3388 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3389   unsigned Opcode = Inst.getOpcode();
3390   const MCInstrDesc &Desc = MII.get(Opcode);
3391   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3392     return true;
3393 
3394   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3395   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3396 
3397   const int OpIndices[] = { Src0Idx, Src1Idx };
3398 
3399   unsigned NumExprs = 0;
3400   unsigned NumLiterals = 0;
  uint32_t LiteralValue = 0;
3402 
3403   for (int OpIdx : OpIndices) {
3404     if (OpIdx == -1) break;
3405 
3406     const MCOperand &MO = Inst.getOperand(OpIdx);
3407     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3408     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3409       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3410         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3411         if (NumLiterals == 0 || LiteralValue != Value) {
3412           LiteralValue = Value;
3413           ++NumLiterals;
3414         }
3415       } else if (MO.isExpr()) {
3416         ++NumExprs;
3417       }
3418     }
3419   }
3420 
3421   return NumLiterals + NumExprs <= 1;
3422 }
3423 
3424 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3425   const unsigned Opc = Inst.getOpcode();
3426   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3427       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3428     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3429     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3430 
3431     if (OpSel & ~3)
3432       return false;
3433   }
3434   return true;
3435 }
3436 
3437 // Check if VCC register matches wavefront size
3438 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3439   auto FB = getFeatureBits();
3440   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3441     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3442 }
3443 
3444 // VOP3 literal is only allowed in GFX10+ and only one can be used
3445 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3446   unsigned Opcode = Inst.getOpcode();
3447   const MCInstrDesc &Desc = MII.get(Opcode);
3448   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3449     return true;
3450 
3451   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3452   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3453   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3454 
3455   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3456 
3457   unsigned NumExprs = 0;
3458   unsigned NumLiterals = 0;
  uint32_t LiteralValue = 0;
3460 
3461   for (int OpIdx : OpIndices) {
3462     if (OpIdx == -1) break;
3463 
3464     const MCOperand &MO = Inst.getOperand(OpIdx);
3465     if (!MO.isImm() && !MO.isExpr())
3466       continue;
3467     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3468       continue;
3469 
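    // On subtargets with the MFMA inline literal bug, reject any immediate or
    // expression in src2 of MAI instructions.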
3470     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3471         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3472       return false;
3473 
3474     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3475       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3476       if (NumLiterals == 0 || LiteralValue != Value) {
3477         LiteralValue = Value;
3478         ++NumLiterals;
3479       }
3480     } else if (MO.isExpr()) {
3481       ++NumExprs;
3482     }
3483   }
3484   NumLiterals += NumExprs;
3485 
3486   return !NumLiterals ||
3487          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3488 }
3489 
3490 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3491                                           const SMLoc &IDLoc,
3492                                           const OperandVector &Operands) {
3493   if (!validateLdsDirect(Inst)) {
3494     Error(IDLoc,
3495       "invalid use of lds_direct");
3496     return false;
3497   }
3498   if (!validateSOPLiteral(Inst)) {
3499     Error(IDLoc,
3500       "only one literal operand is allowed");
3501     return false;
3502   }
3503   if (!validateVOP3Literal(Inst)) {
3504     Error(IDLoc,
3505       "invalid literal operand");
3506     return false;
3507   }
3508   if (!validateConstantBusLimitations(Inst)) {
3509     Error(IDLoc,
3510       "invalid operand (violates constant bus restrictions)");
3511     return false;
3512   }
3513   if (!validateEarlyClobberLimitations(Inst)) {
3514     Error(IDLoc,
3515       "destination must be different than all sources");
3516     return false;
3517   }
3518   if (!validateIntClampSupported(Inst)) {
3519     Error(IDLoc,
3520       "integer clamping is not supported on this GPU");
3521     return false;
3522   }
3523   if (!validateOpSel(Inst)) {
3524     Error(IDLoc,
3525       "invalid op_sel operand");
3526     return false;
3527   }
3528   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3529   if (!validateMIMGD16(Inst)) {
3530     Error(IDLoc,
3531       "d16 modifier is not supported on this GPU");
3532     return false;
3533   }
3534   if (!validateMIMGDim(Inst)) {
3535     Error(IDLoc, "dim modifier is required on this GPU");
3536     return false;
3537   }
3538   if (!validateMIMGDataSize(Inst)) {
3539     Error(IDLoc,
3540       "image data size does not match dmask and tfe");
3541     return false;
3542   }
3543   if (!validateMIMGAddrSize(Inst)) {
3544     Error(IDLoc,
3545       "image address size does not match dim and a16");
3546     return false;
3547   }
3548   if (!validateMIMGAtomicDMask(Inst)) {
3549     Error(IDLoc,
3550       "invalid atomic image dmask");
3551     return false;
3552   }
3553   if (!validateMIMGGatherDMask(Inst)) {
3554     Error(IDLoc,
3555       "invalid image_gather dmask: only one bit must be set");
3556     return false;
3557   }
3558   if (!validateMovrels(Inst)) {
3559     Error(IDLoc, "source operand must be a VGPR");
3560     return false;
3561   }
3562   if (!validateFlatOffset(Inst, Operands)) {
3563     return false;
3564   }
3565 
3566   return true;
3567 }
3568 
3569 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3570                                             const FeatureBitset &FBS,
3571                                             unsigned VariantID = 0);
3572 
3573 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3574                                               OperandVector &Operands,
3575                                               MCStreamer &Out,
3576                                               uint64_t &ErrorInfo,
3577                                               bool MatchingInlineAsm) {
3578   MCInst Inst;
3579   unsigned Result = Match_Success;
3580   for (auto Variant : getMatchedVariants()) {
3581     uint64_t EI;
3582     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3583                                   Variant);
    // Match statuses are ordered from least to most specific; we keep the most
    // specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3587     if ((R == Match_Success) ||
3588         (R == Match_PreferE32) ||
3589         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3590         (R == Match_InvalidOperand && Result != Match_MissingFeature
3591                                    && Result != Match_PreferE32) ||
3592         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3593                                    && Result != Match_MissingFeature
3594                                    && Result != Match_PreferE32)) {
3595       Result = R;
3596       ErrorInfo = EI;
3597     }
3598     if (R == Match_Success)
3599       break;
3600   }
3601 
3602   switch (Result) {
3603   default: break;
3604   case Match_Success:
3605     if (!validateInstruction(Inst, IDLoc, Operands)) {
3606       return true;
3607     }
3608     Inst.setLoc(IDLoc);
3609     Out.emitInstruction(Inst, getSTI());
3610     return false;
3611 
3612   case Match_MissingFeature:
3613     return Error(IDLoc, "instruction not supported on this GPU");
3614 
3615   case Match_MnemonicFail: {
3616     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3617     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3618         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3619     return Error(IDLoc, "invalid instruction" + Suggestion,
3620                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3621   }
3622 
3623   case Match_InvalidOperand: {
3624     SMLoc ErrorLoc = IDLoc;
3625     if (ErrorInfo != ~0ULL) {
3626       if (ErrorInfo >= Operands.size()) {
3627         return Error(IDLoc, "too few operands for instruction");
3628       }
3629       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3630       if (ErrorLoc == SMLoc())
3631         ErrorLoc = IDLoc;
3632     }
3633     return Error(ErrorLoc, "invalid operand for instruction");
3634   }
3635 
3636   case Match_PreferE32:
3637     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3638                         "should be encoded as e32");
3639   }
3640   llvm_unreachable("Implement any new match types added!");
3641 }
3642 
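// Parse an absolute expression and narrow it to 32 bits. Returns true on
// failure, following the usual MCAsmParser convention.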
3643 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3644   int64_t Tmp = -1;
  if (getLexer().isNot(AsmToken::Integer) &&
      getLexer().isNot(AsmToken::Identifier)) {
3646     return true;
3647   }
3648   if (getParser().parseAbsoluteExpression(Tmp)) {
3649     return true;
3650   }
3651   Ret = static_cast<uint32_t>(Tmp);
3652   return false;
3653 }
3654 
3655 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3656                                                uint32_t &Minor) {
3657   if (ParseAsAbsoluteExpression(Major))
3658     return TokError("invalid major version");
3659 
3660   if (getLexer().isNot(AsmToken::Comma))
3661     return TokError("minor version number required, comma expected");
3662   Lex();
3663 
3664   if (ParseAsAbsoluteExpression(Minor))
3665     return TokError("invalid minor version");
3666 
3667   return false;
3668 }
3669 
3670 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3671   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3672     return TokError("directive only supported for amdgcn architecture");
3673 
3674   std::string Target;
3675 
3676   SMLoc TargetStart = getTok().getLoc();
3677   if (getParser().parseEscapedString(Target))
3678     return true;
3679   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3680 
3681   std::string ExpectedTarget;
3682   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3683   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3684 
3685   if (Target != ExpectedTargetOS.str())
3686     return getParser().Error(TargetRange.Start, "target must match options",
3687                              TargetRange);
3688 
3689   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3690   return false;
3691 }
3692 
3693 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3694   return getParser().Error(Range.Start, "value out of range", Range);
3695 }
3696 
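// Convert the raw VGPR/SGPR counts into the granulated block counts encoded in
// compute_pgm_rsrc1. Returns true (an out-of-range error) if the SGPR count
// exceeds the addressable limit for the subtarget.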
3697 bool AMDGPUAsmParser::calculateGPRBlocks(
3698     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3699     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3700     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3701     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3702   // TODO(scott.linder): These calculations are duplicated from
3703   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3704   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3705 
3706   unsigned NumVGPRs = NextFreeVGPR;
3707   unsigned NumSGPRs = NextFreeSGPR;
3708 
3709   if (Version.Major >= 10)
3710     NumSGPRs = 0;
3711   else {
3712     unsigned MaxAddressableNumSGPRs =
3713         IsaInfo::getAddressableNumSGPRs(&getSTI());
3714 
3715     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3716         NumSGPRs > MaxAddressableNumSGPRs)
3717       return OutOfRangeError(SGPRRange);
3718 
3719     NumSGPRs +=
3720         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3721 
3722     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3723         NumSGPRs > MaxAddressableNumSGPRs)
3724       return OutOfRangeError(SGPRRange);
3725 
3726     if (Features.test(FeatureSGPRInitBug))
3727       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3728   }
3729 
3730   VGPRBlocks =
3731       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3732   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3733 
3734   return false;
3735 }
3736 
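// Parses an .amdhsa_kernel ... .end_amdhsa_kernel block. A minimal block
// accepted by this parser might look like:
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// where .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are the only
// required directives; all other fields keep their defaults.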
3737 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3738   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3739     return TokError("directive only supported for amdgcn architecture");
3740 
3741   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3742     return TokError("directive only supported for amdhsa OS");
3743 
3744   StringRef KernelName;
3745   if (getParser().parseIdentifier(KernelName))
3746     return true;
3747 
3748   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3749 
3750   StringSet<> Seen;
3751 
3752   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3753 
3754   SMRange VGPRRange;
3755   uint64_t NextFreeVGPR = 0;
3756   SMRange SGPRRange;
3757   uint64_t NextFreeSGPR = 0;
3758   unsigned UserSGPRCount = 0;
3759   bool ReserveVCC = true;
3760   bool ReserveFlatScr = true;
3761   bool ReserveXNACK = hasXNACK();
3762   Optional<bool> EnableWavefrontSize32;
3763 
3764   while (true) {
3765     while (getLexer().is(AsmToken::EndOfStatement))
3766       Lex();
3767 
3768     if (getLexer().isNot(AsmToken::Identifier))
3769       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3770 
3771     StringRef ID = getTok().getIdentifier();
3772     SMRange IDRange = getTok().getLocRange();
3773     Lex();
3774 
3775     if (ID == ".end_amdhsa_kernel")
3776       break;
3777 
3778     if (Seen.find(ID) != Seen.end())
3779       return TokError(".amdhsa_ directives cannot be repeated");
3780     Seen.insert(ID);
3781 
3782     SMLoc ValStart = getTok().getLoc();
3783     int64_t IVal;
3784     if (getParser().parseAbsoluteExpression(IVal))
3785       return true;
3786     SMLoc ValEnd = getTok().getLoc();
3787     SMRange ValRange = SMRange(ValStart, ValEnd);
3788 
3789     if (IVal < 0)
3790       return OutOfRangeError(ValRange);
3791 
3792     uint64_t Val = IVal;
3793 
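// Reject VALUE if it does not fit in ENTRY's bit width; otherwise set the
// ENTRY bits of FIELD in the kernel descriptor being built.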
3794 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3795   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3796     return OutOfRangeError(RANGE);                                             \
3797   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3798 
3799     if (ID == ".amdhsa_group_segment_fixed_size") {
3800       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3801         return OutOfRangeError(ValRange);
3802       KD.group_segment_fixed_size = Val;
3803     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3804       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3805         return OutOfRangeError(ValRange);
3806       KD.private_segment_fixed_size = Val;
3807     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3808       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3809                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3810                        Val, ValRange);
3811       if (Val)
3812         UserSGPRCount += 4;
3813     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3814       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3815                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3816                        ValRange);
3817       if (Val)
3818         UserSGPRCount += 2;
3819     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3820       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3821                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3822                        ValRange);
3823       if (Val)
3824         UserSGPRCount += 2;
3825     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3826       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3827                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3828                        Val, ValRange);
3829       if (Val)
3830         UserSGPRCount += 2;
3831     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3832       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3833                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3834                        ValRange);
3835       if (Val)
3836         UserSGPRCount += 2;
3837     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3838       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3839                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3840                        ValRange);
3841       if (Val)
3842         UserSGPRCount += 2;
3843     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3844       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3845                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3846                        Val, ValRange);
3847       if (Val)
3848         UserSGPRCount += 1;
3849     } else if (ID == ".amdhsa_wavefront_size32") {
3850       if (IVersion.Major < 10)
3851         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3852                                  IDRange);
3853       EnableWavefrontSize32 = Val;
3854       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3855                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3856                        Val, ValRange);
3857     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3858       PARSE_BITS_ENTRY(
3859           KD.compute_pgm_rsrc2,
3860           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3861           ValRange);
3862     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3863       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3864                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3865                        ValRange);
3866     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3867       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3868                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3869                        ValRange);
3870     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3871       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3872                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3873                        ValRange);
3874     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3875       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3876                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3877                        ValRange);
3878     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3879       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3880                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3881                        ValRange);
3882     } else if (ID == ".amdhsa_next_free_vgpr") {
3883       VGPRRange = ValRange;
3884       NextFreeVGPR = Val;
3885     } else if (ID == ".amdhsa_next_free_sgpr") {
3886       SGPRRange = ValRange;
3887       NextFreeSGPR = Val;
3888     } else if (ID == ".amdhsa_reserve_vcc") {
3889       if (!isUInt<1>(Val))
3890         return OutOfRangeError(ValRange);
3891       ReserveVCC = Val;
3892     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3893       if (IVersion.Major < 7)
3894         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3895                                  IDRange);
3896       if (!isUInt<1>(Val))
3897         return OutOfRangeError(ValRange);
3898       ReserveFlatScr = Val;
3899     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3900       if (IVersion.Major < 8)
3901         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3902                                  IDRange);
3903       if (!isUInt<1>(Val))
3904         return OutOfRangeError(ValRange);
3905       ReserveXNACK = Val;
3906     } else if (ID == ".amdhsa_float_round_mode_32") {
3907       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3908                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3909     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3910       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3911                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3912     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3913       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3914                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3915     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3916       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3917                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3918                        ValRange);
3919     } else if (ID == ".amdhsa_dx10_clamp") {
3920       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3921                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3922     } else if (ID == ".amdhsa_ieee_mode") {
3923       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3924                        Val, ValRange);
3925     } else if (ID == ".amdhsa_fp16_overflow") {
3926       if (IVersion.Major < 9)
3927         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3928                                  IDRange);
3929       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3930                        ValRange);
3931     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3932       if (IVersion.Major < 10)
3933         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3934                                  IDRange);
3935       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3936                        ValRange);
3937     } else if (ID == ".amdhsa_memory_ordered") {
3938       if (IVersion.Major < 10)
3939         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3940                                  IDRange);
3941       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3942                        ValRange);
3943     } else if (ID == ".amdhsa_forward_progress") {
3944       if (IVersion.Major < 10)
3945         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3946                                  IDRange);
3947       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3948                        ValRange);
3949     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3950       PARSE_BITS_ENTRY(
3951           KD.compute_pgm_rsrc2,
3952           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3953           ValRange);
3954     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3955       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3956                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3957                        Val, ValRange);
3958     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3959       PARSE_BITS_ENTRY(
3960           KD.compute_pgm_rsrc2,
3961           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3962           ValRange);
3963     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3964       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3965                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3966                        Val, ValRange);
3967     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3968       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3969                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3970                        Val, ValRange);
3971     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3972       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3973                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3974                        Val, ValRange);
3975     } else if (ID == ".amdhsa_exception_int_div_zero") {
3976       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3977                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3978                        Val, ValRange);
3979     } else {
3980       return getParser().Error(IDRange.Start,
3981                                "unknown .amdhsa_kernel directive", IDRange);
3982     }
3983 
3984 #undef PARSE_BITS_ENTRY
3985   }
3986 
3987   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3988     return TokError(".amdhsa_next_free_vgpr directive is required");
3989 
3990   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3991     return TokError(".amdhsa_next_free_sgpr directive is required");
3992 
3993   unsigned VGPRBlocks;
3994   unsigned SGPRBlocks;
3995   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3996                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3997                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3998                          SGPRBlocks))
3999     return true;
4000 
4001   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4002           VGPRBlocks))
4003     return OutOfRangeError(VGPRRange);
4004   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4005                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4006 
4007   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4008           SGPRBlocks))
4009     return OutOfRangeError(SGPRRange);
4010   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4011                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4012                   SGPRBlocks);
4013 
4014   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4015     return TokError("too many user SGPRs enabled");
4016   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4017                   UserSGPRCount);
4018 
4019   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4020       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4021       ReserveFlatScr, ReserveXNACK);
4022   return false;
4023 }
4024 
4025 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4026   uint32_t Major;
4027   uint32_t Minor;
4028 
4029   if (ParseDirectiveMajorMinor(Major, Minor))
4030     return true;
4031 
4032   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4033   return false;
4034 }
4035 
4036 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4037   uint32_t Major;
4038   uint32_t Minor;
4039   uint32_t Stepping;
4040   StringRef VendorName;
4041   StringRef ArchName;
4042 
4043   // If this directive has no arguments, then use the ISA version for the
4044   // targeted GPU.
4045   if (getLexer().is(AsmToken::EndOfStatement)) {
4046     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4047     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4048                                                       ISA.Stepping,
4049                                                       "AMD", "AMDGPU");
4050     return false;
4051   }
4052 
4053   if (ParseDirectiveMajorMinor(Major, Minor))
4054     return true;
4055 
4056   if (getLexer().isNot(AsmToken::Comma))
4057     return TokError("stepping version number required, comma expected");
4058   Lex();
4059 
4060   if (ParseAsAbsoluteExpression(Stepping))
4061     return TokError("invalid stepping version");
4062 
4063   if (getLexer().isNot(AsmToken::Comma))
4064     return TokError("vendor name required, comma expected");
4065   Lex();
4066 
4067   if (getLexer().isNot(AsmToken::String))
4068     return TokError("invalid vendor name");
4069 
4070   VendorName = getLexer().getTok().getStringContents();
4071   Lex();
4072 
4073   if (getLexer().isNot(AsmToken::Comma))
4074     return TokError("arch name required, comma expected");
4075   Lex();
4076 
4077   if (getLexer().isNot(AsmToken::String))
4078     return TokError("invalid arch name");
4079 
4080   ArchName = getLexer().getTok().getStringContents();
4081   Lex();
4082 
4083   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4084                                                     VendorName, ArchName);
4085   return false;
4086 }
4087 
4088 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4089                                                amd_kernel_code_t &Header) {
4090   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4091   // assembly for backwards compatibility.
4092   if (ID == "max_scratch_backing_memory_byte_size") {
4093     Parser.eatToEndOfStatement();
4094     return false;
4095   }
4096 
4097   SmallString<40> ErrStr;
4098   raw_svector_ostream Err(ErrStr);
4099   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4100     return TokError(Err.str());
4101   }
4102   Lex();
4103 
4104   if (ID == "enable_wavefront_size32") {
4105     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4106       if (!isGFX10())
4107         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4108       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4109         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4110     } else {
4111       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4112         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4113     }
4114   }
4115 
4116   if (ID == "wavefront_size") {
4117     if (Header.wavefront_size == 5) {
4118       if (!isGFX10())
4119         return TokError("wavefront_size=5 is only allowed on GFX10+");
4120       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4121         return TokError("wavefront_size=5 requires +WavefrontSize32");
4122     } else if (Header.wavefront_size == 6) {
4123       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4124         return TokError("wavefront_size=6 requires +WavefrontSize64");
4125     }
4126   }
4127 
4128   if (ID == "enable_wgp_mode") {
4129     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4130       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4131   }
4132 
4133   if (ID == "enable_mem_ordered") {
4134     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4135       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4136   }
4137 
4138   if (ID == "enable_fwd_progress") {
4139     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4140       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4141   }
4142 
4143   return false;
4144 }
4145 
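// Parses the fields of an .amd_kernel_code_t ... .end_amd_kernel_code_t block
// into an amd_kernel_code_t header, one field per statement.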
4146 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4147   amd_kernel_code_t Header;
4148   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4149 
4150   while (true) {
4151     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4152     // will set the current token to EndOfStatement.
4153     while(getLexer().is(AsmToken::EndOfStatement))
4154       Lex();
4155 
4156     if (getLexer().isNot(AsmToken::Identifier))
4157       return TokError("expected value identifier or .end_amd_kernel_code_t");
4158 
4159     StringRef ID = getLexer().getTok().getIdentifier();
4160     Lex();
4161 
4162     if (ID == ".end_amd_kernel_code_t")
4163       break;
4164 
4165     if (ParseAMDKernelCodeTValue(ID, Header))
4166       return true;
4167   }
4168 
4169   getTargetStreamer().EmitAMDKernelCodeT(Header);
4170 
4171   return false;
4172 }
4173 
4174 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4175   if (getLexer().isNot(AsmToken::Identifier))
4176     return TokError("expected symbol name");
4177 
4178   StringRef KernelName = Parser.getTok().getString();
4179 
4180   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4181                                            ELF::STT_AMDGPU_HSA_KERNEL);
4182   Lex();
4183   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4184     KernelScope.initialize(getContext());
4185   return false;
4186 }
4187 
4188 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4189   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4190     return Error(getParser().getTok().getLoc(),
4191                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4192                  "architectures");
4193   }
4194 
4195   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4196 
4197   std::string ISAVersionStringFromSTI;
4198   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4199   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4200 
4201   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4202     return Error(getParser().getTok().getLoc(),
4203                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4204                  "arguments specified through the command line");
4205   }
4206 
4207   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4208   Lex();
4209 
4210   return false;
4211 }
4212 
4213 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4214   const char *AssemblerDirectiveBegin;
4215   const char *AssemblerDirectiveEnd;
4216   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4217       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4218           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4219                             HSAMD::V3::AssemblerDirectiveEnd)
4220           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4221                             HSAMD::AssemblerDirectiveEnd);
4222 
4223   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4224     return Error(getParser().getTok().getLoc(),
4225                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4226                  "not available on non-amdhsa OSes")).str());
4227   }
4228 
4229   std::string HSAMetadataString;
4230   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4231                           HSAMetadataString))
4232     return true;
4233 
4234   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4235     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4236       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4237   } else {
4238     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4239       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4240   }
4241 
4242   return false;
4243 }
4244 
4245 /// Common code to parse out a block of text (typically YAML) between start and
4246 /// end directives.
4247 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4248                                           const char *AssemblerDirectiveEnd,
4249                                           std::string &CollectString) {
4250 
4251   raw_string_ostream CollectStream(CollectString);
4252 
4253   getLexer().setSkipSpace(false);
4254 
4255   bool FoundEnd = false;
4256   while (!getLexer().is(AsmToken::Eof)) {
4257     while (getLexer().is(AsmToken::Space)) {
4258       CollectStream << getLexer().getTok().getString();
4259       Lex();
4260     }
4261 
4262     if (getLexer().is(AsmToken::Identifier)) {
4263       StringRef ID = getLexer().getTok().getIdentifier();
4264       if (ID == AssemblerDirectiveEnd) {
4265         Lex();
4266         FoundEnd = true;
4267         break;
4268       }
4269     }
4270 
4271     CollectStream << Parser.parseStringToEndOfStatement()
4272                   << getContext().getAsmInfo()->getSeparatorString();
4273 
4274     Parser.eatToEndOfStatement();
4275   }
4276 
4277   getLexer().setSkipSpace(true);
4278 
4279   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4280     return TokError(Twine("expected directive ") +
4281                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4282   }
4283 
4284   CollectStream.flush();
4285   return false;
4286 }
4287 
4288 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4289 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4290   std::string String;
4291   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4292                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4293     return true;
4294 
4295   auto PALMetadata = getTargetStreamer().getPALMetadata();
4296   if (!PALMetadata->setFromString(String))
4297     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4298   return false;
4299 }
4300 
4301 /// Parse the assembler directive for old linear-format PAL metadata.
4302 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4303   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4304     return Error(getParser().getTok().getLoc(),
4305                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4306                  "not available on non-amdpal OSes")).str());
4307   }
4308 
4309   auto PALMetadata = getTargetStreamer().getPALMetadata();
4310   PALMetadata->setLegacy();
4311   for (;;) {
4312     uint32_t Key, Value;
4313     if (ParseAsAbsoluteExpression(Key)) {
4314       return TokError(Twine("invalid value in ") +
4315                       Twine(PALMD::AssemblerDirective));
4316     }
4317     if (getLexer().isNot(AsmToken::Comma)) {
4318       return TokError(Twine("expected an even number of values in ") +
4319                       Twine(PALMD::AssemblerDirective));
4320     }
4321     Lex();
4322     if (ParseAsAbsoluteExpression(Value)) {
4323       return TokError(Twine("invalid value in ") +
4324                       Twine(PALMD::AssemblerDirective));
4325     }
4326     PALMetadata->setRegister(Key, Value);
4327     if (getLexer().isNot(AsmToken::Comma))
4328       break;
4329     Lex();
4330   }
4331   return false;
4332 }
4333 
4334 /// ParseDirectiveAMDGPULDS
4335 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
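///  e.g. '.amdgpu_lds my_lds_var, 512, 16'; the alignment defaults to 4 when
///  omitted.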
4336 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4337   if (getParser().checkForValidSection())
4338     return true;
4339 
4340   StringRef Name;
4341   SMLoc NameLoc = getLexer().getLoc();
4342   if (getParser().parseIdentifier(Name))
4343     return TokError("expected identifier in directive");
4344 
4345   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4346   if (parseToken(AsmToken::Comma, "expected ','"))
4347     return true;
4348 
4349   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4350 
4351   int64_t Size;
4352   SMLoc SizeLoc = getLexer().getLoc();
4353   if (getParser().parseAbsoluteExpression(Size))
4354     return true;
4355   if (Size < 0)
4356     return Error(SizeLoc, "size must be non-negative");
4357   if (Size > LocalMemorySize)
4358     return Error(SizeLoc, "size is too large");
4359 
4360   int64_t Align = 4;
4361   if (getLexer().is(AsmToken::Comma)) {
4362     Lex();
4363     SMLoc AlignLoc = getLexer().getLoc();
4364     if (getParser().parseAbsoluteExpression(Align))
4365       return true;
4366     if (Align < 0 || !isPowerOf2_64(Align))
4367       return Error(AlignLoc, "alignment must be a power of two");
4368 
4369     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
4371     // to make sure the alignment fits nicely into a 32-bit integer.
4372     if (Align >= 1u << 31)
4373       return Error(AlignLoc, "alignment is too large");
4374   }
4375 
4376   if (parseToken(AsmToken::EndOfStatement,
4377                  "unexpected token in '.amdgpu_lds' directive"))
4378     return true;
4379 
4380   Symbol->redefineIfPossible();
4381   if (!Symbol->isUndefined())
4382     return Error(NameLoc, "invalid symbol redefinition");
4383 
4384   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4385   return false;
4386 }
4387 
4388 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4389   StringRef IDVal = DirectiveID.getString();
4390 
4391   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4392     if (IDVal == ".amdgcn_target")
4393       return ParseDirectiveAMDGCNTarget();
4394 
4395     if (IDVal == ".amdhsa_kernel")
4396       return ParseDirectiveAMDHSAKernel();
4397 
4398     // TODO: Restructure/combine with PAL metadata directive.
4399     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4400       return ParseDirectiveHSAMetadata();
4401   } else {
4402     if (IDVal == ".hsa_code_object_version")
4403       return ParseDirectiveHSACodeObjectVersion();
4404 
4405     if (IDVal == ".hsa_code_object_isa")
4406       return ParseDirectiveHSACodeObjectISA();
4407 
4408     if (IDVal == ".amd_kernel_code_t")
4409       return ParseDirectiveAMDKernelCodeT();
4410 
4411     if (IDVal == ".amdgpu_hsa_kernel")
4412       return ParseDirectiveAMDGPUHsaKernel();
4413 
4414     if (IDVal == ".amd_amdgpu_isa")
4415       return ParseDirectiveISAVersion();
4416 
4417     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4418       return ParseDirectiveHSAMetadata();
4419   }
4420 
4421   if (IDVal == ".amdgpu_lds")
4422     return ParseDirectiveAMDGPULDS();
4423 
4424   if (IDVal == PALMD::AssemblerDirectiveBegin)
4425     return ParseDirectivePALMetadataBegin();
4426 
4427   if (IDVal == PALMD::AssemblerDirective)
4428     return ParseDirectivePALMetadata();
4429 
4430   return true;
4431 }
4432 
4433 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4434                                            unsigned RegNo) const {
4435 
4436   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4437        R.isValid(); ++R) {
4438     if (*R == RegNo)
4439       return isGFX9() || isGFX10();
4440   }
4441 
4442   // GFX10 has 2 more SGPRs 104 and 105.
4443   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4444        R.isValid(); ++R) {
4445     if (*R == RegNo)
4446       return hasSGPR104_SGPR105();
4447   }
4448 
4449   switch (RegNo) {
4450   case AMDGPU::SRC_SHARED_BASE:
4451   case AMDGPU::SRC_SHARED_LIMIT:
4452   case AMDGPU::SRC_PRIVATE_BASE:
4453   case AMDGPU::SRC_PRIVATE_LIMIT:
4454   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4455     return !isCI() && !isSI() && !isVI();
4456   case AMDGPU::TBA:
4457   case AMDGPU::TBA_LO:
4458   case AMDGPU::TBA_HI:
4459   case AMDGPU::TMA:
4460   case AMDGPU::TMA_LO:
4461   case AMDGPU::TMA_HI:
4462     return !isGFX9() && !isGFX10();
4463   case AMDGPU::XNACK_MASK:
4464   case AMDGPU::XNACK_MASK_LO:
4465   case AMDGPU::XNACK_MASK_HI:
4466     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4467   case AMDGPU::SGPR_NULL:
4468     return isGFX10();
4469   default:
4470     break;
4471   }
4472 
4473   if (isCI())
4474     return true;
4475 
4476   if (isSI() || isGFX10()) {
4477     // No flat_scr on SI.
4478     // On GFX10 flat scratch is not a valid register operand and can only be
4479     // accessed with s_setreg/s_getreg.
4480     switch (RegNo) {
4481     case AMDGPU::FLAT_SCR:
4482     case AMDGPU::FLAT_SCR_LO:
4483     case AMDGPU::FLAT_SCR_HI:
4484       return false;
4485     default:
4486       return true;
4487     }
4488   }
4489 
4490   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4491   // SI/CI have.
4492   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4493        R.isValid(); ++R) {
4494     if (*R == RegNo)
4495       return hasSGPR102_SGPR103();
4496   }
4497 
4498   return true;
4499 }
4500 
4501 OperandMatchResultTy
4502 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4503                               OperandMode Mode) {
4504   // Try to parse with a custom parser
4505   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4506 
  // If we successfully parsed the operand or if there was an error parsing,
4508   // we are done.
4509   //
4510   // If we are parsing after we reach EndOfStatement then this means we
4511   // are appending default values to the Operands list.  This is only done
4512   // by custom parser, so we shouldn't continue on to the generic parsing.
4513   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4514       getLexer().is(AsmToken::EndOfStatement))
4515     return ResTy;
4516 
4517   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4518     unsigned Prefix = Operands.size();
4519     SMLoc LBraceLoc = getTok().getLoc();
4520     Parser.Lex(); // eat the '['
4521 
4522     for (;;) {
4523       ResTy = parseReg(Operands);
4524       if (ResTy != MatchOperand_Success)
4525         return ResTy;
4526 
4527       if (getLexer().is(AsmToken::RBrac))
4528         break;
4529 
4530       if (getLexer().isNot(AsmToken::Comma))
4531         return MatchOperand_ParseFail;
4532       Parser.Lex();
4533     }
4534 
4535     if (Operands.size() - Prefix > 1) {
4536       Operands.insert(Operands.begin() + Prefix,
4537                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4538       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4539                                                     getTok().getLoc()));
4540     }
4541 
4542     Parser.Lex(); // eat the ']'
4543     return MatchOperand_Success;
4544   }
4545 
4546   return parseRegOrImm(Operands);
4547 }
4548 
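// Strip a trailing _e32/_e64/_dpp/_sdwa suffix from the mnemonic and record
// the corresponding forced encoding, e.g. "v_add_f32_e64" is matched as
// "v_add_f32" with a forced 64-bit encoding.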
4549 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4550   // Clear any forced encodings from the previous instruction.
4551   setForcedEncodingSize(0);
4552   setForcedDPP(false);
4553   setForcedSDWA(false);
4554 
4555   if (Name.endswith("_e64")) {
4556     setForcedEncodingSize(64);
4557     return Name.substr(0, Name.size() - 4);
4558   } else if (Name.endswith("_e32")) {
4559     setForcedEncodingSize(32);
4560     return Name.substr(0, Name.size() - 4);
4561   } else if (Name.endswith("_dpp")) {
4562     setForcedDPP(true);
4563     return Name.substr(0, Name.size() - 4);
4564   } else if (Name.endswith("_sdwa")) {
4565     setForcedSDWA(true);
4566     return Name.substr(0, Name.size() - 5);
4567   }
4568   return Name;
4569 }
4570 
4571 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4572                                        StringRef Name,
4573                                        SMLoc NameLoc, OperandVector &Operands) {
4574   // Add the instruction mnemonic
4575   Name = parseMnemonicSuffix(Name);
4576   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4577 
4578   bool IsMIMG = Name.startswith("image_");
4579 
4580   while (!getLexer().is(AsmToken::EndOfStatement)) {
4581     OperandMode Mode = OperandMode_Default;
4582     if (IsMIMG && isGFX10() && Operands.size() == 2)
4583       Mode = OperandMode_NSA;
4584     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4585 
4586     // Eat the comma or space if there is one.
4587     if (getLexer().is(AsmToken::Comma))
4588       Parser.Lex();
4589 
4590     switch (Res) {
4591       case MatchOperand_Success: break;
4592       case MatchOperand_ParseFail:
4593         // FIXME: use real operand location rather than the current location.
4594         Error(getLexer().getLoc(), "failed parsing operand.");
4595         while (!getLexer().is(AsmToken::EndOfStatement)) {
4596           Parser.Lex();
4597         }
4598         return true;
4599       case MatchOperand_NoMatch:
4600         // FIXME: use real operand location rather than the current location.
4601         Error(getLexer().getLoc(), "not a valid operand.");
4602         while (!getLexer().is(AsmToken::EndOfStatement)) {
4603           Parser.Lex();
4604         }
4605         return true;
4606     }
4607   }
4608 
4609   return false;
4610 }
4611 
4612 //===----------------------------------------------------------------------===//
4613 // Utility functions
4614 //===----------------------------------------------------------------------===//
4615 
4616 OperandMatchResultTy
4617 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4618 
4619   if (!trySkipId(Prefix, AsmToken::Colon))
4620     return MatchOperand_NoMatch;
4621 
4622   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4623 }
4624 
4625 OperandMatchResultTy
4626 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4627                                     AMDGPUOperand::ImmTy ImmTy,
4628                                     bool (*ConvertResult)(int64_t&)) {
4629   SMLoc S = getLoc();
4630   int64_t Value = 0;
4631 
4632   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4633   if (Res != MatchOperand_Success)
4634     return Res;
4635 
4636   if (ConvertResult && !ConvertResult(Value)) {
4637     Error(S, "invalid " + StringRef(Prefix) + " value.");
4638   }
4639 
4640   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4641   return MatchOperand_Success;
4642 }
4643 
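// Parse a prefixed array of 0/1 values such as "op_sel:[0,1,0]" or
// "neg_lo:[1,0]" (illustrative operands). Each element becomes one bit of the
// resulting immediate, with element I stored in bit I; at most four elements
// are accepted.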
4644 OperandMatchResultTy
4645 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4646                                              OperandVector &Operands,
4647                                              AMDGPUOperand::ImmTy ImmTy,
4648                                              bool (*ConvertResult)(int64_t&)) {
4649   SMLoc S = getLoc();
4650   if (!trySkipId(Prefix, AsmToken::Colon))
4651     return MatchOperand_NoMatch;
4652 
4653   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4654     return MatchOperand_ParseFail;
4655 
4656   unsigned Val = 0;
4657   const unsigned MaxSize = 4;
4658 
4659   // FIXME: How to verify the number of elements matches the number of src
4660   // operands?
4661   for (int I = 0; ; ++I) {
4662     int64_t Op;
4663     SMLoc Loc = getLoc();
4664     if (!parseExpr(Op))
4665       return MatchOperand_ParseFail;
4666 
4667     if (Op != 0 && Op != 1) {
4668       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4669       return MatchOperand_ParseFail;
4670     }
4671 
4672     Val |= (Op << I);
4673 
4674     if (trySkipToken(AsmToken::RBrac))
4675       break;
4676 
4677     if (I + 1 == MaxSize) {
4678       Error(getLoc(), "expected a closing square bracket");
4679       return MatchOperand_ParseFail;
4680     }
4681 
4682     if (!skipToken(AsmToken::Comma, "expected a comma"))
4683       return MatchOperand_ParseFail;
4684   }
4685 
4686   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4687   return MatchOperand_Success;
4688 }
4689 
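// Parse a single-bit named modifier. The bare name (e.g. "glc") sets the bit
// and the "no"-prefixed form (e.g. "noglc") clears it; if the statement ends
// before the modifier, a default of 0 is used. The examples are illustrative;
// the accepted names come from the optional-operand table.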
4690 OperandMatchResultTy
4691 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4692                                AMDGPUOperand::ImmTy ImmTy) {
4693   int64_t Bit = 0;
4694   SMLoc S = Parser.getTok().getLoc();
4695 
4696   // If we are already at the end of the statement, this is a default
4697   // argument, so use the default value of 0.
4698   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4699     switch(getLexer().getKind()) {
4700       case AsmToken::Identifier: {
4701         StringRef Tok = Parser.getTok().getString();
4702         if (Tok == Name) {
4703           if (Tok == "r128" && !hasMIMG_R128())
4704             Error(S, "r128 modifier is not supported on this GPU");
4705           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4706             Error(S, "a16 modifier is not supported on this GPU");
4707           Bit = 1;
4708           Parser.Lex();
4709         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4710           Bit = 0;
4711           Parser.Lex();
4712         } else {
4713           return MatchOperand_NoMatch;
4714         }
4715         break;
4716       }
4717       default:
4718         return MatchOperand_NoMatch;
4719     }
4720   }
4721 
4722   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4723     return MatchOperand_ParseFail;
4724 
4725   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4726     ImmTy = AMDGPUOperand::ImmTyR128A16;
4727 
4728   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4729   return MatchOperand_Success;
4730 }
4731 
4732 static void addOptionalImmOperand(
4733   MCInst& Inst, const OperandVector& Operands,
4734   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4735   AMDGPUOperand::ImmTy ImmT,
4736   int64_t Default = 0) {
4737   auto i = OptionalIdx.find(ImmT);
4738   if (i != OptionalIdx.end()) {
4739     unsigned Idx = i->second;
4740     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4741   } else {
4742     Inst.addOperand(MCOperand::createImm(Default));
4743   }
4744 }
4745 
4746 OperandMatchResultTy
4747 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4748   if (getLexer().isNot(AsmToken::Identifier)) {
4749     return MatchOperand_NoMatch;
4750   }
4751   StringRef Tok = Parser.getTok().getString();
4752   if (Tok != Prefix) {
4753     return MatchOperand_NoMatch;
4754   }
4755 
4756   Parser.Lex();
4757   if (getLexer().isNot(AsmToken::Colon)) {
4758     return MatchOperand_ParseFail;
4759   }
4760 
4761   Parser.Lex();
4762   if (getLexer().isNot(AsmToken::Identifier)) {
4763     return MatchOperand_ParseFail;
4764   }
4765 
4766   Value = Parser.getTok().getString();
4767   return MatchOperand_Success;
4768 }
4769 
4770 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4771 // values to live in a joint format operand in the MCInst encoding.
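// For example (illustrative values), a tbuffer operand list may contain
// "dfmt:4, nfmt:2"; the two keys may appear in either order and each one is
// optional.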
4772 OperandMatchResultTy
4773 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4774   SMLoc S = Parser.getTok().getLoc();
4775   int64_t Dfmt = 0, Nfmt = 0;
4776   // dfmt and nfmt can appear in either order, and each is optional.
4777   bool GotDfmt = false, GotNfmt = false;
4778   while (!GotDfmt || !GotNfmt) {
4779     if (!GotDfmt) {
4780       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4781       if (Res != MatchOperand_NoMatch) {
4782         if (Res != MatchOperand_Success)
4783           return Res;
4784         if (Dfmt >= 16) {
4785           Error(Parser.getTok().getLoc(), "out of range dfmt");
4786           return MatchOperand_ParseFail;
4787         }
4788         GotDfmt = true;
4789         Parser.Lex();
4790         continue;
4791       }
4792     }
4793     if (!GotNfmt) {
4794       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4795       if (Res != MatchOperand_NoMatch) {
4796         if (Res != MatchOperand_Success)
4797           return Res;
4798         if (Nfmt >= 8) {
4799           Error(Parser.getTok().getLoc(), "out of range nfmt");
4800           return MatchOperand_ParseFail;
4801         }
4802         GotNfmt = true;
4803         Parser.Lex();
4804         continue;
4805       }
4806     }
4807     break;
4808   }
4809   if (!GotDfmt && !GotNfmt)
4810     return MatchOperand_NoMatch;
4811   auto Format = Dfmt | Nfmt << 4;
4812   Operands.push_back(
4813       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4814   return MatchOperand_Success;
4815 }
4816 
4817 //===----------------------------------------------------------------------===//
4818 // ds
4819 //===----------------------------------------------------------------------===//
4820 
4821 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4822                                     const OperandVector &Operands) {
4823   OptionalImmIndexMap OptionalIdx;
4824 
4825   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4826     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4827 
4828     // Add the register arguments
4829     if (Op.isReg()) {
4830       Op.addRegOperands(Inst, 1);
4831       continue;
4832     }
4833 
4834     // Handle optional arguments
4835     OptionalIdx[Op.getImmTy()] = i;
4836   }
4837 
4838   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4839   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4840   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4841 
4842   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4843 }
4844 
4845 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4846                                 bool IsGdsHardcoded) {
4847   OptionalImmIndexMap OptionalIdx;
4848 
4849   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4850     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4851 
4852     // Add the register arguments
4853     if (Op.isReg()) {
4854       Op.addRegOperands(Inst, 1);
4855       continue;
4856     }
4857 
4858     if (Op.isToken() && Op.getToken() == "gds") {
4859       IsGdsHardcoded = true;
4860       continue;
4861     }
4862 
4863     // Handle optional arguments
4864     OptionalIdx[Op.getImmTy()] = i;
4865   }
4866 
4867   AMDGPUOperand::ImmTy OffsetType =
4868     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4869      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4870      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4871                                                       AMDGPUOperand::ImmTyOffset;
4872 
4873   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4874 
4875   if (!IsGdsHardcoded) {
4876     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4877   }
4878   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4879 }
4880 
4881 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4882   OptionalImmIndexMap OptionalIdx;
4883 
4884   unsigned OperandIdx[4];
4885   unsigned EnMask = 0;
4886   int SrcIdx = 0;
4887 
4888   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4889     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4890 
4891     // Add the register arguments
4892     if (Op.isReg()) {
4893       assert(SrcIdx < 4);
4894       OperandIdx[SrcIdx] = Inst.size();
4895       Op.addRegOperands(Inst, 1);
4896       ++SrcIdx;
4897       continue;
4898     }
4899 
4900     if (Op.isOff()) {
4901       assert(SrcIdx < 4);
4902       OperandIdx[SrcIdx] = Inst.size();
4903       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4904       ++SrcIdx;
4905       continue;
4906     }
4907 
4908     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4909       Op.addImmOperands(Inst, 1);
4910       continue;
4911     }
4912 
4913     if (Op.isToken() && Op.getToken() == "done")
4914       continue;
4915 
4916     // Handle optional arguments
4917     OptionalIdx[Op.getImmTy()] = i;
4918   }
4919 
4920   assert(SrcIdx == 4);
4921 
4922   bool Compr = false;
4923   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4924     Compr = true;
4925     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4926     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4927     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4928   }
4929 
4930   for (auto i = 0; i < SrcIdx; ++i) {
4931     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4932       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4933     }
4934   }
4935 
4936   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4938 
4939   Inst.addOperand(MCOperand::createImm(EnMask));
4940 }
4941 
4942 //===----------------------------------------------------------------------===//
4943 // s_waitcnt
4944 //===----------------------------------------------------------------------===//
4945 
4946 static bool
4947 encodeCnt(
4948   const AMDGPU::IsaVersion ISA,
4949   int64_t &IntVal,
4950   int64_t CntVal,
4951   bool Saturate,
4952   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4953   unsigned (*decode)(const IsaVersion &Version, unsigned))
4954 {
4955   bool Failed = false;
4956 
4957   IntVal = encode(ISA, IntVal, CntVal);
4958   if (CntVal != decode(ISA, IntVal)) {
4959     if (Saturate) {
4960       IntVal = encode(ISA, IntVal, -1);
4961     } else {
4962       Failed = true;
4963     }
4964   }
4965   return Failed;
4966 }
4967 
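// Parse one counter term of an s_waitcnt operand, e.g. "vmcnt(0)" in
// "s_waitcnt vmcnt(0) lgkmcnt(0)" (illustrative); terms may also be separated
// by '&' or ','. The "_sat" forms clamp an out-of-range value to the field
// maximum instead of reporting an error.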
4968 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4969 
4970   SMLoc CntLoc = getLoc();
4971   StringRef CntName = getTokenStr();
4972 
4973   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4974       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4975     return false;
4976 
4977   int64_t CntVal;
4978   SMLoc ValLoc = getLoc();
4979   if (!parseExpr(CntVal))
4980     return false;
4981 
4982   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4983 
4984   bool Failed = true;
4985   bool Sat = CntName.endswith("_sat");
4986 
4987   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4988     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4989   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4990     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4991   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4992     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4993   } else {
4994     Error(CntLoc, "invalid counter name " + CntName);
4995     return false;
4996   }
4997 
4998   if (Failed) {
4999     Error(ValLoc, "too large value for " + CntName);
5000     return false;
5001   }
5002 
5003   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5004     return false;
5005 
5006   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5007     if (isToken(AsmToken::EndOfStatement)) {
5008       Error(getLoc(), "expected a counter name");
5009       return false;
5010     }
5011   }
5012 
5013   return true;
5014 }
5015 
5016 OperandMatchResultTy
5017 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5018   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5019   int64_t Waitcnt = getWaitcntBitMask(ISA);
5020   SMLoc S = getLoc();
5021 
5022   // If parsing failed, do not return an error code
5023   // to avoid excessive error messages.
5024   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5025     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5026   } else {
5027     parseExpr(Waitcnt);
5028   }
5029 
5030   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5031   return MatchOperand_Success;
5032 }
5033 
5034 bool
5035 AMDGPUOperand::isSWaitCnt() const {
5036   return isImm();
5037 }
5038 
5039 //===----------------------------------------------------------------------===//
5040 // hwreg
5041 //===----------------------------------------------------------------------===//
5042 
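// Parse the body of a hwreg operand, e.g. "hwreg(HW_REG_TRAPSTS, 0, 32)" or a
// bare numeric code such as "hwreg(5)" (illustrative names and values). The
// optional offset and width select a bitfield within the register.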
5043 bool
5044 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5045                                 int64_t &Offset,
5046                                 int64_t &Width) {
5047   using namespace llvm::AMDGPU::Hwreg;
5048 
5049   // The register may be specified by name or using a numeric code
5050   if (isToken(AsmToken::Identifier) &&
5051       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5052     HwReg.IsSymbolic = true;
5053     lex(); // skip register name
5054   } else if (!parseExpr(HwReg.Id)) {
5055     return false;
5056   }
5057 
5058   if (trySkipToken(AsmToken::RParen))
5059     return true;
5060 
5061   // parse optional params
5062   return
5063     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5064     parseExpr(Offset) &&
5065     skipToken(AsmToken::Comma, "expected a comma") &&
5066     parseExpr(Width) &&
5067     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5068 }
5069 
5070 bool
5071 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5072                                const int64_t Offset,
5073                                const int64_t Width,
5074                                const SMLoc Loc) {
5075 
5076   using namespace llvm::AMDGPU::Hwreg;
5077 
5078   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5079     Error(Loc, "specified hardware register is not supported on this GPU");
5080     return false;
5081   } else if (!isValidHwreg(HwReg.Id)) {
5082     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5083     return false;
5084   } else if (!isValidHwregOffset(Offset)) {
5085     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5086     return false;
5087   } else if (!isValidHwregWidth(Width)) {
5088     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5089     return false;
5090   }
5091   return true;
5092 }
5093 
5094 OperandMatchResultTy
5095 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5096   using namespace llvm::AMDGPU::Hwreg;
5097 
5098   int64_t ImmVal = 0;
5099   SMLoc Loc = getLoc();
5100 
5101   // If parsing failed, do not return an error code
5102   // to avoid excessive error messages.
5103   if (trySkipId("hwreg", AsmToken::LParen)) {
5104     OperandInfoTy HwReg(ID_UNKNOWN_);
5105     int64_t Offset = OFFSET_DEFAULT_;
5106     int64_t Width = WIDTH_DEFAULT_;
5107     if (parseHwregBody(HwReg, Offset, Width) &&
5108         validateHwreg(HwReg, Offset, Width, Loc)) {
5109       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5110     }
5111   } else if (parseExpr(ImmVal)) {
5112     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5113       Error(Loc, "invalid immediate: only 16-bit values are legal");
5114   }
5115 
5116   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5117   return MatchOperand_Success;
5118 }
5119 
5120 bool AMDGPUOperand::isHwreg() const {
5121   return isImmTy(ImmTyHwreg);
5122 }
5123 
5124 //===----------------------------------------------------------------------===//
5125 // sendmsg
5126 //===----------------------------------------------------------------------===//
5127 
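// Parse the body of a sendmsg operand, e.g. "sendmsg(MSG_GS, GS_OP_EMIT, 0)"
// or a plain numeric form such as "sendmsg(2, 1)" (illustrative names and
// values). The operation and stream id are optional and comma-separated.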
5128 bool
5129 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5130                                   OperandInfoTy &Op,
5131                                   OperandInfoTy &Stream) {
5132   using namespace llvm::AMDGPU::SendMsg;
5133 
5134   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5135     Msg.IsSymbolic = true;
5136     lex(); // skip message name
5137   } else if (!parseExpr(Msg.Id)) {
5138     return false;
5139   }
5140 
5141   if (trySkipToken(AsmToken::Comma)) {
5142     Op.IsDefined = true;
5143     if (isToken(AsmToken::Identifier) &&
5144         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5145       lex(); // skip operation name
5146     } else if (!parseExpr(Op.Id)) {
5147       return false;
5148     }
5149 
5150     if (trySkipToken(AsmToken::Comma)) {
5151       Stream.IsDefined = true;
5152       if (!parseExpr(Stream.Id))
5153         return false;
5154     }
5155   }
5156 
5157   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5158 }
5159 
5160 bool
5161 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5162                                  const OperandInfoTy &Op,
5163                                  const OperandInfoTy &Stream,
5164                                  const SMLoc S) {
5165   using namespace llvm::AMDGPU::SendMsg;
5166 
5167   // Validation strictness depends on whether the message is specified
5168   // in a symbolic or in a numeric form. In the latter case
5169   // only the encoding possibility is checked.
5170   bool Strict = Msg.IsSymbolic;
5171 
5172   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5173     Error(S, "invalid message id");
5174     return false;
5175   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5176     Error(S, Op.IsDefined ?
5177              "message does not support operations" :
5178              "missing message operation");
5179     return false;
5180   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5181     Error(S, "invalid operation id");
5182     return false;
5183   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5184     Error(S, "message operation does not support streams");
5185     return false;
5186   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5187     Error(S, "invalid message stream id");
5188     return false;
5189   }
5190   return true;
5191 }
5192 
5193 OperandMatchResultTy
5194 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5195   using namespace llvm::AMDGPU::SendMsg;
5196 
5197   int64_t ImmVal = 0;
5198   SMLoc Loc = getLoc();
5199 
5200   // If parsing failed, do not return an error code
5201   // to avoid excessive error messages.
5202   if (trySkipId("sendmsg", AsmToken::LParen)) {
5203     OperandInfoTy Msg(ID_UNKNOWN_);
5204     OperandInfoTy Op(OP_NONE_);
5205     OperandInfoTy Stream(STREAM_ID_NONE_);
5206     if (parseSendMsgBody(Msg, Op, Stream) &&
5207         validateSendMsg(Msg, Op, Stream, Loc)) {
5208       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5209     }
5210   } else if (parseExpr(ImmVal)) {
5211     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5212       Error(Loc, "invalid immediate: only 16-bit values are legal");
5213   }
5214 
5215   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5216   return MatchOperand_Success;
5217 }
5218 
5219 bool AMDGPUOperand::isSendMsg() const {
5220   return isImmTy(ImmTySendMsg);
5221 }
5222 
5223 //===----------------------------------------------------------------------===//
5224 // v_interp
5225 //===----------------------------------------------------------------------===//
5226 
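// Parse v_interp operands: the parameter slot ("p10", "p20" or "p0") and the
// attribute/channel pair ("attrN.x|.y|.z|.w"), e.g.
// "v_interp_p1_f32 v0, v1, attr0.x" (illustrative instruction).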
5227 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5228   if (getLexer().getKind() != AsmToken::Identifier)
5229     return MatchOperand_NoMatch;
5230 
5231   StringRef Str = Parser.getTok().getString();
5232   int Slot = StringSwitch<int>(Str)
5233     .Case("p10", 0)
5234     .Case("p20", 1)
5235     .Case("p0", 2)
5236     .Default(-1);
5237 
5238   SMLoc S = Parser.getTok().getLoc();
5239   if (Slot == -1)
5240     return MatchOperand_ParseFail;
5241 
5242   Parser.Lex();
5243   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5244                                               AMDGPUOperand::ImmTyInterpSlot));
5245   return MatchOperand_Success;
5246 }
5247 
5248 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5249   if (getLexer().getKind() != AsmToken::Identifier)
5250     return MatchOperand_NoMatch;
5251 
5252   StringRef Str = Parser.getTok().getString();
5253   if (!Str.startswith("attr"))
5254     return MatchOperand_NoMatch;
5255 
5256   StringRef Chan = Str.take_back(2);
5257   int AttrChan = StringSwitch<int>(Chan)
5258     .Case(".x", 0)
5259     .Case(".y", 1)
5260     .Case(".z", 2)
5261     .Case(".w", 3)
5262     .Default(-1);
5263   if (AttrChan == -1)
5264     return MatchOperand_ParseFail;
5265 
5266   Str = Str.drop_back(2).drop_front(4);
5267 
5268   uint8_t Attr;
5269   if (Str.getAsInteger(10, Attr))
5270     return MatchOperand_ParseFail;
5271 
5272   SMLoc S = Parser.getTok().getLoc();
5273   Parser.Lex();
5274   if (Attr > 63) {
5275     Error(S, "out of bounds attr");
5276     return MatchOperand_Success;
5277   }
5278 
5279   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5280 
5281   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5282                                               AMDGPUOperand::ImmTyInterpAttr));
5283   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5284                                               AMDGPUOperand::ImmTyAttrChan));
5285   return MatchOperand_Success;
5286 }
5287 
5288 //===----------------------------------------------------------------------===//
5289 // exp
5290 //===----------------------------------------------------------------------===//
5291 
5292 void AMDGPUAsmParser::errorExpTgt() {
5293   Error(Parser.getTok().getLoc(), "invalid exp target");
5294 }
5295 
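// Export target names accepted below map to the following encoded values
// (derived from the checks in this function): mrt0..mrt7 -> 0..7, mrtz -> 8,
// null -> 9, pos0..pos3 -> 12..15 (pos4 -> 16, GFX10 only), prim -> 20
// (GFX10 only), param0..param31 -> 32..63.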
5296 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5297                                                       uint8_t &Val) {
5298   if (Str == "null") {
5299     Val = 9;
5300     return MatchOperand_Success;
5301   }
5302 
5303   if (Str.startswith("mrt")) {
5304     Str = Str.drop_front(3);
5305     if (Str == "z") { // == mrtz
5306       Val = 8;
5307       return MatchOperand_Success;
5308     }
5309 
5310     if (Str.getAsInteger(10, Val))
5311       return MatchOperand_ParseFail;
5312 
5313     if (Val > 7)
5314       errorExpTgt();
5315 
5316     return MatchOperand_Success;
5317   }
5318 
5319   if (Str.startswith("pos")) {
5320     Str = Str.drop_front(3);
5321     if (Str.getAsInteger(10, Val))
5322       return MatchOperand_ParseFail;
5323 
5324     if (Val > 4 || (Val == 4 && !isGFX10()))
5325       errorExpTgt();
5326 
5327     Val += 12;
5328     return MatchOperand_Success;
5329   }
5330 
5331   if (isGFX10() && Str == "prim") {
5332     Val = 20;
5333     return MatchOperand_Success;
5334   }
5335 
5336   if (Str.startswith("param")) {
5337     Str = Str.drop_front(5);
5338     if (Str.getAsInteger(10, Val))
5339       return MatchOperand_ParseFail;
5340 
5341     if (Val >= 32)
5342       errorExpTgt();
5343 
5344     Val += 32;
5345     return MatchOperand_Success;
5346   }
5347 
5348   if (Str.startswith("invalid_target_")) {
5349     Str = Str.drop_front(15);
5350     if (Str.getAsInteger(10, Val))
5351       return MatchOperand_ParseFail;
5352 
5353     errorExpTgt();
5354     return MatchOperand_Success;
5355   }
5356 
5357   return MatchOperand_NoMatch;
5358 }
5359 
5360 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5361   uint8_t Val;
5362   StringRef Str = Parser.getTok().getString();
5363 
5364   auto Res = parseExpTgtImpl(Str, Val);
5365   if (Res != MatchOperand_Success)
5366     return Res;
5367 
5368   SMLoc S = Parser.getTok().getLoc();
5369   Parser.Lex();
5370 
5371   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5372                                               AMDGPUOperand::ImmTyExpTgt));
5373   return MatchOperand_Success;
5374 }
5375 
5376 //===----------------------------------------------------------------------===//
5377 // parser helpers
5378 //===----------------------------------------------------------------------===//
5379 
5380 bool
5381 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5382   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5383 }
5384 
5385 bool
5386 AMDGPUAsmParser::isId(const StringRef Id) const {
5387   return isId(getToken(), Id);
5388 }
5389 
5390 bool
5391 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5392   return getTokenKind() == Kind;
5393 }
5394 
5395 bool
5396 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5397   if (isId(Id)) {
5398     lex();
5399     return true;
5400   }
5401   return false;
5402 }
5403 
5404 bool
5405 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5406   if (isId(Id) && peekToken().is(Kind)) {
5407     lex();
5408     lex();
5409     return true;
5410   }
5411   return false;
5412 }
5413 
5414 bool
5415 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5416   if (isToken(Kind)) {
5417     lex();
5418     return true;
5419   }
5420   return false;
5421 }
5422 
5423 bool
5424 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5425                            const StringRef ErrMsg) {
5426   if (!trySkipToken(Kind)) {
5427     Error(getLoc(), ErrMsg);
5428     return false;
5429   }
5430   return true;
5431 }
5432 
5433 bool
5434 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5435   return !getParser().parseAbsoluteExpression(Imm);
5436 }
5437 
5438 bool
5439 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5440   SMLoc S = getLoc();
5441 
5442   const MCExpr *Expr;
5443   if (Parser.parseExpression(Expr))
5444     return false;
5445 
5446   int64_t IntVal;
5447   if (Expr->evaluateAsAbsolute(IntVal)) {
5448     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5449   } else {
5450     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5451   }
5452   return true;
5453 }
5454 
5455 bool
5456 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5457   if (isToken(AsmToken::String)) {
5458     Val = getToken().getStringContents();
5459     lex();
5460     return true;
5461   } else {
5462     Error(getLoc(), ErrMsg);
5463     return false;
5464   }
5465 }
5466 
5467 AsmToken
5468 AMDGPUAsmParser::getToken() const {
5469   return Parser.getTok();
5470 }
5471 
5472 AsmToken
5473 AMDGPUAsmParser::peekToken() {
5474   return getLexer().peekTok();
5475 }
5476 
5477 void
5478 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5479   auto TokCount = getLexer().peekTokens(Tokens);
5480 
5481   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5482     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5483 }
5484 
5485 AsmToken::TokenKind
5486 AMDGPUAsmParser::getTokenKind() const {
5487   return getLexer().getKind();
5488 }
5489 
5490 SMLoc
5491 AMDGPUAsmParser::getLoc() const {
5492   return getToken().getLoc();
5493 }
5494 
5495 StringRef
5496 AMDGPUAsmParser::getTokenStr() const {
5497   return getToken().getString();
5498 }
5499 
5500 void
5501 AMDGPUAsmParser::lex() {
5502   Parser.Lex();
5503 }
5504 
5505 //===----------------------------------------------------------------------===//
5506 // swizzle
5507 //===----------------------------------------------------------------------===//
5508 
5509 LLVM_READNONE
5510 static unsigned
5511 encodeBitmaskPerm(const unsigned AndMask,
5512                   const unsigned OrMask,
5513                   const unsigned XorMask) {
5514   using namespace llvm::AMDGPU::Swizzle;
5515 
5516   return BITMASK_PERM_ENC |
5517          (AndMask << BITMASK_AND_SHIFT) |
5518          (OrMask  << BITMASK_OR_SHIFT)  |
5519          (XorMask << BITMASK_XOR_SHIFT);
5520 }
5521 
5522 bool
5523 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5524                                       const unsigned MinVal,
5525                                       const unsigned MaxVal,
5526                                       const StringRef ErrMsg) {
5527   for (unsigned i = 0; i < OpNum; ++i) {
5528     if (!skipToken(AsmToken::Comma, "expected a comma")){
5529       return false;
5530     }
5531     SMLoc ExprLoc = Parser.getTok().getLoc();
5532     if (!parseExpr(Op[i])) {
5533       return false;
5534     }
5535     if (Op[i] < MinVal || Op[i] > MaxVal) {
5536       Error(ExprLoc, ErrMsg);
5537       return false;
5538     }
5539   }
5540 
5541   return true;
5542 }
5543 
5544 bool
5545 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5546   using namespace llvm::AMDGPU::Swizzle;
5547 
5548   int64_t Lane[LANE_NUM];
5549   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5550                            "expected a 2-bit lane id")) {
5551     Imm = QUAD_PERM_ENC;
5552     for (unsigned I = 0; I < LANE_NUM; ++I) {
5553       Imm |= Lane[I] << (LANE_SHIFT * I);
5554     }
5555     return true;
5556   }
5557   return false;
5558 }
5559 
5560 bool
5561 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5562   using namespace llvm::AMDGPU::Swizzle;
5563 
5564   SMLoc S = Parser.getTok().getLoc();
5565   int64_t GroupSize;
5566   int64_t LaneIdx;
5567 
5568   if (!parseSwizzleOperands(1, &GroupSize,
5569                             2, 32,
5570                             "group size must be in the interval [2,32]")) {
5571     return false;
5572   }
5573   if (!isPowerOf2_64(GroupSize)) {
5574     Error(S, "group size must be a power of two");
5575     return false;
5576   }
5577   if (parseSwizzleOperands(1, &LaneIdx,
5578                            0, GroupSize - 1,
5579                            "lane id must be in the interval [0,group size - 1]")) {
5580     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5581     return true;
5582   }
5583   return false;
5584 }
5585 
5586 bool
5587 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5588   using namespace llvm::AMDGPU::Swizzle;
5589 
5590   SMLoc S = Parser.getTok().getLoc();
5591   int64_t GroupSize;
5592 
5593   if (!parseSwizzleOperands(1, &GroupSize,
5594       2, 32, "group size must be in the interval [2,32]")) {
5595     return false;
5596   }
5597   if (!isPowerOf2_64(GroupSize)) {
5598     Error(S, "group size must be a power of two");
5599     return false;
5600   }
5601 
5602   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5603   return true;
5604 }
5605 
5606 bool
5607 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5608   using namespace llvm::AMDGPU::Swizzle;
5609 
5610   SMLoc S = Parser.getTok().getLoc();
5611   int64_t GroupSize;
5612 
5613   if (!parseSwizzleOperands(1, &GroupSize,
5614       1, 16, "group size must be in the interval [1,16]")) {
5615     return false;
5616   }
5617   if (!isPowerOf2_64(GroupSize)) {
5618     Error(S, "group size must be a power of two");
5619     return false;
5620   }
5621 
5622   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5623   return true;
5624 }
5625 
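// Parse the BITMASK_PERM control string, e.g. swizzle(BITMASK_PERM,"00p1i")
// (illustrative mask). Each of the 5 characters controls one bit of the lane
// id, most significant first: '0' forces the bit to 0, '1' forces it to 1,
// 'p' preserves it, and 'i' inverts it.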
5626 bool
5627 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5628   using namespace llvm::AMDGPU::Swizzle;
5629 
5630   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5631     return false;
5632   }
5633 
5634   StringRef Ctl;
5635   SMLoc StrLoc = Parser.getTok().getLoc();
5636   if (!parseString(Ctl)) {
5637     return false;
5638   }
5639   if (Ctl.size() != BITMASK_WIDTH) {
5640     Error(StrLoc, "expected a 5-character mask");
5641     return false;
5642   }
5643 
5644   unsigned AndMask = 0;
5645   unsigned OrMask = 0;
5646   unsigned XorMask = 0;
5647 
5648   for (size_t i = 0; i < Ctl.size(); ++i) {
5649     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5650     switch(Ctl[i]) {
5651     default:
5652       Error(StrLoc, "invalid mask");
5653       return false;
5654     case '0':
5655       break;
5656     case '1':
5657       OrMask |= Mask;
5658       break;
5659     case 'p':
5660       AndMask |= Mask;
5661       break;
5662     case 'i':
5663       AndMask |= Mask;
5664       XorMask |= Mask;
5665       break;
5666     }
5667   }
5668 
5669   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5670   return true;
5671 }
5672 
5673 bool
5674 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5675 
5676   SMLoc OffsetLoc = Parser.getTok().getLoc();
5677 
5678   if (!parseExpr(Imm)) {
5679     return false;
5680   }
5681   if (!isUInt<16>(Imm)) {
5682     Error(OffsetLoc, "expected a 16-bit offset");
5683     return false;
5684   }
5685   return true;
5686 }
5687 
5688 bool
5689 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5690   using namespace llvm::AMDGPU::Swizzle;
5691 
5692   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5693 
5694     SMLoc ModeLoc = Parser.getTok().getLoc();
5695     bool Ok = false;
5696 
5697     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5698       Ok = parseSwizzleQuadPerm(Imm);
5699     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5700       Ok = parseSwizzleBitmaskPerm(Imm);
5701     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5702       Ok = parseSwizzleBroadcast(Imm);
5703     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5704       Ok = parseSwizzleSwap(Imm);
5705     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5706       Ok = parseSwizzleReverse(Imm);
5707     } else {
5708       Error(ModeLoc, "expected a swizzle mode");
5709     }
5710 
5711     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5712   }
5713 
5714   return false;
5715 }
5716 
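// Parse a ds_swizzle offset, either as a raw 16-bit value ("offset:0xffff")
// or through the swizzle macro, e.g. "offset:swizzle(SWAP, 16)" or
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" (illustrative values).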
5717 OperandMatchResultTy
5718 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5719   SMLoc S = Parser.getTok().getLoc();
5720   int64_t Imm = 0;
5721 
5722   if (trySkipId("offset")) {
5723 
5724     bool Ok = false;
5725     if (skipToken(AsmToken::Colon, "expected a colon")) {
5726       if (trySkipId("swizzle")) {
5727         Ok = parseSwizzleMacro(Imm);
5728       } else {
5729         Ok = parseSwizzleOffset(Imm);
5730       }
5731     }
5732 
5733     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5734 
5735     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5736   } else {
5737     // Swizzle "offset" operand is optional.
5738     // If it is omitted, try parsing other optional operands.
5739     return parseOptionalOpr(Operands);
5740   }
5741 }
5742 
5743 bool
5744 AMDGPUOperand::isSwizzle() const {
5745   return isImmTy(ImmTySwizzle);
5746 }
5747 
5748 //===----------------------------------------------------------------------===//
5749 // VGPR Index Mode
5750 //===----------------------------------------------------------------------===//
5751 
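// Parse the gpr_idx mode list, e.g. "gpr_idx(SRC0, DST)" (illustrative mode
// names taken from VGPRIndexMode::IdSymbolic). Each named mode sets one bit
// of the immediate and duplicates are rejected; a raw 4-bit immediate is
// accepted instead of the macro form.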
5752 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5753 
5754   using namespace llvm::AMDGPU::VGPRIndexMode;
5755 
5756   if (trySkipToken(AsmToken::RParen)) {
5757     return OFF;
5758   }
5759 
5760   int64_t Imm = 0;
5761 
5762   while (true) {
5763     unsigned Mode = 0;
5764     SMLoc S = Parser.getTok().getLoc();
5765 
5766     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5767       if (trySkipId(IdSymbolic[ModeId])) {
5768         Mode = 1 << ModeId;
5769         break;
5770       }
5771     }
5772 
5773     if (Mode == 0) {
5774       Error(S, (Imm == 0)?
5775                "expected a VGPR index mode or a closing parenthesis" :
5776                "expected a VGPR index mode");
5777       break;
5778     }
5779 
5780     if (Imm & Mode) {
5781       Error(S, "duplicate VGPR index mode");
5782       break;
5783     }
5784     Imm |= Mode;
5785 
5786     if (trySkipToken(AsmToken::RParen))
5787       break;
5788     if (!skipToken(AsmToken::Comma,
5789                    "expected a comma or a closing parenthesis"))
5790       break;
5791   }
5792 
5793   return Imm;
5794 }
5795 
5796 OperandMatchResultTy
5797 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5798 
5799   int64_t Imm = 0;
5800   SMLoc S = Parser.getTok().getLoc();
5801 
5802   if (getLexer().getKind() == AsmToken::Identifier &&
5803       Parser.getTok().getString() == "gpr_idx" &&
5804       getLexer().peekTok().is(AsmToken::LParen)) {
5805 
5806     Parser.Lex();
5807     Parser.Lex();
5808 
5809     // If parsing failed, trigger an error but do not return an error code
5810     // to avoid excessive error messages.
5811     Imm = parseGPRIdxMacro();
5812 
5813   } else {
5814     if (getParser().parseAbsoluteExpression(Imm))
5815       return MatchOperand_NoMatch;
5816     if (Imm < 0 || !isUInt<4>(Imm)) {
5817       Error(S, "invalid immediate: only 4-bit values are legal");
5818     }
5819   }
5820 
5821   Operands.push_back(
5822       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5823   return MatchOperand_Success;
5824 }
5825 
5826 bool AMDGPUOperand::isGPRIdxMode() const {
5827   return isImmTy(ImmTyGprIdxMode);
5828 }
5829 
5830 //===----------------------------------------------------------------------===//
5831 // sopp branch targets
5832 //===----------------------------------------------------------------------===//
5833 
5834 OperandMatchResultTy
5835 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5836 
5837   // Make sure we are not parsing something
5838   // that looks like a label or an expression but is not.
5839   // This will improve error messages.
5840   if (isRegister() || isModifier())
5841     return MatchOperand_NoMatch;
5842 
5843   if (parseExpr(Operands)) {
5844 
5845     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5846     assert(Opr.isImm() || Opr.isExpr());
5847     SMLoc Loc = Opr.getStartLoc();
5848 
5849     // Currently we do not support arbitrary expressions as branch targets.
5850     // Only labels and absolute expressions are accepted.
5851     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5852       Error(Loc, "expected an absolute expression or a label");
5853     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5854       Error(Loc, "expected a 16-bit signed jump offset");
5855     }
5856   }
5857 
5858   return MatchOperand_Success; // avoid excessive error messages
5859 }
5860 
5861 //===----------------------------------------------------------------------===//
5862 // Boolean holding registers
5863 //===----------------------------------------------------------------------===//
5864 
5865 OperandMatchResultTy
5866 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5867   return parseReg(Operands);
5868 }
5869 
5870 //===----------------------------------------------------------------------===//
5871 // mubuf
5872 //===----------------------------------------------------------------------===//
5873 
5874 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5875   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5876 }
5877 
5878 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5879   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5880 }
5881 
5882 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5883   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5884 }
5885 
5886 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5887                                const OperandVector &Operands,
5888                                bool IsAtomic,
5889                                bool IsAtomicReturn,
5890                                bool IsLds) {
5891   bool IsLdsOpcode = IsLds;
5892   bool HasLdsModifier = false;
5893   OptionalImmIndexMap OptionalIdx;
5894   assert(IsAtomicReturn ? IsAtomic : true);
5895   unsigned FirstOperandIdx = 1;
5896 
5897   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5898     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5899 
5900     // Add the register arguments
5901     if (Op.isReg()) {
5902       Op.addRegOperands(Inst, 1);
5903       // Insert a tied src for atomic return dst.
5904       // This cannot be postponed as subsequent calls to
5905       // addImmOperands rely on correct number of MC operands.
5906       if (IsAtomicReturn && i == FirstOperandIdx)
5907         Op.addRegOperands(Inst, 1);
5908       continue;
5909     }
5910 
5911     // Handle the case where soffset is an immediate
5912     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5913       Op.addImmOperands(Inst, 1);
5914       continue;
5915     }
5916 
5917     HasLdsModifier |= Op.isLDS();
5918 
5919     // Handle tokens like 'offen' which are sometimes hard-coded into the
5920     // asm string.  There are no MCInst operands for these.
5921     if (Op.isToken()) {
5922       continue;
5923     }
5924     assert(Op.isImm());
5925 
5926     // Handle optional arguments
5927     OptionalIdx[Op.getImmTy()] = i;
5928   }
5929 
5930   // This is a workaround for an LLVM quirk which may result in an
5931   // incorrect instruction selection. Lds and non-lds versions of
5932   // MUBUF instructions are identical except that lds versions
5933   // have a mandatory 'lds' modifier. However, this modifier follows the
5934   // optional modifiers, and the LLVM asm matcher regards this 'lds'
5935   // modifier as an optional one. As a result, an lds version
5936   // of an opcode may be selected even if it has no 'lds' modifier.
5937   if (IsLdsOpcode && !HasLdsModifier) {
5938     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5939     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5940       Inst.setOpcode(NoLdsOpcode);
5941       IsLdsOpcode = false;
5942     }
5943   }
5944 
5945   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5946   if (!IsAtomic) { // glc is hard-coded.
5947     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5948   }
5949   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5950 
5951   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5952     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5953   }
5954 
5955   if (isGFX10())
5956     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5957 }
5958 
5959 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5960   OptionalImmIndexMap OptionalIdx;
5961 
5962   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5963     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5964 
5965     // Add the register arguments
5966     if (Op.isReg()) {
5967       Op.addRegOperands(Inst, 1);
5968       continue;
5969     }
5970 
5971     // Handle the case where soffset is an immediate
5972     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5973       Op.addImmOperands(Inst, 1);
5974       continue;
5975     }
5976 
5977     // Handle tokens like 'offen' which are sometimes hard-coded into the
5978     // asm string.  There are no MCInst operands for these.
5979     if (Op.isToken()) {
5980       continue;
5981     }
5982     assert(Op.isImm());
5983 
5984     // Handle optional arguments
5985     OptionalIdx[Op.getImmTy()] = i;
5986   }
5987 
5988   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5989                         AMDGPUOperand::ImmTyOffset);
5990   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5993   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5994 
5995   if (isGFX10())
5996     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5997 }
5998 
5999 //===----------------------------------------------------------------------===//
6000 // mimg
6001 //===----------------------------------------------------------------------===//
6002 
6003 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6004                               bool IsAtomic) {
6005   unsigned I = 1;
6006   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6007   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6008     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6009   }
6010 
6011   if (IsAtomic) {
6012     // Add src, same as dst
6013     assert(Desc.getNumDefs() == 1);
6014     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6015   }
6016 
6017   OptionalImmIndexMap OptionalIdx;
6018 
6019   for (unsigned E = Operands.size(); I != E; ++I) {
6020     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6021 
6022     // Add the register arguments
6023     if (Op.isReg()) {
6024       Op.addRegOperands(Inst, 1);
6025     } else if (Op.isImmModifier()) {
6026       OptionalIdx[Op.getImmTy()] = I;
6027     } else if (!Op.isToken()) {
6028       llvm_unreachable("unexpected operand type");
6029     }
6030   }
6031 
6032   bool IsGFX10 = isGFX10();
6033 
6034   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6035   if (IsGFX10)
6036     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6037   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6038   if (IsGFX10)
6039     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6040   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6041   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6042   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6043   if (IsGFX10)
6044     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6047   if (!IsGFX10)
6048     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6049   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6050 }
6051 
6052 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6053   cvtMIMG(Inst, Operands, true);
6054 }
6055 
6056 //===----------------------------------------------------------------------===//
6057 // smrd
6058 //===----------------------------------------------------------------------===//
6059 
6060 bool AMDGPUOperand::isSMRDOffset8() const {
6061   return isImm() && isUInt<8>(getImm());
6062 }
6063 
6064 bool AMDGPUOperand::isSMRDOffset20() const {
6065   return isImm() && isUInt<20>(getImm());
6066 }
6067 
6068 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6069   // 32-bit literals are only supported on CI, and we only want to use them
6070   // when the offset is wider than 8 bits.
6071   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6072 }
6073 
6074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6075   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6076 }
6077 
6078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6079   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6080 }
6081 
6082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6083   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6084 }
6085 
6086 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6087   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6088 }
6089 
6090 //===----------------------------------------------------------------------===//
6091 // vop3
6092 //===----------------------------------------------------------------------===//
6093 
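// The omod (output modifier) field is encoded as 0 = none, 1 = *2, 2 = *4,
// 3 = /2. The helpers below translate the assembler syntax "mul:2", "mul:4"
// and "div:2" (and the no-op "mul:1"/"div:1") into that encoding.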
6094 static bool ConvertOmodMul(int64_t &Mul) {
6095   if (Mul != 1 && Mul != 2 && Mul != 4)
6096     return false;
6097 
6098   Mul >>= 1;
6099   return true;
6100 }
6101 
6102 static bool ConvertOmodDiv(int64_t &Div) {
6103   if (Div == 1) {
6104     Div = 0;
6105     return true;
6106   }
6107 
6108   if (Div == 2) {
6109     Div = 3;
6110     return true;
6111   }
6112 
6113   return false;
6114 }
6115 
6116 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6117   if (BoundCtrl == 0) {
6118     BoundCtrl = 1;
6119     return true;
6120   }
6121 
6122   if (BoundCtrl == -1) {
6123     BoundCtrl = 0;
6124     return true;
6125   }
6126 
6127   return false;
6128 }
6129 
6130 // Note: the order in this table matches the order of operands in AsmString.
6131 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6132   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6133   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6134   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6135   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6136   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6137   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6138   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6139   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6140   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6141   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6142   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6143   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6144   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6145   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6146   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6147   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6148   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6149   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6150   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6151   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6152   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6153   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6154   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6155   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6156   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6157   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6158   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6159   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6160   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6161   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6162   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6163   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6164   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6165   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6166   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6167   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6168   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6169   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6170   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6171   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6172   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6173   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6174   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6175   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6176 };
6177 
6178 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6179 
6180   OperandMatchResultTy res = parseOptionalOpr(Operands);
6181 
6182   // This is a hack to enable hardcoded mandatory operands which follow
6183   // optional operands.
6184   //
6185   // The current design assumes that all operands after the first optional
6186   // operand are also optional. However, the implementation of some instructions
6187   // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6188   //
6189   // To alleviate this problem, we have to (implicitly) parse extra operands
6190   // to make sure the autogenerated parser of custom operands never hits
6191   // hardcoded mandatory operands.
6192 
6193   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6194     if (res != MatchOperand_Success ||
6195         isToken(AsmToken::EndOfStatement))
6196       break;
6197 
6198     trySkipToken(AsmToken::Comma);
6199     res = parseOptionalOpr(Operands);
6200   }
6201 
6202   return res;
6203 }
6204 
6205 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6206   OperandMatchResultTy res;
6207   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6208     // try to parse any optional operand here
6209     if (Op.IsBit) {
6210       res = parseNamedBit(Op.Name, Operands, Op.Type);
6211     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6212       res = parseOModOperand(Operands);
6213     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6214                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6215                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6216       res = parseSDWASel(Operands, Op.Name, Op.Type);
6217     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6218       res = parseSDWADstUnused(Operands);
6219     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6220                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6221                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6222                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6223       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6224                                         Op.ConvertResult);
6225     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6226       res = parseDim(Operands);
6227     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6228       res = parseDfmtNfmt(Operands);
6229     } else {
6230       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6231     }
6232     if (res != MatchOperand_NoMatch) {
6233       return res;
6234     }
6235   }
6236   return MatchOperand_NoMatch;
6237 }
6238 
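// Output modifier syntax, e.g. (illustrative) "mul:2", "mul:4" or "div:2";
// ConvertOmodMul/ConvertOmodDiv translate the parsed value into the omod
// encoding.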
6239 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6240   StringRef Name = Parser.getTok().getString();
6241   if (Name == "mul") {
6242     return parseIntWithPrefix("mul", Operands,
6243                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6244   }
6245 
6246   if (Name == "div") {
6247     return parseIntWithPrefix("div", Operands,
6248                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6249   }
6250 
6251   return MatchOperand_NoMatch;
6252 }
6253 
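// For VOP3 opcodes with op_sel, the bit after the last source selects the
// high half of the destination. Illustrative example (assuming a two-source
// opcode such as v_cvt_pknorm_i16_f16): "... op_sel:[0,0,1]" sets
// SISrcMods::DST_OP_SEL in src0_modifiers below.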
6254 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6255   cvtVOP3P(Inst, Operands);
6256 
6257   int Opc = Inst.getOpcode();
6258 
6259   int SrcNum;
6260   const int Ops[] = { AMDGPU::OpName::src0,
6261                       AMDGPU::OpName::src1,
6262                       AMDGPU::OpName::src2 };
6263   for (SrcNum = 0;
6264        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6265        ++SrcNum);
6266   assert(SrcNum > 0);
6267 
6268   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6269   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6270 
6271   if ((OpSel & (1 << SrcNum)) != 0) {
6272     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6273     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6274     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6275   }
6276 }
6277 
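// Note: an operand with input modifiers occupies two MCInst operands (the
// modifiers immediate followed by the register/immediate value), which is why
// callers add it with add*InputModsOperands(Inst, 2).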
6278 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6279       // 1. This operand is an input-modifiers operand,
6280   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6281       // 2. it is not the last operand,
6282       && Desc.NumOperands > (OpNum + 1)
6283       // 3. the next operand has a register class, and
6284       && Desc.OpInfo[OpNum + 1].RegClass != -1
6285       // 4. the next operand is not tied to any other operand.
6286       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6287 }
6288 
6289 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6290 {
6291   OptionalImmIndexMap OptionalIdx;
6292   unsigned Opc = Inst.getOpcode();
6293 
6294   unsigned I = 1;
6295   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6296   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6297     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6298   }
6299 
6300   for (unsigned E = Operands.size(); I != E; ++I) {
6301     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6302     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6303       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6304     } else if (Op.isInterpSlot() ||
6305                Op.isInterpAttr() ||
6306                Op.isAttrChan()) {
6307       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6308     } else if (Op.isImmModifier()) {
6309       OptionalIdx[Op.getImmTy()] = I;
6310     } else {
6311       llvm_unreachable("unhandled operand type");
6312     }
6313   }
6314 
6315   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6316     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6317   }
6318 
6319   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6320     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6321   }
6322 
6323   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6324     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6325   }
6326 }
6327 
6328 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6329                               OptionalImmIndexMap &OptionalIdx) {
6330   unsigned Opc = Inst.getOpcode();
6331 
6332   unsigned I = 1;
6333   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6334   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6335     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6336   }
6337 
6338   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6339     // This instruction has src modifiers
6340     for (unsigned E = Operands.size(); I != E; ++I) {
6341       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6342       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6343         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6344       } else if (Op.isImmModifier()) {
6345         OptionalIdx[Op.getImmTy()] = I;
6346       } else if (Op.isRegOrImm()) {
6347         Op.addRegOrImmOperands(Inst, 1);
6348       } else {
6349         llvm_unreachable("unhandled operand type");
6350       }
6351     }
6352   } else {
6353     // No src modifiers
6354     for (unsigned E = Operands.size(); I != E; ++I) {
6355       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6356       if (Op.isMod()) {
6357         OptionalIdx[Op.getImmTy()] = I;
6358       } else {
6359         Op.addRegOrImmOperands(Inst, 1);
6360       }
6361     }
6362   }
6363 
6364   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6365     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6366   }
6367 
6368   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6369     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6370   }
6371 
6372   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6373   // they have a src2 register operand that is tied to the dst operand.
6374   // We don't allow modifiers for this operand in the assembler, so
6375   // src2_modifiers should be 0.
6376   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6377       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6378       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6379       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6380       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6381       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6382       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6383     auto it = Inst.begin();
6384     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6385     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6386     ++it;
6387     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6388   }
6389 }
6390 
6391 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6392   OptionalImmIndexMap OptionalIdx;
6393   cvtVOP3(Inst, Operands, OptionalIdx);
6394 }
6395 
6396 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6397                                const OperandVector &Operands) {
6398   OptionalImmIndexMap OptIdx;
6399   const int Opc = Inst.getOpcode();
6400   const MCInstrDesc &Desc = MII.get(Opc);
6401 
6402   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6403 
6404   cvtVOP3(Inst, Operands, OptIdx);
6405 
6406   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6407     assert(!IsPacked);
6408     Inst.addOperand(Inst.getOperand(0));
6409   }
6410 
6411   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
6412   // instruction, and then figure out where to actually put the modifiers.
6413 
6414   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6415 
6416   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6417   if (OpSelHiIdx != -1) {
6418     int DefaultVal = IsPacked ? -1 : 0;
6419     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6420                           DefaultVal);
6421   }
6422 
6423   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6424   if (NegLoIdx != -1) {
6425     assert(IsPacked);
6426     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6427     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6428   }
6429 
6430   const int Ops[] = { AMDGPU::OpName::src0,
6431                       AMDGPU::OpName::src1,
6432                       AMDGPU::OpName::src2 };
6433   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6434                          AMDGPU::OpName::src1_modifiers,
6435                          AMDGPU::OpName::src2_modifiers };
6436 
6437   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6438 
6439   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6440   unsigned OpSelHi = 0;
6441   unsigned NegLo = 0;
6442   unsigned NegHi = 0;
6443 
6444   if (OpSelHiIdx != -1) {
6445     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6446   }
6447 
6448   if (NegLoIdx != -1) {
6449     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6450     NegLo = Inst.getOperand(NegLoIdx).getImm();
6451     NegHi = Inst.getOperand(NegHiIdx).getImm();
6452   }
6453 
6454   for (int J = 0; J < 3; ++J) {
6455     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6456     if (OpIdx == -1)
6457       break;
6458 
6459     uint32_t ModVal = 0;
6460 
6461     if ((OpSel & (1 << J)) != 0)
6462       ModVal |= SISrcMods::OP_SEL_0;
6463 
6464     if ((OpSelHi & (1 << J)) != 0)
6465       ModVal |= SISrcMods::OP_SEL_1;
6466 
6467     if ((NegLo & (1 << J)) != 0)
6468       ModVal |= SISrcMods::NEG;
6469 
6470     if ((NegHi & (1 << J)) != 0)
6471       ModVal |= SISrcMods::NEG_HI;
6472 
6473     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6474 
6475     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6476   }
6477 }
6478 
6479 //===----------------------------------------------------------------------===//
6480 // dpp
6481 //===----------------------------------------------------------------------===//
6482 
6483 bool AMDGPUOperand::isDPP8() const {
6484   return isImmTy(ImmTyDPP8);
6485 }
6486 
6487 bool AMDGPUOperand::isDPPCtrl() const {
6488   using namespace AMDGPU::DPP;
6489 
6490   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6491   if (result) {
6492     int64_t Imm = getImm();
6493     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6494            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6495            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6496            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6497            (Imm == DppCtrl::WAVE_SHL1) ||
6498            (Imm == DppCtrl::WAVE_ROL1) ||
6499            (Imm == DppCtrl::WAVE_SHR1) ||
6500            (Imm == DppCtrl::WAVE_ROR1) ||
6501            (Imm == DppCtrl::ROW_MIRROR) ||
6502            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6503            (Imm == DppCtrl::BCAST15) ||
6504            (Imm == DppCtrl::BCAST31) ||
6505            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6506            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6507   }
6508   return false;
6509 }
6510 
6511 //===----------------------------------------------------------------------===//
6512 // mAI
6513 //===----------------------------------------------------------------------===//
6514 
6515 bool AMDGPUOperand::isBLGP() const {
6516   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6517 }
6518 
6519 bool AMDGPUOperand::isCBSZ() const {
6520   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6521 }
6522 
6523 bool AMDGPUOperand::isABID() const {
6524   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6525 }
6526 
6527 bool AMDGPUOperand::isS16Imm() const {
6528   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6529 }
6530 
6531 bool AMDGPUOperand::isU16Imm() const {
6532   return isImm() && isUInt<16>(getImm());
6533 }
6534 
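// MIMG dimension operand on GFX10, e.g. (illustrative) "dim:SQ_RSRC_IMG_2D"
// or the short form "dim:2D".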
6535 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6536   if (!isGFX10())
6537     return MatchOperand_NoMatch;
6538 
6539   SMLoc S = Parser.getTok().getLoc();
6540 
6541   if (getLexer().isNot(AsmToken::Identifier))
6542     return MatchOperand_NoMatch;
6543   if (getLexer().getTok().getString() != "dim")
6544     return MatchOperand_NoMatch;
6545 
6546   Parser.Lex();
6547   if (getLexer().isNot(AsmToken::Colon))
6548     return MatchOperand_ParseFail;
6549 
6550   Parser.Lex();
6551 
6552   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6553   // integer.
6554   std::string Token;
6555   if (getLexer().is(AsmToken::Integer)) {
6556     SMLoc Loc = getLexer().getTok().getEndLoc();
6557     Token = std::string(getLexer().getTok().getString());
6558     Parser.Lex();
6559     if (getLexer().getTok().getLoc() != Loc)
6560       return MatchOperand_ParseFail;
6561   }
6562   if (getLexer().isNot(AsmToken::Identifier))
6563     return MatchOperand_ParseFail;
6564   Token += getLexer().getTok().getString();
6565 
6566   StringRef DimId = Token;
6567   if (DimId.startswith("SQ_RSRC_IMG_"))
6568     DimId = DimId.substr(12);
6569 
6570   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6571   if (!DimInfo)
6572     return MatchOperand_ParseFail;
6573 
6574   Parser.Lex();
6575 
6576   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6577                                               AMDGPUOperand::ImmTyDim));
6578   return MatchOperand_Success;
6579 }
6580 
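// GFX10 DPP8 lane selects, e.g. (illustrative) "dpp8:[7,6,5,4,3,2,1,0]".
// Each of the eight selects is in the range 0..7 and is packed 3 bits per
// lane below.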
6581 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6582   SMLoc S = Parser.getTok().getLoc();
6583   StringRef Prefix;
6584 
6585   if (getLexer().getKind() == AsmToken::Identifier) {
6586     Prefix = Parser.getTok().getString();
6587   } else {
6588     return MatchOperand_NoMatch;
6589   }
6590 
6591   if (Prefix != "dpp8")
6592     return parseDPPCtrl(Operands);
6593   if (!isGFX10())
6594     return MatchOperand_NoMatch;
6595 
6596   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6597 
6598   int64_t Sels[8];
6599 
6600   Parser.Lex();
6601   if (getLexer().isNot(AsmToken::Colon))
6602     return MatchOperand_ParseFail;
6603 
6604   Parser.Lex();
6605   if (getLexer().isNot(AsmToken::LBrac))
6606     return MatchOperand_ParseFail;
6607 
6608   Parser.Lex();
6609   if (getParser().parseAbsoluteExpression(Sels[0]))
6610     return MatchOperand_ParseFail;
6611   if (0 > Sels[0] || 7 < Sels[0])
6612     return MatchOperand_ParseFail;
6613 
6614   for (size_t i = 1; i < 8; ++i) {
6615     if (getLexer().isNot(AsmToken::Comma))
6616       return MatchOperand_ParseFail;
6617 
6618     Parser.Lex();
6619     if (getParser().parseAbsoluteExpression(Sels[i]))
6620       return MatchOperand_ParseFail;
6621     if (0 > Sels[i] || 7 < Sels[i])
6622       return MatchOperand_ParseFail;
6623   }
6624 
6625   if (getLexer().isNot(AsmToken::RBrac))
6626     return MatchOperand_ParseFail;
6627   Parser.Lex();
6628 
6629   unsigned DPP8 = 0;
6630   for (size_t i = 0; i < 8; ++i)
6631     DPP8 |= (Sels[i] << (i * 3));
6632 
6633   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6634   return MatchOperand_Success;
6635 }
6636 
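// Classic DPP controls, e.g. (illustrative) "quad_perm:[0,1,2,3]",
// "row_shl:1", "row_mirror", "row_share:3" (GFX10 only) or "wave_shl:1"
// (VI/GFX9 only).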
6637 OperandMatchResultTy
6638 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6639   using namespace AMDGPU::DPP;
6640 
6641   SMLoc S = Parser.getTok().getLoc();
6642   StringRef Prefix;
6643   int64_t Int;
6644 
6645   if (getLexer().getKind() == AsmToken::Identifier) {
6646     Prefix = Parser.getTok().getString();
6647   } else {
6648     return MatchOperand_NoMatch;
6649   }
6650 
6651   if (Prefix == "row_mirror") {
6652     Int = DppCtrl::ROW_MIRROR;
6653     Parser.Lex();
6654   } else if (Prefix == "row_half_mirror") {
6655     Int = DppCtrl::ROW_HALF_MIRROR;
6656     Parser.Lex();
6657   } else {
6658     // Check the prefix to prevent parseDPPCtrl from eating invalid tokens
6659     if (Prefix != "quad_perm"
6660         && Prefix != "row_shl"
6661         && Prefix != "row_shr"
6662         && Prefix != "row_ror"
6663         && Prefix != "wave_shl"
6664         && Prefix != "wave_rol"
6665         && Prefix != "wave_shr"
6666         && Prefix != "wave_ror"
6667         && Prefix != "row_bcast"
6668         && Prefix != "row_share"
6669         && Prefix != "row_xmask") {
6670       return MatchOperand_NoMatch;
6671     }
6672 
6673     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6674       return MatchOperand_NoMatch;
6675 
6676     if (!isVI() && !isGFX9() &&
6677         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6678          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6679          Prefix == "row_bcast"))
6680       return MatchOperand_NoMatch;
6681 
6682     Parser.Lex();
6683     if (getLexer().isNot(AsmToken::Colon))
6684       return MatchOperand_ParseFail;
6685 
6686     if (Prefix == "quad_perm") {
6687       // quad_perm:[%d,%d,%d,%d]
6688       Parser.Lex();
6689       if (getLexer().isNot(AsmToken::LBrac))
6690         return MatchOperand_ParseFail;
6691       Parser.Lex();
6692 
6693       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6694         return MatchOperand_ParseFail;
6695 
6696       for (int i = 0; i < 3; ++i) {
6697         if (getLexer().isNot(AsmToken::Comma))
6698           return MatchOperand_ParseFail;
6699         Parser.Lex();
6700 
6701         int64_t Temp;
6702         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6703           return MatchOperand_ParseFail;
6704         const int shift = i*2 + 2;
6705         Int += (Temp << shift);
6706       }
6707 
6708       if (getLexer().isNot(AsmToken::RBrac))
6709         return MatchOperand_ParseFail;
6710       Parser.Lex();
6711     } else {
6712       // sel:%d
6713       Parser.Lex();
6714       if (getParser().parseAbsoluteExpression(Int))
6715         return MatchOperand_ParseFail;
6716 
6717       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6718         Int |= DppCtrl::ROW_SHL0;
6719       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6720         Int |= DppCtrl::ROW_SHR0;
6721       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6722         Int |= DppCtrl::ROW_ROR0;
6723       } else if (Prefix == "wave_shl" && 1 == Int) {
6724         Int = DppCtrl::WAVE_SHL1;
6725       } else if (Prefix == "wave_rol" && 1 == Int) {
6726         Int = DppCtrl::WAVE_ROL1;
6727       } else if (Prefix == "wave_shr" && 1 == Int) {
6728         Int = DppCtrl::WAVE_SHR1;
6729       } else if (Prefix == "wave_ror" && 1 == Int) {
6730         Int = DppCtrl::WAVE_ROR1;
6731       } else if (Prefix == "row_bcast") {
6732         if (Int == 15) {
6733           Int = DppCtrl::BCAST15;
6734         } else if (Int == 31) {
6735           Int = DppCtrl::BCAST31;
6736         } else {
6737           return MatchOperand_ParseFail;
6738         }
6739       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6740         Int |= DppCtrl::ROW_SHARE_FIRST;
6741       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6742         Int |= DppCtrl::ROW_XMASK_FIRST;
6743       } else {
6744         return MatchOperand_ParseFail;
6745       }
6746     }
6747   }
6748 
6749   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6750   return MatchOperand_Success;
6751 }
6752 
6753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6754   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6755 }
6756 
6757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6758   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6759 }
6760 
6761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6762   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6763 }
6764 
6765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6766   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6767 }
6768 
6769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6770   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6771 }
6772 
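// Converts a parsed DPP instruction, e.g. (illustrative)
// "v_add_f32_dpp v0, v1, v2 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf",
// filling in default row_mask/bank_mask/bound_ctrl values when they are
// omitted.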
6773 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6774   OptionalImmIndexMap OptionalIdx;
6775 
6776   unsigned I = 1;
6777   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6778   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6779     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6780   }
6781 
6782   int Fi = 0;
6783   for (unsigned E = Operands.size(); I != E; ++I) {
6784     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6785                                             MCOI::TIED_TO);
6786     if (TiedTo != -1) {
6787       assert((unsigned)TiedTo < Inst.getNumOperands());
6788       // Handle the tied 'old' or src2 operand for MAC instructions
6789       Inst.addOperand(Inst.getOperand(TiedTo));
6790     }
6791     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6792     // Add the register arguments
6793     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6794       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6795       // Skip it.
6796       continue;
6797     }
6798 
6799     if (IsDPP8) {
6800       if (Op.isDPP8()) {
6801         Op.addImmOperands(Inst, 1);
6802       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6803         Op.addRegWithFPInputModsOperands(Inst, 2);
6804       } else if (Op.isFI()) {
6805         Fi = Op.getImm();
6806       } else if (Op.isReg()) {
6807         Op.addRegOperands(Inst, 1);
6808       } else {
6809         llvm_unreachable("Invalid operand type");
6810       }
6811     } else {
6812       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6813         Op.addRegWithFPInputModsOperands(Inst, 2);
6814       } else if (Op.isDPPCtrl()) {
6815         Op.addImmOperands(Inst, 1);
6816       } else if (Op.isImm()) {
6817         // Handle optional arguments
6818         OptionalIdx[Op.getImmTy()] = I;
6819       } else {
6820         llvm_unreachable("Invalid operand type");
6821       }
6822     }
6823   }
6824 
6825   if (IsDPP8) {
6826     using namespace llvm::AMDGPU::DPP;
6827     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6828   } else {
6829     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6830     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6831     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6832     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6833       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6834     }
6835   }
6836 }
6837 
6838 //===----------------------------------------------------------------------===//
6839 // sdwa
6840 //===----------------------------------------------------------------------===//
6841 
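// SDWA sub-dword selects, e.g. (illustrative) "dst_sel:BYTE_0",
// "src0_sel:WORD_1" or "src1_sel:DWORD".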
6842 OperandMatchResultTy
6843 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6844                               AMDGPUOperand::ImmTy Type) {
6845   using namespace llvm::AMDGPU::SDWA;
6846 
6847   SMLoc S = Parser.getTok().getLoc();
6848   StringRef Value;
6849   OperandMatchResultTy res;
6850 
6851   res = parseStringWithPrefix(Prefix, Value);
6852   if (res != MatchOperand_Success) {
6853     return res;
6854   }
6855 
6856   int64_t Int;
6857   Int = StringSwitch<int64_t>(Value)
6858         .Case("BYTE_0", SdwaSel::BYTE_0)
6859         .Case("BYTE_1", SdwaSel::BYTE_1)
6860         .Case("BYTE_2", SdwaSel::BYTE_2)
6861         .Case("BYTE_3", SdwaSel::BYTE_3)
6862         .Case("WORD_0", SdwaSel::WORD_0)
6863         .Case("WORD_1", SdwaSel::WORD_1)
6864         .Case("DWORD", SdwaSel::DWORD)
6865         .Default(0xffffffff);
6866   Parser.Lex(); // eat last token
6867 
6868   if (Int == 0xffffffff) {
6869     return MatchOperand_ParseFail;
6870   }
6871 
6872   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6873   return MatchOperand_Success;
6874 }
6875 
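// SDWA destination-unused policy, e.g. (illustrative) "dst_unused:UNUSED_PAD",
// "dst_unused:UNUSED_SEXT" or "dst_unused:UNUSED_PRESERVE".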
6876 OperandMatchResultTy
6877 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6878   using namespace llvm::AMDGPU::SDWA;
6879 
6880   SMLoc S = Parser.getTok().getLoc();
6881   StringRef Value;
6882   OperandMatchResultTy res;
6883 
6884   res = parseStringWithPrefix("dst_unused", Value);
6885   if (res != MatchOperand_Success) {
6886     return res;
6887   }
6888 
6889   int64_t Int;
6890   Int = StringSwitch<int64_t>(Value)
6891         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6892         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6893         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6894         .Default(0xffffffff);
6895   Parser.Lex(); // eat last token
6896 
6897   if (Int == 0xffffffff) {
6898     return MatchOperand_ParseFail;
6899   }
6900 
6901   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6902   return MatchOperand_Success;
6903 }
6904 
6905 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6906   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6907 }
6908 
6909 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6910   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6911 }
6912 
6913 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6914   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6915 }
6916 
6917 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6918   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6919 }
6920 
6921 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6922   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6923 }
6924 
6925 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6926                               uint64_t BasicInstType,
6927                               bool SkipDstVcc,
6928                               bool SkipSrcVcc) {
6929   using namespace llvm::AMDGPU::SDWA;
6930 
6931   OptionalImmIndexMap OptionalIdx;
6932   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6933   bool SkippedVcc = false;
6934 
6935   unsigned I = 1;
6936   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6937   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6938     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6939   }
6940 
6941   for (unsigned E = Operands.size(); I != E; ++I) {
6942     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6943     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6944         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6945       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
6946       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6947       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6948       // Skip VCC only if we didn't skip it on previous iteration.
6949       // Note that src0 and src1 occupy 2 slots each because of modifiers.
6950       if (BasicInstType == SIInstrFlags::VOP2 &&
6951           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6952            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6953         SkippedVcc = true;
6954         continue;
6955       } else if (BasicInstType == SIInstrFlags::VOPC &&
6956                  Inst.getNumOperands() == 0) {
6957         SkippedVcc = true;
6958         continue;
6959       }
6960     }
6961     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6962       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6963     } else if (Op.isImm()) {
6964       // Handle optional arguments
6965       OptionalIdx[Op.getImmTy()] = I;
6966     } else {
6967       llvm_unreachable("Invalid operand type");
6968     }
6969     SkippedVcc = false;
6970   }
6971 
6972   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6973       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6974       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6975     // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
6976     switch (BasicInstType) {
6977     case SIInstrFlags::VOP1:
6978       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6979       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6980         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6981       }
6982       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6983       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6984       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6985       break;
6986 
6987     case SIInstrFlags::VOP2:
6988       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6989       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6990         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6991       }
6992       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6993       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6994       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6995       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6996       break;
6997 
6998     case SIInstrFlags::VOPC:
6999       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7000         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7001       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7002       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7003       break;
7004 
7005     default:
7006       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7007     }
7008   }
7009 
7010   // Special case v_mac_{f16, f32}:
7011   // they have a src2 register operand that is tied to the dst operand.
7012   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7013       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7014     auto it = Inst.begin();
7015     std::advance(
7016       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7017     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7018   }
7019 }
7020 
7021 //===----------------------------------------------------------------------===//
7022 // mAI
7023 //===----------------------------------------------------------------------===//
7024 
7025 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7026   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7027 }
7028 
7029 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7030   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7031 }
7032 
7033 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7034   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7035 }
7036 
7037 /// Force static initialization.
7038 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7039   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7040   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7041 }
7042 
7043 #define GET_REGISTER_MATCHER
7044 #define GET_MATCHER_IMPLEMENTATION
7045 #define GET_MNEMONIC_SPELL_CHECKER
7046 #include "AMDGPUGenAsmMatcher.inc"
7047 
7048 // This function should be defined after the auto-generated include so that
7049 // the MatchClassKind enum is defined.
7050 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7051                                                      unsigned Kind) {
7052   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7053   // But MatchInstructionImpl() expects to see a token there and fails to
7054   // validate the operand. This method checks whether we were given an immediate
7055   // operand where the matcher expects the corresponding token.
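  //
  // For example (illustrative), in "buffer_load_dword v0, off, s[4:7], s0 glc"
  // the trailing "glc" is parsed as an immediate; the MCK_glc case below
  // accepts it via isGLC().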
7056   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7057   switch (Kind) {
7058   case MCK_addr64:
7059     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7060   case MCK_gds:
7061     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7062   case MCK_lds:
7063     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7064   case MCK_glc:
7065     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7066   case MCK_idxen:
7067     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7068   case MCK_offen:
7069     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7070   case MCK_SSrcB32:
7071     // When operands have expression values, they will return true for isToken,
7072     // because it is not possible to distinguish between a token and an
7073     // expression at parse time. MatchInstructionImpl() always tries to match an
7074     // operand as a token when isToken returns true, and if the name of the
7075     // expression is not a valid token the match fails, so we need to handle it
7076     // here.
7077     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7078   case MCK_SSrcF32:
7079     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7080   case MCK_SoppBrTarget:
7081     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7082   case MCK_VReg32OrOff:
7083     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7084   case MCK_InterpSlot:
7085     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7086   case MCK_Attr:
7087     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7088   case MCK_AttrChan:
7089     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7090   case MCK_SReg_64:
7091   case MCK_SReg_64_XEXEC:
7092     // Null is defined as a 32-bit register but
7093     // it should also be enabled with 64-bit operands.
7094     // The following code enables it for SReg_64 operands
7095     // used as source and destination. Remaining source
7096     // operands are handled in isInlinableImm.
7097     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7098   default:
7099     return Match_InvalidOperand;
7100   }
7101 }
7102 
7103 //===----------------------------------------------------------------------===//
7104 // endpgm
7105 //===----------------------------------------------------------------------===//
7106 
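// s_endpgm takes an optional 16-bit immediate, e.g. (illustrative) "s_endpgm"
// or "s_endpgm 0x1"; when omitted the value defaults to 0.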
7107 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7108   SMLoc S = Parser.getTok().getLoc();
7109   int64_t Imm = 0;
7110 
7111   if (!parseExpr(Imm)) {
7112     // The operand is optional; if not present, default to 0.
7113     Imm = 0;
7114   }
7115 
7116   if (!isUInt<16>(Imm)) {
7117     Error(S, "expected a 16-bit value");
7118     return MatchOperand_ParseFail;
7119   }
7120 
7121   Operands.push_back(
7122       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7123   return MatchOperand_Success;
7124 }
7125 
7126 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7127