1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
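// A single parsed operand: a literal token, an immediate (optionally carrying a
// named immediate type and input modifiers), a register, or an MCExpr.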
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
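  // Input operand modifiers parsed from the source: |x| or abs(x) sets Abs and
  // -x sets Neg (FP modifiers), while sext(x) sets Sext (an integer modifier).
  // getModifiersOperand() encodes them into the src_modifiers immediate using
  // the SISrcMods bits; e.g. "-|v0|" yields SISrcMods::NEG | SISrcMods::ABS.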
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
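  // Identifies which named immediate operand an Immediate represents (e.g. the
  // glc bit, a DS offset, a DPP control, an SDWA select). ImmTyNone marks a
  // plain numeric immediate.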
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
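  // Operand payload; exactly one member is active, selected by Kind.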
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret it as a token, we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_256RegClassID) ||
282            isRegClass(AMDGPU::VReg_512RegClassID) ||
283            isRegClass(AMDGPU::VReg_1024RegClassID);
284   }
285 
286   bool isVReg32() const {
287     return isRegClass(AMDGPU::VGPR_32RegClassID);
288   }
289 
290   bool isVReg32OrOff() const {
291     return isOff() || isVReg32();
292   }
293 
294   bool isNull() const {
295     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
296   }
297 
298   bool isSDWAOperand(MVT type) const;
299   bool isSDWAFP16Operand() const;
300   bool isSDWAFP32Operand() const;
301   bool isSDWAInt16Operand() const;
302   bool isSDWAInt32Operand() const;
303 
304   bool isImmTy(ImmTy ImmT) const {
305     return isImm() && Imm.Type == ImmT;
306   }
307 
308   bool isImmModifier() const {
309     return isImm() && Imm.Type != ImmTyNone;
310   }
311 
312   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
313   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
314   bool isDMask() const { return isImmTy(ImmTyDMask); }
315   bool isDim() const { return isImmTy(ImmTyDim); }
316   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
317   bool isDA() const { return isImmTy(ImmTyDA); }
318   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
319   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
320   bool isLWE() const { return isImmTy(ImmTyLWE); }
321   bool isOff() const { return isImmTy(ImmTyOff); }
322   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
323   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
324   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
325   bool isOffen() const { return isImmTy(ImmTyOffen); }
326   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
327   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
328   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
329   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
330   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
331 
332   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
333   bool isGDS() const { return isImmTy(ImmTyGDS); }
334   bool isLDS() const { return isImmTy(ImmTyLDS); }
335   bool isDLC() const { return isImmTy(ImmTyDLC); }
336   bool isGLC() const { return isImmTy(ImmTyGLC); }
337   bool isSLC() const { return isImmTy(ImmTySLC); }
338   bool isSWZ() const { return isImmTy(ImmTySWZ); }
339   bool isTFE() const { return isImmTy(ImmTyTFE); }
340   bool isD16() const { return isImmTy(ImmTyD16); }
341   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
342   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
343   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
344   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
345   bool isFI() const { return isImmTy(ImmTyDppFi); }
346   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
347   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
348   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
349   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
350   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
351   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
352   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
353   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
354   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
355   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
356   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
357   bool isHigh() const { return isImmTy(ImmTyHigh); }
358 
359   bool isMod() const {
360     return isClampSI() || isOModSI();
361   }
362 
363   bool isRegOrImm() const {
364     return isReg() || isImm();
365   }
366 
367   bool isRegClass(unsigned RCID) const;
368 
369   bool isInlineValue() const;
370 
371   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
372     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
373   }
374 
375   bool isSCSrcB16() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
377   }
378 
379   bool isSCSrcV2B16() const {
380     return isSCSrcB16();
381   }
382 
383   bool isSCSrcB32() const {
384     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
385   }
386 
387   bool isSCSrcB64() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
389   }
390 
391   bool isBoolReg() const;
392 
393   bool isSCSrcF16() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
395   }
396 
397   bool isSCSrcV2F16() const {
398     return isSCSrcF16();
399   }
400 
401   bool isSCSrcF32() const {
402     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
403   }
404 
405   bool isSCSrcF64() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
407   }
408 
409   bool isSSrcB32() const {
410     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
411   }
412 
413   bool isSSrcB16() const {
414     return isSCSrcB16() || isLiteralImm(MVT::i16);
415   }
416 
417   bool isSSrcV2B16() const {
418     llvm_unreachable("cannot happen");
419     return isSSrcB16();
420   }
421 
422   bool isSSrcB64() const {
423     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
424     // See isVSrc64().
425     return isSCSrcB64() || isLiteralImm(MVT::i64);
426   }
427 
428   bool isSSrcF32() const {
429     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
430   }
431 
432   bool isSSrcF64() const {
433     return isSCSrcB64() || isLiteralImm(MVT::f64);
434   }
435 
436   bool isSSrcF16() const {
437     return isSCSrcB16() || isLiteralImm(MVT::f16);
438   }
439 
440   bool isSSrcV2F16() const {
441     llvm_unreachable("cannot happen");
442     return isSSrcF16();
443   }
444 
445   bool isSSrcOrLdsB32() const {
446     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
447            isLiteralImm(MVT::i32) || isExpr();
448   }
449 
450   bool isVCSrcB32() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
452   }
453 
454   bool isVCSrcB64() const {
455     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
456   }
457 
458   bool isVCSrcB16() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
460   }
461 
462   bool isVCSrcV2B16() const {
463     return isVCSrcB16();
464   }
465 
466   bool isVCSrcF32() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
468   }
469 
470   bool isVCSrcF64() const {
471     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
472   }
473 
474   bool isVCSrcF16() const {
475     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
476   }
477 
478   bool isVCSrcV2F16() const {
479     return isVCSrcF16();
480   }
481 
482   bool isVSrcB32() const {
483     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
484   }
485 
486   bool isVSrcB64() const {
487     return isVCSrcF64() || isLiteralImm(MVT::i64);
488   }
489 
490   bool isVSrcB16() const {
491     return isVCSrcF16() || isLiteralImm(MVT::i16);
492   }
493 
494   bool isVSrcV2B16() const {
495     return isVSrcB16() || isLiteralImm(MVT::v2i16);
496   }
497 
498   bool isVSrcF32() const {
499     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
500   }
501 
502   bool isVSrcF64() const {
503     return isVCSrcF64() || isLiteralImm(MVT::f64);
504   }
505 
506   bool isVSrcF16() const {
507     return isVCSrcF16() || isLiteralImm(MVT::f16);
508   }
509 
510   bool isVSrcV2F16() const {
511     return isVSrcF16() || isLiteralImm(MVT::v2f16);
512   }
513 
514   bool isVISrcB32() const {
515     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
516   }
517 
518   bool isVISrcB16() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
520   }
521 
522   bool isVISrcV2B16() const {
523     return isVISrcB16();
524   }
525 
526   bool isVISrcF32() const {
527     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
528   }
529 
530   bool isVISrcF16() const {
531     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
532   }
533 
534   bool isVISrcV2F16() const {
535     return isVISrcF16() || isVISrcB32();
536   }
537 
538   bool isAISrcB32() const {
539     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
540   }
541 
542   bool isAISrcB16() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
544   }
545 
546   bool isAISrcV2B16() const {
547     return isAISrcB16();
548   }
549 
550   bool isAISrcF32() const {
551     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
552   }
553 
554   bool isAISrcF16() const {
555     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
556   }
557 
558   bool isAISrcV2F16() const {
559     return isAISrcF16() || isAISrcB32();
560   }
561 
562   bool isAISrc_128B32() const {
563     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
564   }
565 
566   bool isAISrc_128B16() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
568   }
569 
570   bool isAISrc_128V2B16() const {
571     return isAISrc_128B16();
572   }
573 
574   bool isAISrc_128F32() const {
575     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
576   }
577 
578   bool isAISrc_128F16() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
580   }
581 
582   bool isAISrc_128V2F16() const {
583     return isAISrc_128F16() || isAISrc_128B32();
584   }
585 
586   bool isAISrc_512B32() const {
587     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
588   }
589 
590   bool isAISrc_512B16() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
592   }
593 
594   bool isAISrc_512V2B16() const {
595     return isAISrc_512B16();
596   }
597 
598   bool isAISrc_512F32() const {
599     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
600   }
601 
602   bool isAISrc_512F16() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
604   }
605 
606   bool isAISrc_512V2F16() const {
607     return isAISrc_512F16() || isAISrc_512B32();
608   }
609 
610   bool isAISrc_1024B32() const {
611     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
612   }
613 
614   bool isAISrc_1024B16() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
616   }
617 
618   bool isAISrc_1024V2B16() const {
619     return isAISrc_1024B16();
620   }
621 
622   bool isAISrc_1024F32() const {
623     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
624   }
625 
626   bool isAISrc_1024F16() const {
627     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
628   }
629 
630   bool isAISrc_1024V2F16() const {
631     return isAISrc_1024F16() || isAISrc_1024B32();
632   }
633 
634   bool isKImmFP32() const {
635     return isLiteralImm(MVT::f32);
636   }
637 
638   bool isKImmFP16() const {
639     return isLiteralImm(MVT::f16);
640   }
641 
642   bool isMem() const override {
643     return false;
644   }
645 
646   bool isExpr() const {
647     return Kind == Expression;
648   }
649 
650   bool isSoppBrTarget() const {
651     return isExpr() || isImm();
652   }
653 
654   bool isSWaitCnt() const;
655   bool isHwreg() const;
656   bool isSendMsg() const;
657   bool isSwizzle() const;
658   bool isSMRDOffset8() const;
659   bool isSMRDOffset20() const;
660   bool isSMRDLiteralOffset() const;
661   bool isDPP8() const;
662   bool isDPPCtrl() const;
663   bool isBLGP() const;
664   bool isCBSZ() const;
665   bool isABID() const;
666   bool isGPRIdxMode() const;
667   bool isS16Imm() const;
668   bool isU16Imm() const;
669   bool isEndpgm() const;
670 
671   StringRef getExpressionAsToken() const {
672     assert(isExpr());
673     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
674     return S->getSymbol().getName();
675   }
676 
677   StringRef getToken() const {
678     assert(isToken());
679 
680     if (Kind == Expression)
681       return getExpressionAsToken();
682 
683     return StringRef(Tok.Data, Tok.Length);
684   }
685 
686   int64_t getImm() const {
687     assert(isImm());
688     return Imm.Val;
689   }
690 
691   ImmTy getImmTy() const {
692     assert(isImm());
693     return Imm.Type;
694   }
695 
696   unsigned getReg() const override {
697     assert(isRegKind());
698     return Reg.RegNo;
699   }
700 
701   SMLoc getStartLoc() const override {
702     return StartLoc;
703   }
704 
705   SMLoc getEndLoc() const override {
706     return EndLoc;
707   }
708 
709   SMRange getLocRange() const {
710     return SMRange(StartLoc, EndLoc);
711   }
712 
713   Modifiers getModifiers() const {
714     assert(isRegKind() || isImmTy(ImmTyNone));
715     return isRegKind() ? Reg.Mods : Imm.Mods;
716   }
717 
718   void setModifiers(Modifiers Mods) {
719     assert(isRegKind() || isImmTy(ImmTyNone));
720     if (isRegKind())
721       Reg.Mods = Mods;
722     else
723       Imm.Mods = Mods;
724   }
725 
726   bool hasModifiers() const {
727     return getModifiers().hasModifiers();
728   }
729 
730   bool hasFPModifiers() const {
731     return getModifiers().hasFPModifiers();
732   }
733 
734   bool hasIntModifiers() const {
735     return getModifiers().hasIntModifiers();
736   }
737 
738   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
739 
740   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
741 
742   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
743 
744   template <unsigned Bitwidth>
745   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
746 
747   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
748     addKImmFPOperands<16>(Inst, N);
749   }
750 
751   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
752     addKImmFPOperands<32>(Inst, N);
753   }
754 
755   void addRegOperands(MCInst &Inst, unsigned N) const;
756 
757   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
758     addRegOperands(Inst, N);
759   }
760 
761   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
762     if (isRegKind())
763       addRegOperands(Inst, N);
764     else if (isExpr())
765       Inst.addOperand(MCOperand::createExpr(Expr));
766     else
767       addImmOperands(Inst, N);
768   }
769 
770   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
771     Modifiers Mods = getModifiers();
772     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
773     if (isRegKind()) {
774       addRegOperands(Inst, N);
775     } else {
776       addImmOperands(Inst, N, false);
777     }
778   }
779 
780   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
781     assert(!hasIntModifiers());
782     addRegOrImmWithInputModsOperands(Inst, N);
783   }
784 
785   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
786     assert(!hasFPModifiers());
787     addRegOrImmWithInputModsOperands(Inst, N);
788   }
789 
790   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
791     Modifiers Mods = getModifiers();
792     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
793     assert(isRegKind());
794     addRegOperands(Inst, N);
795   }
796 
797   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
798     assert(!hasIntModifiers());
799     addRegWithInputModsOperands(Inst, N);
800   }
801 
802   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
803     assert(!hasFPModifiers());
804     addRegWithInputModsOperands(Inst, N);
805   }
806 
807   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
808     if (isImm())
809       addImmOperands(Inst, N);
810     else {
811       assert(isExpr());
812       Inst.addOperand(MCOperand::createExpr(Expr));
813     }
814   }
815 
816   static void printImmTy(raw_ostream& OS, ImmTy Type) {
817     switch (Type) {
818     case ImmTyNone: OS << "None"; break;
819     case ImmTyGDS: OS << "GDS"; break;
820     case ImmTyLDS: OS << "LDS"; break;
821     case ImmTyOffen: OS << "Offen"; break;
822     case ImmTyIdxen: OS << "Idxen"; break;
823     case ImmTyAddr64: OS << "Addr64"; break;
824     case ImmTyOffset: OS << "Offset"; break;
825     case ImmTyInstOffset: OS << "InstOffset"; break;
826     case ImmTyOffset0: OS << "Offset0"; break;
827     case ImmTyOffset1: OS << "Offset1"; break;
828     case ImmTyDLC: OS << "DLC"; break;
829     case ImmTyGLC: OS << "GLC"; break;
830     case ImmTySLC: OS << "SLC"; break;
831     case ImmTySWZ: OS << "SWZ"; break;
832     case ImmTyTFE: OS << "TFE"; break;
833     case ImmTyD16: OS << "D16"; break;
834     case ImmTyFORMAT: OS << "FORMAT"; break;
835     case ImmTyClampSI: OS << "ClampSI"; break;
836     case ImmTyOModSI: OS << "OModSI"; break;
837     case ImmTyDPP8: OS << "DPP8"; break;
838     case ImmTyDppCtrl: OS << "DppCtrl"; break;
839     case ImmTyDppRowMask: OS << "DppRowMask"; break;
840     case ImmTyDppBankMask: OS << "DppBankMask"; break;
841     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
842     case ImmTyDppFi: OS << "FI"; break;
843     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
844     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
845     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
846     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
847     case ImmTyDMask: OS << "DMask"; break;
848     case ImmTyDim: OS << "Dim"; break;
849     case ImmTyUNorm: OS << "UNorm"; break;
850     case ImmTyDA: OS << "DA"; break;
851     case ImmTyR128A16: OS << "R128A16"; break;
852     case ImmTyA16: OS << "A16"; break;
853     case ImmTyLWE: OS << "LWE"; break;
854     case ImmTyOff: OS << "Off"; break;
855     case ImmTyExpTgt: OS << "ExpTgt"; break;
856     case ImmTyExpCompr: OS << "ExpCompr"; break;
857     case ImmTyExpVM: OS << "ExpVM"; break;
858     case ImmTyHwreg: OS << "Hwreg"; break;
859     case ImmTySendMsg: OS << "SendMsg"; break;
860     case ImmTyInterpSlot: OS << "InterpSlot"; break;
861     case ImmTyInterpAttr: OS << "InterpAttr"; break;
862     case ImmTyAttrChan: OS << "AttrChan"; break;
863     case ImmTyOpSel: OS << "OpSel"; break;
864     case ImmTyOpSelHi: OS << "OpSelHi"; break;
865     case ImmTyNegLo: OS << "NegLo"; break;
866     case ImmTyNegHi: OS << "NegHi"; break;
867     case ImmTySwizzle: OS << "Swizzle"; break;
868     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
869     case ImmTyHigh: OS << "High"; break;
870     case ImmTyBLGP: OS << "BLGP"; break;
871     case ImmTyCBSZ: OS << "CBSZ"; break;
872     case ImmTyABID: OS << "ABID"; break;
873     case ImmTyEndpgm: OS << "Endpgm"; break;
874     }
875   }
876 
877   void print(raw_ostream &OS) const override {
878     switch (Kind) {
879     case Register:
880       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
881       break;
882     case Immediate:
883       OS << '<' << getImm();
884       if (getImmTy() != ImmTyNone) {
885         OS << " type: "; printImmTy(OS, getImmTy());
886       }
887       OS << " mods: " << Imm.Mods << '>';
888       break;
889     case Token:
890       OS << '\'' << getToken() << '\'';
891       break;
892     case Expression:
893       OS << "<expr " << *Expr << '>';
894       break;
895     }
896   }
897 
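  // Factory helpers used by the parser to create immediate, token, register and
  // expression operands.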
898   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
899                                       int64_t Val, SMLoc Loc,
900                                       ImmTy Type = ImmTyNone,
901                                       bool IsFPImm = false) {
902     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
903     Op->Imm.Val = Val;
904     Op->Imm.IsFPImm = IsFPImm;
905     Op->Imm.Type = Type;
906     Op->Imm.Mods = Modifiers();
907     Op->StartLoc = Loc;
908     Op->EndLoc = Loc;
909     return Op;
910   }
911 
912   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
913                                         StringRef Str, SMLoc Loc,
914                                         bool HasExplicitEncodingSize = true) {
915     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
916     Res->Tok.Data = Str.data();
917     Res->Tok.Length = Str.size();
918     Res->StartLoc = Loc;
919     Res->EndLoc = Loc;
920     return Res;
921   }
922 
923   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
924                                       unsigned RegNo, SMLoc S,
925                                       SMLoc E) {
926     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
927     Op->Reg.RegNo = RegNo;
928     Op->Reg.Mods = Modifiers();
929     Op->StartLoc = S;
930     Op->EndLoc = E;
931     return Op;
932   }
933 
934   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
935                                        const class MCExpr *Expr, SMLoc S) {
936     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
937     Op->Expr = Expr;
938     Op->StartLoc = S;
939     Op->EndLoc = S;
940     return Op;
941   }
942 };
943 
944 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
945   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
946   return OS;
947 }
948 
949 //===----------------------------------------------------------------------===//
950 // AsmParser
951 //===----------------------------------------------------------------------===//
952 
953 // Holds info related to the current kernel, e.g. the count of SGPRs used.
954 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
955 // next .amdgpu_hsa_kernel directive or at EOF.
956 class KernelScopeInfo {
957   int SgprIndexUnusedMin = -1;
958   int VgprIndexUnusedMin = -1;
959   MCContext *Ctx = nullptr;
960 
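  // Record that register index i is in use and publish one past the highest
  // index seen so far via the .kernel.sgpr_count / .kernel.vgpr_count symbols.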
961   void usesSgprAt(int i) {
962     if (i >= SgprIndexUnusedMin) {
963       SgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971   void usesVgprAt(int i) {
972     if (i >= VgprIndexUnusedMin) {
973       VgprIndexUnusedMin = ++i;
974       if (Ctx) {
975         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
976         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
977       }
978     }
979   }
980 
981 public:
982   KernelScopeInfo() = default;
983 
984   void initialize(MCContext &Context) {
985     Ctx = &Context;
986     usesSgprAt(SgprIndexUnusedMin = -1);
987     usesVgprAt(VgprIndexUnusedMin = -1);
988   }
989 
990   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
991     switch (RegKind) {
992       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
993       case IS_AGPR: // fall through
994       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
995       default: break;
996     }
997   }
998 };
999 
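// Target assembly parser for AMDGPU. It parses registers, immediates and named
// modifier operands, converts matched instructions to MCInsts, validates
// target-specific constraints, and handles AMDGPU-specific directives (kernel
// descriptors, amd_kernel_code_t, HSA and PAL metadata).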
1000 class AMDGPUAsmParser : public MCTargetAsmParser {
1001   MCAsmParser &Parser;
1002 
1003   // Maximum number of extra operands parsed after the first optional operand.
1004   // This lookahead may be necessary to skip hardcoded mandatory operands.
1005   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1006 
1007   unsigned ForcedEncodingSize = 0;
1008   bool ForcedDPP = false;
1009   bool ForcedSDWA = false;
1010   KernelScopeInfo KernelScope;
1011 
1012   /// @name Auto-generated Match Functions
1013   /// {
1014 
1015 #define GET_ASSEMBLER_HEADER
1016 #include "AMDGPUGenAsmMatcher.inc"
1017 
1018   /// }
1019 
1020 private:
1021   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1022   bool OutOfRangeError(SMRange Range);
1023   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1024   /// registers, and user-specified NextFreeXGPR values.
1025   ///
1026   /// \param Features [in] Target features, used for bug corrections.
1027   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1028   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1029   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1030   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1031   /// descriptor field, if valid.
1032   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1033   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1034   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1035   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1036   /// \param VGPRBlocks [out] Result VGPR block count.
1037   /// \param SGPRBlocks [out] Result SGPR block count.
1038   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1039                           bool FlatScrUsed, bool XNACKUsed,
1040                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1041                           SMRange VGPRRange, unsigned NextFreeSGPR,
1042                           SMRange SGPRRange, unsigned &VGPRBlocks,
1043                           unsigned &SGPRBlocks);
1044   bool ParseDirectiveAMDGCNTarget();
1045   bool ParseDirectiveAMDHSAKernel();
1046   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1047   bool ParseDirectiveHSACodeObjectVersion();
1048   bool ParseDirectiveHSACodeObjectISA();
1049   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1050   bool ParseDirectiveAMDKernelCodeT();
1051   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1052   bool ParseDirectiveAMDGPUHsaKernel();
1053 
1054   bool ParseDirectiveISAVersion();
1055   bool ParseDirectiveHSAMetadata();
1056   bool ParseDirectivePALMetadataBegin();
1057   bool ParseDirectivePALMetadata();
1058   bool ParseDirectiveAMDGPULDS();
1059 
1060   /// Common code to parse out a block of text (typically YAML) between start and
1061   /// end directives.
1062   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1063                            const char *AssemblerDirectiveEnd,
1064                            std::string &CollectString);
1065 
1066   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1067                              RegisterKind RegKind, unsigned Reg1);
1068   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1069                            unsigned& RegNum, unsigned& RegWidth);
1070   unsigned ParseRegularReg(RegisterKind &RegKind,
1071                            unsigned &RegNum,
1072                            unsigned &RegWidth);
1073   unsigned ParseSpecialReg(RegisterKind &RegKind,
1074                            unsigned &RegNum,
1075                            unsigned &RegWidth);
1076   unsigned ParseRegList(RegisterKind &RegKind,
1077                         unsigned &RegNum,
1078                         unsigned &RegWidth);
1079   bool ParseRegRange(unsigned& Num, unsigned& Width);
1080   unsigned getRegularReg(RegisterKind RegKind,
1081                          unsigned RegNum,
1082                          unsigned RegWidth);
1083 
1084   bool isRegister();
1085   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1086   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1087   void initializeGprCountSymbol(RegisterKind RegKind);
1088   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1089                              unsigned RegWidth);
1090   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1091                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1092   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1093                  bool IsGdsHardcoded);
1094 
1095 public:
1096   enum AMDGPUMatchResultTy {
1097     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1098   };
1099   enum OperandMode {
1100     OperandMode_Default,
1101     OperandMode_NSA,
1102   };
1103 
1104   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1105 
1106   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1107                const MCInstrInfo &MII,
1108                const MCTargetOptions &Options)
1109       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1110     MCAsmParserExtension::Initialize(Parser);
1111 
1112     if (getFeatureBits().none()) {
1113       // Set default features.
1114       copySTI().ToggleFeature("southern-islands");
1115     }
1116 
1117     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1118 
1119     {
1120       // TODO: make these predefined variables read-only.
1121       // Currently there is no suitable machinery in core llvm-mc for this.
1122       // MCSymbol::isRedefinable is intended for another purpose, and
1123       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1124       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1125       MCContext &Ctx = getContext();
1126       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1127         MCSymbol *Sym =
1128             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1129         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1130         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1131         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1132         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1134       } else {
1135         MCSymbol *Sym =
1136             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1138         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1142       }
1143       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1144         initializeGprCountSymbol(IS_VGPR);
1145         initializeGprCountSymbol(IS_SGPR);
1146       } else
1147         KernelScope.initialize(getContext());
1148     }
1149   }
1150 
1151   bool hasXNACK() const {
1152     return AMDGPU::hasXNACK(getSTI());
1153   }
1154 
1155   bool hasMIMG_R128() const {
1156     return AMDGPU::hasMIMG_R128(getSTI());
1157   }
1158 
1159   bool hasPackedD16() const {
1160     return AMDGPU::hasPackedD16(getSTI());
1161   }
1162 
1163   bool hasGFX10A16() const {
1164     return AMDGPU::hasGFX10A16(getSTI());
1165   }
1166 
1167   bool isSI() const {
1168     return AMDGPU::isSI(getSTI());
1169   }
1170 
1171   bool isCI() const {
1172     return AMDGPU::isCI(getSTI());
1173   }
1174 
1175   bool isVI() const {
1176     return AMDGPU::isVI(getSTI());
1177   }
1178 
1179   bool isGFX9() const {
1180     return AMDGPU::isGFX9(getSTI());
1181   }
1182 
1183   bool isGFX10() const {
1184     return AMDGPU::isGFX10(getSTI());
1185   }
1186 
1187   bool hasInv2PiInlineImm() const {
1188     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1189   }
1190 
1191   bool hasFlatOffsets() const {
1192     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1193   }
1194 
1195   bool hasSGPR102_SGPR103() const {
1196     return !isVI() && !isGFX9();
1197   }
1198 
1199   bool hasSGPR104_SGPR105() const {
1200     return isGFX10();
1201   }
1202 
1203   bool hasIntClamp() const {
1204     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1205   }
1206 
1207   AMDGPUTargetStreamer &getTargetStreamer() {
1208     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1209     return static_cast<AMDGPUTargetStreamer &>(TS);
1210   }
1211 
1212   const MCRegisterInfo *getMRI() const {
1213     // We need this const_cast because for some reason getContext() is not const
1214     // in MCAsmParser.
1215     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1216   }
1217 
1218   const MCInstrInfo *getMII() const {
1219     return &MII;
1220   }
1221 
1222   const FeatureBitset &getFeatureBits() const {
1223     return getSTI().getFeatureBits();
1224   }
1225 
1226   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1227   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1228   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1229 
1230   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1231   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1232   bool isForcedDPP() const { return ForcedDPP; }
1233   bool isForcedSDWA() const { return ForcedSDWA; }
1234   ArrayRef<unsigned> getMatchedVariants() const;
1235 
1236   std::unique_ptr<AMDGPUOperand> parseRegister();
1237   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1238   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1239   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1240                                       unsigned Kind) override;
1241   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1242                                OperandVector &Operands, MCStreamer &Out,
1243                                uint64_t &ErrorInfo,
1244                                bool MatchingInlineAsm) override;
1245   bool ParseDirective(AsmToken DirectiveID) override;
1246   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1247                                     OperandMode Mode = OperandMode_Default);
1248   StringRef parseMnemonicSuffix(StringRef Name);
1249   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1250                         SMLoc NameLoc, OperandVector &Operands) override;
1251   //bool ProcessInstruction(MCInst &Inst);
1252 
1253   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1254 
1255   OperandMatchResultTy
1256   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1257                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1258                      bool (*ConvertResult)(int64_t &) = nullptr);
1259 
1260   OperandMatchResultTy
1261   parseOperandArrayWithPrefix(const char *Prefix,
1262                               OperandVector &Operands,
1263                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1264                               bool (*ConvertResult)(int64_t&) = nullptr);
1265 
1266   OperandMatchResultTy
1267   parseNamedBit(const char *Name, OperandVector &Operands,
1268                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1269   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1270                                              StringRef &Value);
1271 
1272   bool isModifier();
1273   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1274   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1275   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1276   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1277   bool parseSP3NegModifier();
1278   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1279   OperandMatchResultTy parseReg(OperandVector &Operands);
1280   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1281   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1282   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1283   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1284   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1285   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1286   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1287 
1288   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1289   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1290   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1291   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1292 
1293   bool parseCnt(int64_t &IntVal);
1294   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1295   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1296 
1297 private:
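  // One parsed field of a composite operand such as sendmsg() or hwreg(): the
  // numeric Id, whether it was written symbolically, and whether it was present.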
1298   struct OperandInfoTy {
1299     int64_t Id;
1300     bool IsSymbolic = false;
1301     bool IsDefined = false;
1302 
1303     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1304   };
1305 
1306   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1307   bool validateSendMsg(const OperandInfoTy &Msg,
1308                        const OperandInfoTy &Op,
1309                        const OperandInfoTy &Stream,
1310                        const SMLoc Loc);
1311 
1312   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1313   bool validateHwreg(const OperandInfoTy &HwReg,
1314                      const int64_t Offset,
1315                      const int64_t Width,
1316                      const SMLoc Loc);
1317 
1318   void errorExpTgt();
1319   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1320   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1321 
1322   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1323   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1324   bool validateSOPLiteral(const MCInst &Inst) const;
1325   bool validateConstantBusLimitations(const MCInst &Inst);
1326   bool validateEarlyClobberLimitations(const MCInst &Inst);
1327   bool validateIntClampSupported(const MCInst &Inst);
1328   bool validateMIMGAtomicDMask(const MCInst &Inst);
1329   bool validateMIMGGatherDMask(const MCInst &Inst);
1330   bool validateMovrels(const MCInst &Inst);
1331   bool validateMIMGDataSize(const MCInst &Inst);
1332   bool validateMIMGAddrSize(const MCInst &Inst);
1333   bool validateMIMGD16(const MCInst &Inst);
1334   bool validateMIMGDim(const MCInst &Inst);
1335   bool validateLdsDirect(const MCInst &Inst);
1336   bool validateOpSel(const MCInst &Inst);
1337   bool validateVccOperand(unsigned Reg) const;
1338   bool validateVOP3Literal(const MCInst &Inst) const;
1339   unsigned getConstantBusLimit(unsigned Opcode) const;
1340   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1341   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1342   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1343 
1344   bool isId(const StringRef Id) const;
1345   bool isId(const AsmToken &Token, const StringRef Id) const;
1346   bool isToken(const AsmToken::TokenKind Kind) const;
1347   bool trySkipId(const StringRef Id);
1348   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1349   bool trySkipToken(const AsmToken::TokenKind Kind);
1350   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1351   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1352   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1353   AsmToken::TokenKind getTokenKind() const;
1354   bool parseExpr(int64_t &Imm);
1355   bool parseExpr(OperandVector &Operands);
1356   StringRef getTokenStr() const;
1357   AsmToken peekToken();
1358   AsmToken getToken() const;
1359   SMLoc getLoc() const;
1360   void lex();
1361 
1362 public:
1363   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1364   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1365 
1366   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1367   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1368   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1369   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1370   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1371   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1372 
1373   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1374                             const unsigned MinVal,
1375                             const unsigned MaxVal,
1376                             const StringRef ErrMsg);
1377   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1378   bool parseSwizzleOffset(int64_t &Imm);
1379   bool parseSwizzleMacro(int64_t &Imm);
1380   bool parseSwizzleQuadPerm(int64_t &Imm);
1381   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1382   bool parseSwizzleBroadcast(int64_t &Imm);
1383   bool parseSwizzleSwap(int64_t &Imm);
1384   bool parseSwizzleReverse(int64_t &Imm);
1385 
1386   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1387   int64_t parseGPRIdxMacro();
1388 
1389   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1390   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1391   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1392   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1393   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1394 
1395   AMDGPUOperand::Ptr defaultDLC() const;
1396   AMDGPUOperand::Ptr defaultGLC() const;
1397   AMDGPUOperand::Ptr defaultSLC() const;
1398 
1399   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1400   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1401   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1402   AMDGPUOperand::Ptr defaultFlatOffset() const;
1403 
1404   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1405 
1406   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1407                OptionalImmIndexMap &OptionalIdx);
1408   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1409   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1410   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1411 
1412   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1413 
1414   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1415                bool IsAtomic = false);
1416   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1417 
1418   OperandMatchResultTy parseDim(OperandVector &Operands);
1419   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1420   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1421   AMDGPUOperand::Ptr defaultRowMask() const;
1422   AMDGPUOperand::Ptr defaultBankMask() const;
1423   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1424   AMDGPUOperand::Ptr defaultFI() const;
1425   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1426   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1427 
1428   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1429                                     AMDGPUOperand::ImmTy Type);
1430   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1431   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1432   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1433   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1434   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1435   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1436   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1437                uint64_t BasicInstType,
1438                bool SkipDstVcc = false,
1439                bool SkipSrcVcc = false);
1440 
1441   AMDGPUOperand::Ptr defaultBLGP() const;
1442   AMDGPUOperand::Ptr defaultCBSZ() const;
1443   AMDGPUOperand::Ptr defaultABID() const;
1444 
1445   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1446   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1447 };
1448 
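// Describes an optional (named) operand: its source name, the ImmTy it maps to,
// whether it is a single-bit flag, and an optional value conversion callback.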
1449 struct OptionalOperand {
1450   const char *Name;
1451   AMDGPUOperand::ImmTy Type;
1452   bool IsBit;
1453   bool (*ConvertResult)(int64_t&);
1454 };
1455 
1456 } // end anonymous namespace
1457 
1458 // May also be called with an integer type of equivalent bitwidth.
1459 static const fltSemantics *getFltSemantics(unsigned Size) {
1460   switch (Size) {
1461   case 4:
1462     return &APFloat::IEEEsingle();
1463   case 8:
1464     return &APFloat::IEEEdouble();
1465   case 2:
1466     return &APFloat::IEEEhalf();
1467   default:
1468     llvm_unreachable("unsupported fp type");
1469   }
1470 }
1471 
1472 static const fltSemantics *getFltSemantics(MVT VT) {
1473   return getFltSemantics(VT.getSizeInBits() / 8);
1474 }
1475 
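// Map an AMDGPU operand type to the IEEE semantics of its floating-point
// interpretation.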
1476 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1477   switch (OperandType) {
1478   case AMDGPU::OPERAND_REG_IMM_INT32:
1479   case AMDGPU::OPERAND_REG_IMM_FP32:
1480   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1481   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1482   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1483   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1484     return &APFloat::IEEEsingle();
1485   case AMDGPU::OPERAND_REG_IMM_INT64:
1486   case AMDGPU::OPERAND_REG_IMM_FP64:
1487   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1488   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1489     return &APFloat::IEEEdouble();
1490   case AMDGPU::OPERAND_REG_IMM_INT16:
1491   case AMDGPU::OPERAND_REG_IMM_FP16:
1492   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1493   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1494   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1495   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1496   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1497   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1498   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1499   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1500   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1501   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1502     return &APFloat::IEEEhalf();
1503   default:
1504     llvm_unreachable("unsupported fp type");
1505   }
1506 }
1507 
1508 //===----------------------------------------------------------------------===//
1509 // Operand
1510 //===----------------------------------------------------------------------===//
1511 
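// Check whether FPLiteral can be converted to the semantics of VT without
// overflow or underflow; precision loss alone is tolerated. Note that FPLiteral
// is converted in place.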
1512 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1513   bool Lost;
1514 
1515   // Convert the literal to the semantics of the target type.
1516   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1517                                                APFloat::rmNearestTiesToEven,
1518                                                &Lost);
1519   // We allow precision loss but not overflow or underflow.
1520   if (Status != APFloat::opOK &&
1521       Lost &&
1522       ((Status & APFloat::opOverflow)  != 0 ||
1523        (Status & APFloat::opUnderflow) != 0)) {
1524     return false;
1525   }
1526 
1527   return true;
1528 }
1529 
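// True if Val fits into Size bits as either an unsigned or a signed value, i.e.
// truncation to Size bits loses no information.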
1530 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1531   return isUIntN(Size, Val) || isIntN(Size, Val);
1532 }
1533 
1534 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1535 
1536   // This is a hack to enable named inline values like
1537   // shared_base with both 32-bit and 64-bit operands.
1538   // Note that these values are defined as
1539   // 32-bit operands only.
1540   if (isInlineValue()) {
1541     return true;
1542   }
1543 
1544   if (!isImmTy(ImmTyNone)) {
1545     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1546     return false;
1547   }
1548   // TODO: We should avoid using host floats here. It would be better to
1549   // check the float bit values, which is what a few other places do.
1550   // We've had bot failures before due to weird NaN support on mips hosts.
1551 
1552   APInt Literal(64, Imm.Val);
1553 
1554   if (Imm.IsFPImm) { // We got fp literal token
1555     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1556       return AMDGPU::isInlinableLiteral64(Imm.Val,
1557                                           AsmParser->hasInv2PiInlineImm());
1558     }
1559 
1560     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1561     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1562       return false;
1563 
1564     if (type.getScalarSizeInBits() == 16) {
1565       return AMDGPU::isInlinableLiteral16(
1566         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1567         AsmParser->hasInv2PiInlineImm());
1568     }
1569 
1570     // Check if single precision literal is inlinable
1571     return AMDGPU::isInlinableLiteral32(
1572       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1573       AsmParser->hasInv2PiInlineImm());
1574   }
1575 
1576   // We got int literal token.
1577   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1578     return AMDGPU::isInlinableLiteral64(Imm.Val,
1579                                         AsmParser->hasInv2PiInlineImm());
1580   }
1581 
1582   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1583     return false;
1584   }
1585 
1586   if (type.getScalarSizeInBits() == 16) {
1587     return AMDGPU::isInlinableLiteral16(
1588       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1589       AsmParser->hasInv2PiInlineImm());
1590   }
1591 
1592   return AMDGPU::isInlinableLiteral32(
1593     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1594     AsmParser->hasInv2PiInlineImm());
1595 }
1596 
1597 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1598   // Check that this immediate can be added as literal
1599   if (!isImmTy(ImmTyNone)) {
1600     return false;
1601   }
1602 
1603   if (!Imm.IsFPImm) {
1604     // We got int literal token.
1605 
1606     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
1610       return false;
1611     }
1612 
1613     unsigned Size = type.getSizeInBits();
1614     if (Size == 64)
1615       Size = 32;
1616 
1617     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1618     // types.
1619     return isSafeTruncation(Imm.Val, Size);
1620   }
1621 
1622   // We got fp literal token
1623   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be set to zero, but such
    // literals are accepted.
1625     return true;
1626   }
1627 
1628   if (type == MVT::i64) { // Expected 64-bit int operand
1629     // We don't allow fp literals in 64-bit integer instructions. It is
1630     // unclear how we should encode them.
1631     return false;
1632   }
1633 
1634   // We allow fp literals with f16x2 operands assuming that the specified
1635   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1637   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1638                      (type == MVT::v2i16)? MVT::i16 : type;
1639 
1640   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1641   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1642 }
1643 
1644 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1645   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1646 }
1647 
1648 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1649   if (AsmParser->isVI())
1650     return isVReg32();
1651   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1652     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1653   else
1654     return false;
1655 }
1656 
1657 bool AMDGPUOperand::isSDWAFP16Operand() const {
1658   return isSDWAOperand(MVT::f16);
1659 }
1660 
1661 bool AMDGPUOperand::isSDWAFP32Operand() const {
1662   return isSDWAOperand(MVT::f32);
1663 }
1664 
1665 bool AMDGPUOperand::isSDWAInt16Operand() const {
1666   return isSDWAOperand(MVT::i16);
1667 }
1668 
1669 bool AMDGPUOperand::isSDWAInt32Operand() const {
1670   return isSDWAOperand(MVT::i32);
1671 }
1672 
1673 bool AMDGPUOperand::isBoolReg() const {
1674   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1675          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1676 }
1677 
1678 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1679 {
1680   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1681   assert(Size == 2 || Size == 4 || Size == 8);
1682 
1683   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
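  // E.g. for Size == 4, FpSignMask is bit 31: 'abs' clears it below and
  // 'neg' flips it.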
1684 
1685   if (Imm.Mods.Abs) {
1686     Val &= ~FpSignMask;
1687   }
1688   if (Imm.Mods.Neg) {
1689     Val ^= FpSignMask;
1690   }
1691 
1692   return Val;
1693 }
1694 
1695 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1696   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1697                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1701   } else {
1702     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1703     Inst.addOperand(MCOperand::createImm(Imm.Val));
1704   }
1705 }
1706 
1707 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1708   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1709   auto OpNum = Inst.getNumOperands();
1710   // Check that this operand accepts literals
1711   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1712 
1713   if (ApplyModifiers) {
1714     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1715     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1716     Val = applyInputFPModifiers(Val, Size);
1717   }
1718 
1719   APInt Literal(64, Val);
1720   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1721 
1722   if (Imm.IsFPImm) { // We got fp literal token
1723     switch (OpTy) {
1724     case AMDGPU::OPERAND_REG_IMM_INT64:
1725     case AMDGPU::OPERAND_REG_IMM_FP64:
1726     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1727     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1728       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1729                                        AsmParser->hasInv2PiInlineImm())) {
1730         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1731         return;
1732       }
1733 
1734       // Non-inlineable
1735       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check whether the low 32 bits are zero
1737         if (Literal.getLoBits(32) != 0) {
1738           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1739           "Can't encode literal as exact 64-bit floating-point operand. "
1740           "Low 32-bits will be set to zero");
1741         }
1742 
1743         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1744         return;
1745       }
1746 
1747       // We don't allow fp literals in 64-bit integer instructions. It is
1748       // unclear how we should encode them. This case should be checked earlier
1749       // in predicate methods (isLiteralImm())
1750       llvm_unreachable("fp literal in 64-bit integer instruction.");
1751 
1752     case AMDGPU::OPERAND_REG_IMM_INT32:
1753     case AMDGPU::OPERAND_REG_IMM_FP32:
1754     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1755     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1756     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1757     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1758     case AMDGPU::OPERAND_REG_IMM_INT16:
1759     case AMDGPU::OPERAND_REG_IMM_FP16:
1760     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1761     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1762     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1763     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1764     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1765     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1766     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1767     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1768     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1769     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1770       bool lost;
1771       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's fp semantics
1773       FPLiteral.convert(*getOpFltSemantics(OpTy),
1774                         APFloat::rmNearestTiesToEven, &lost);
      // Precision loss is allowed, but overflow or underflow is not. This
      // should have been checked earlier in isLiteralImm()
1777 
1778       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1779       Inst.addOperand(MCOperand::createImm(ImmVal));
1780       return;
1781     }
1782     default:
1783       llvm_unreachable("invalid operand size");
1784     }
1785 
1786     return;
1787   }
1788 
1789   // We got int literal token.
1790   // Only sign extend inline immediates.
1791   switch (OpTy) {
1792   case AMDGPU::OPERAND_REG_IMM_INT32:
1793   case AMDGPU::OPERAND_REG_IMM_FP32:
1794   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1795   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1796   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1798   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1799   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1800     if (isSafeTruncation(Val, 32) &&
1801         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1802                                      AsmParser->hasInv2PiInlineImm())) {
1803       Inst.addOperand(MCOperand::createImm(Val));
1804       return;
1805     }
1806 
1807     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1808     return;
1809 
1810   case AMDGPU::OPERAND_REG_IMM_INT64:
1811   case AMDGPU::OPERAND_REG_IMM_FP64:
1812   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1813   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1814     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1815       Inst.addOperand(MCOperand::createImm(Val));
1816       return;
1817     }
1818 
1819     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1820     return;
1821 
1822   case AMDGPU::OPERAND_REG_IMM_INT16:
1823   case AMDGPU::OPERAND_REG_IMM_FP16:
1824   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1825   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1826   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1827   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1828     if (isSafeTruncation(Val, 16) &&
1829         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1830                                      AsmParser->hasInv2PiInlineImm())) {
1831       Inst.addOperand(MCOperand::createImm(Val));
1832       return;
1833     }
1834 
1835     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1836     return;
1837 
1838   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1839   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1840   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1841   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1842     assert(isSafeTruncation(Val, 16));
1843     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1844                                         AsmParser->hasInv2PiInlineImm()));
1845 
1846     Inst.addOperand(MCOperand::createImm(Val));
1847     return;
1848   }
1849   default:
1850     llvm_unreachable("invalid operand size");
1851   }
1852 }
1853 
1854 template <unsigned Bitwidth>
1855 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1856   APInt Literal(64, Imm.Val);
1857 
1858   if (!Imm.IsFPImm) {
1859     // We got int literal token.
1860     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1861     return;
1862   }
1863 
1864   bool Lost;
1865   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1866   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1867                     APFloat::rmNearestTiesToEven, &Lost);
1868   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1869 }
1870 
1871 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1872   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1873 }
1874 
1875 static bool isInlineValue(unsigned Reg) {
1876   switch (Reg) {
1877   case AMDGPU::SRC_SHARED_BASE:
1878   case AMDGPU::SRC_SHARED_LIMIT:
1879   case AMDGPU::SRC_PRIVATE_BASE:
1880   case AMDGPU::SRC_PRIVATE_LIMIT:
1881   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1882     return true;
1883   case AMDGPU::SRC_VCCZ:
1884   case AMDGPU::SRC_EXECZ:
1885   case AMDGPU::SRC_SCC:
1886     return true;
1887   case AMDGPU::SGPR_NULL:
1888     return true;
1889   default:
1890     return false;
1891   }
1892 }
1893 
1894 bool AMDGPUOperand::isInlineValue() const {
1895   return isRegKind() && ::isInlineValue(getReg());
1896 }
1897 
1898 //===----------------------------------------------------------------------===//
1899 // AsmParser
1900 //===----------------------------------------------------------------------===//
1901 
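// Maps a register kind and a width in dwords to a register class, e.g. an
// IS_VGPR range of width 2 such as v[4:5] yields VReg_64RegClassID.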
1902 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1903   if (Is == IS_VGPR) {
1904     switch (RegWidth) {
1905       default: return -1;
1906       case 1: return AMDGPU::VGPR_32RegClassID;
1907       case 2: return AMDGPU::VReg_64RegClassID;
1908       case 3: return AMDGPU::VReg_96RegClassID;
1909       case 4: return AMDGPU::VReg_128RegClassID;
1910       case 5: return AMDGPU::VReg_160RegClassID;
1911       case 8: return AMDGPU::VReg_256RegClassID;
1912       case 16: return AMDGPU::VReg_512RegClassID;
1913       case 32: return AMDGPU::VReg_1024RegClassID;
1914     }
1915   } else if (Is == IS_TTMP) {
1916     switch (RegWidth) {
1917       default: return -1;
1918       case 1: return AMDGPU::TTMP_32RegClassID;
1919       case 2: return AMDGPU::TTMP_64RegClassID;
1920       case 4: return AMDGPU::TTMP_128RegClassID;
1921       case 8: return AMDGPU::TTMP_256RegClassID;
1922       case 16: return AMDGPU::TTMP_512RegClassID;
1923     }
1924   } else if (Is == IS_SGPR) {
1925     switch (RegWidth) {
1926       default: return -1;
1927       case 1: return AMDGPU::SGPR_32RegClassID;
1928       case 2: return AMDGPU::SGPR_64RegClassID;
1929       case 4: return AMDGPU::SGPR_128RegClassID;
1930       case 8: return AMDGPU::SGPR_256RegClassID;
1931       case 16: return AMDGPU::SGPR_512RegClassID;
1932     }
1933   } else if (Is == IS_AGPR) {
1934     switch (RegWidth) {
1935       default: return -1;
1936       case 1: return AMDGPU::AGPR_32RegClassID;
1937       case 2: return AMDGPU::AReg_64RegClassID;
1938       case 4: return AMDGPU::AReg_128RegClassID;
1939       case 16: return AMDGPU::AReg_512RegClassID;
1940       case 32: return AMDGPU::AReg_1024RegClassID;
1941     }
1942   }
1943   return -1;
1944 }
1945 
1946 static unsigned getSpecialRegForName(StringRef RegName) {
1947   return StringSwitch<unsigned>(RegName)
1948     .Case("exec", AMDGPU::EXEC)
1949     .Case("vcc", AMDGPU::VCC)
1950     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1951     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1952     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1953     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1954     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1955     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1956     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1957     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1958     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1959     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1960     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1961     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1962     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1963     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1964     .Case("m0", AMDGPU::M0)
1965     .Case("vccz", AMDGPU::SRC_VCCZ)
1966     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1967     .Case("execz", AMDGPU::SRC_EXECZ)
1968     .Case("src_execz", AMDGPU::SRC_EXECZ)
1969     .Case("scc", AMDGPU::SRC_SCC)
1970     .Case("src_scc", AMDGPU::SRC_SCC)
1971     .Case("tba", AMDGPU::TBA)
1972     .Case("tma", AMDGPU::TMA)
1973     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1974     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1975     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1976     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1977     .Case("vcc_lo", AMDGPU::VCC_LO)
1978     .Case("vcc_hi", AMDGPU::VCC_HI)
1979     .Case("exec_lo", AMDGPU::EXEC_LO)
1980     .Case("exec_hi", AMDGPU::EXEC_HI)
1981     .Case("tma_lo", AMDGPU::TMA_LO)
1982     .Case("tma_hi", AMDGPU::TMA_HI)
1983     .Case("tba_lo", AMDGPU::TBA_LO)
1984     .Case("tba_hi", AMDGPU::TBA_HI)
1985     .Case("null", AMDGPU::SGPR_NULL)
1986     .Default(AMDGPU::NoRegister);
1987 }
1988 
1989 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1990                                     SMLoc &EndLoc) {
1991   auto R = parseRegister();
1992   if (!R) return true;
1993   assert(R->isReg());
1994   RegNo = R->getReg();
1995   StartLoc = R->getStartLoc();
1996   EndLoc = R->getEndLoc();
1997   return false;
1998 }
1999 
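// Used when parsing register lists such as [s0,s1,s2,s3] or [exec_lo,exec_hi]:
// checks that Reg1 extends the range collected so far by exactly one register
// (or forms a known special register pair) and grows RegWidth accordingly.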
2000 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2001                                             RegisterKind RegKind, unsigned Reg1) {
2002   switch (RegKind) {
2003   case IS_SPECIAL:
2004     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2005       Reg = AMDGPU::EXEC;
2006       RegWidth = 2;
2007       return true;
2008     }
2009     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2010       Reg = AMDGPU::FLAT_SCR;
2011       RegWidth = 2;
2012       return true;
2013     }
2014     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2015       Reg = AMDGPU::XNACK_MASK;
2016       RegWidth = 2;
2017       return true;
2018     }
2019     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2020       Reg = AMDGPU::VCC;
2021       RegWidth = 2;
2022       return true;
2023     }
2024     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2025       Reg = AMDGPU::TBA;
2026       RegWidth = 2;
2027       return true;
2028     }
2029     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2030       Reg = AMDGPU::TMA;
2031       RegWidth = 2;
2032       return true;
2033     }
2034     return false;
2035   case IS_VGPR:
2036   case IS_SGPR:
2037   case IS_AGPR:
2038   case IS_TTMP:
2039     if (Reg1 != Reg + RegWidth) {
2040       return false;
2041     }
2042     RegWidth++;
2043     return true;
2044   default:
2045     llvm_unreachable("unexpected register kind");
2046   }
2047 }
2048 
2049 struct RegInfo {
2050   StringLiteral Name;
2051   RegisterKind Kind;
2052 };
2053 
2054 static constexpr RegInfo RegularRegisters[] = {
2055   {{"v"},    IS_VGPR},
2056   {{"s"},    IS_SGPR},
2057   {{"ttmp"}, IS_TTMP},
2058   {{"acc"},  IS_AGPR},
2059   {{"a"},    IS_AGPR},
2060 };
2061 
2062 static bool isRegularReg(RegisterKind Kind) {
2063   return Kind == IS_VGPR ||
2064          Kind == IS_SGPR ||
2065          Kind == IS_TTMP ||
2066          Kind == IS_AGPR;
2067 }
2068 
2069 static const RegInfo* getRegularRegInfo(StringRef Str) {
2070   for (const RegInfo &Reg : RegularRegisters)
2071     if (Str.startswith(Reg.Name))
2072       return &Reg;
2073   return nullptr;
2074 }
2075 
2076 static bool getRegNum(StringRef Str, unsigned& Num) {
2077   return !Str.getAsInteger(10, Num);
2078 }
2079 
2080 bool
2081 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2082                             const AsmToken &NextToken) const {
2083 
2084   // A list of consecutive registers: [s0,s1,s2,s3]
2085   if (Token.is(AsmToken::LBrac))
2086     return true;
2087 
2088   if (!Token.is(AsmToken::Identifier))
2089     return false;
2090 
2091   // A single register like s0 or a range of registers like s[0:1]
2092 
2093   StringRef Str = Token.getString();
2094   const RegInfo *Reg = getRegularRegInfo(Str);
2095   if (Reg) {
2096     StringRef RegName = Reg->Name;
2097     StringRef RegSuffix = Str.substr(RegName.size());
2098     if (!RegSuffix.empty()) {
2099       unsigned Num;
2100       // A single register with an index: rXX
2101       if (getRegNum(RegSuffix, Num))
2102         return true;
2103     } else {
2104       // A range of registers: r[XX:YY].
2105       if (NextToken.is(AsmToken::LBrac))
2106         return true;
2107     }
2108   }
2109 
2110   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2111 }
2112 
2113 bool
2114 AMDGPUAsmParser::isRegister()
2115 {
2116   return isRegister(getToken(), peekToken());
2117 }
2118 
2119 unsigned
2120 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2121                                unsigned RegNum,
2122                                unsigned RegWidth) {
2123 
2124   assert(isRegularReg(RegKind));
2125 
2126   unsigned AlignSize = 1;
2127   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2128     // SGPR and TTMP registers must be aligned.
2129     // Max required alignment is 4 dwords.
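    // E.g. s[2:3] (RegNum = 2, RegWidth = 2) is accepted below, while
    // s[1:2] is rejected because 1 is not a multiple of the alignment.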
2130     AlignSize = std::min(RegWidth, 4u);
2131   }
2132 
2133   if (RegNum % AlignSize != 0)
2134     return AMDGPU::NoRegister;
2135 
2136   unsigned RegIdx = RegNum / AlignSize;
2137   int RCID = getRegClass(RegKind, RegWidth);
2138   if (RCID == -1)
2139     return AMDGPU::NoRegister;
2140 
2141   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2142   const MCRegisterClass RC = TRI->getRegClass(RCID);
2143   if (RegIdx >= RC.getNumRegs())
2144     return AMDGPU::NoRegister;
2145 
2146   return RC.getRegister(RegIdx);
2147 }
2148 
2149 bool
2150 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
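  // Parses the bracketed part of a register range such as "[8:11]" (or a
  // single index such as "[8]"); for "[8:11]" this yields Num = 8, Width = 4.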
2151   int64_t RegLo, RegHi;
2152   if (!trySkipToken(AsmToken::LBrac))
2153     return false;
2154 
2155   if (!parseExpr(RegLo))
2156     return false;
2157 
2158   if (trySkipToken(AsmToken::Colon)) {
2159     if (!parseExpr(RegHi))
2160       return false;
2161   } else {
2162     RegHi = RegLo;
2163   }
2164 
2165   if (!trySkipToken(AsmToken::RBrac))
2166     return false;
2167 
2168   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2169     return false;
2170 
2171   Num = static_cast<unsigned>(RegLo);
2172   Width = (RegHi - RegLo) + 1;
2173   return true;
2174 }
2175 
2176 unsigned
2177 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2178                                  unsigned &RegNum,
2179                                  unsigned &RegWidth) {
2180   assert(isToken(AsmToken::Identifier));
2181   unsigned Reg = getSpecialRegForName(getTokenStr());
2182   if (Reg) {
2183     RegNum = 0;
2184     RegWidth = 1;
2185     RegKind = IS_SPECIAL;
2186     lex(); // skip register name
2187   }
2188   return Reg;
2189 }
2190 
2191 unsigned
2192 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2193                                  unsigned &RegNum,
2194                                  unsigned &RegWidth) {
2195   assert(isToken(AsmToken::Identifier));
2196   StringRef RegName = getTokenStr();
2197 
2198   const RegInfo *RI = getRegularRegInfo(RegName);
2199   if (!RI)
2200     return AMDGPU::NoRegister;
2201   lex(); // skip register name
2202 
2203   RegKind = RI->Kind;
2204   StringRef RegSuffix = RegName.substr(RI->Name.size());
2205   if (!RegSuffix.empty()) {
2206     // Single 32-bit register: vXX.
2207     if (!getRegNum(RegSuffix, RegNum))
2208       return AMDGPU::NoRegister;
2209     RegWidth = 1;
2210   } else {
2211     // Range of registers: v[XX:YY]. ":YY" is optional.
2212     if (!ParseRegRange(RegNum, RegWidth))
2213       return AMDGPU::NoRegister;
2214   }
2215 
2216   return getRegularReg(RegKind, RegNum, RegWidth);
2217 }
2218 
2219 unsigned
2220 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2221                               unsigned &RegNum,
2222                               unsigned &RegWidth) {
2223   unsigned Reg = AMDGPU::NoRegister;
2224 
2225   if (!trySkipToken(AsmToken::LBrac))
2226     return AMDGPU::NoRegister;
2227 
2228   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2229 
2230   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2231     return AMDGPU::NoRegister;
2232   if (RegWidth != 1)
2233     return AMDGPU::NoRegister;
2234 
  while (trySkipToken(AsmToken::Comma)) {
2236     RegisterKind NextRegKind;
2237     unsigned NextReg, NextRegNum, NextRegWidth;
2238 
2239     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2240       return AMDGPU::NoRegister;
2241     if (NextRegWidth != 1)
2242       return AMDGPU::NoRegister;
2243     if (NextRegKind != RegKind)
2244       return AMDGPU::NoRegister;
2245     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2246       return AMDGPU::NoRegister;
2247   }
2248 
2249   if (!trySkipToken(AsmToken::RBrac))
2250     return AMDGPU::NoRegister;
2251 
2252   if (isRegularReg(RegKind))
2253     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2254 
2255   return Reg;
2256 }
2257 
2258 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2259                                           unsigned &Reg,
2260                                           unsigned &RegNum,
2261                                           unsigned &RegWidth) {
2262   Reg = AMDGPU::NoRegister;
2263 
2264   if (isToken(AsmToken::Identifier)) {
2265     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2266     if (Reg == AMDGPU::NoRegister)
2267       Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2268   } else {
2269     Reg = ParseRegList(RegKind, RegNum, RegWidth);
2270   }
2271 
2272   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2273   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2274 }
2275 
2276 Optional<StringRef>
2277 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2278   switch (RegKind) {
2279   case IS_VGPR:
2280     return StringRef(".amdgcn.next_free_vgpr");
2281   case IS_SGPR:
2282     return StringRef(".amdgcn.next_free_sgpr");
2283   default:
2284     return None;
2285   }
2286 }
2287 
2288 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2289   auto SymbolName = getGprCountSymbolName(RegKind);
2290   assert(SymbolName && "initializing invalid register kind");
2291   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2292   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2293 }
2294 
2295 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2296                                             unsigned DwordRegIndex,
2297                                             unsigned RegWidth) {
2298   // Symbols are only defined for GCN targets
2299   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2300     return true;
2301 
2302   auto SymbolName = getGprCountSymbolName(RegKind);
2303   if (!SymbolName)
2304     return true;
2305   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2306 
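  // E.g. after v[8:11] is parsed, DwordRegIndex = 8 and RegWidth = 4, so
  // NewMax = 11 and the symbol is raised to 12 below if it was smaller.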
2307   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2308   int64_t OldCount;
2309 
2310   if (!Sym->isVariable())
2311     return !Error(getParser().getTok().getLoc(),
2312                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2313   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2314     return !Error(
2315         getParser().getTok().getLoc(),
2316         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2317 
2318   if (OldCount <= NewMax)
2319     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2320 
2321   return true;
2322 }
2323 
2324 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2325   const auto &Tok = Parser.getTok();
2326   SMLoc StartLoc = Tok.getLoc();
2327   SMLoc EndLoc = Tok.getEndLoc();
2328   RegisterKind RegKind;
2329   unsigned Reg, RegNum, RegWidth;
2330 
2331   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2332     //FIXME: improve error messages (bug 41303).
2333     Error(StartLoc, "not a valid operand.");
2334     return nullptr;
2335   }
2336   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2337     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2338       return nullptr;
2339   } else
2340     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2341   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2342 }
2343 
2344 OperandMatchResultTy
2345 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2346   // TODO: add syntactic sugar for 1/(2*PI)
2347 
2348   assert(!isRegister());
2349   assert(!isModifier());
2350 
2351   const auto& Tok = getToken();
2352   const auto& NextTok = peekToken();
2353   bool IsReal = Tok.is(AsmToken::Real);
2354   SMLoc S = getLoc();
2355   bool Negate = false;
2356 
2357   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2358     lex();
2359     IsReal = true;
2360     Negate = true;
2361   }
2362 
2363   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional sign are allowed.
2367 
2368     StringRef Num = getTokenStr();
2369     lex();
2370 
2371     APFloat RealVal(APFloat::IEEEdouble());
2372     auto roundMode = APFloat::rmNearestTiesToEven;
2373     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2374       return MatchOperand_ParseFail;
2375     }
2376     if (Negate)
2377       RealVal.changeSign();
2378 
2379     Operands.push_back(
2380       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2381                                AMDGPUOperand::ImmTyNone, true));
2382 
2383     return MatchOperand_Success;
2384 
2385   } else {
2386     int64_t IntVal;
2387     const MCExpr *Expr;
2388     SMLoc S = getLoc();
2389 
2390     if (HasSP3AbsModifier) {
2391       // This is a workaround for handling expressions
2392       // as arguments of SP3 'abs' modifier, for example:
2393       //     |1.0|
2394       //     |-1|
2395       //     |1+x|
2396       // This syntax is not compatible with syntax of standard
2397       // MC expressions (due to the trailing '|').
2398       SMLoc EndLoc;
2399       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2400         return MatchOperand_ParseFail;
2401     } else {
2402       if (Parser.parseExpression(Expr))
2403         return MatchOperand_ParseFail;
2404     }
2405 
2406     if (Expr->evaluateAsAbsolute(IntVal)) {
2407       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2408     } else {
2409       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2410     }
2411 
2412     return MatchOperand_Success;
2413   }
2414 
2415   return MatchOperand_NoMatch;
2416 }
2417 
2418 OperandMatchResultTy
2419 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2420   if (!isRegister())
2421     return MatchOperand_NoMatch;
2422 
2423   if (auto R = parseRegister()) {
2424     assert(R->isReg());
2425     Operands.push_back(std::move(R));
2426     return MatchOperand_Success;
2427   }
2428   return MatchOperand_ParseFail;
2429 }
2430 
2431 OperandMatchResultTy
2432 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2433   auto res = parseReg(Operands);
2434   if (res != MatchOperand_NoMatch) {
2435     return res;
2436   } else if (isModifier()) {
2437     return MatchOperand_NoMatch;
2438   } else {
2439     return parseImm(Operands, HasSP3AbsMod);
2440   }
2441 }
2442 
2443 bool
2444 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2445   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2446     const auto &str = Token.getString();
2447     return str == "abs" || str == "neg" || str == "sext";
2448   }
2449   return false;
2450 }
2451 
2452 bool
2453 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2454   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2455 }
2456 
2457 bool
2458 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2459   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2460 }
2461 
2462 bool
2463 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2464   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2465 }
2466 
2467 // Check if this is an operand modifier or an opcode modifier
2468 // which may look like an expression but it is not. We should
2469 // avoid parsing these modifiers as expressions. Currently
2470 // recognized sequences are:
2471 //   |...|
2472 //   abs(...)
2473 //   neg(...)
2474 //   sext(...)
2475 //   -reg
2476 //   -|...|
2477 //   -abs(...)
2478 //   name:...
2479 // Note that simple opcode modifiers like 'gds' may be parsed as
2480 // expressions; this is a special case. See getExpressionAsToken.
2481 //
2482 bool
2483 AMDGPUAsmParser::isModifier() {
2484 
2485   AsmToken Tok = getToken();
2486   AsmToken NextToken[2];
2487   peekTokens(NextToken);
2488 
2489   return isOperandModifier(Tok, NextToken[0]) ||
2490          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2491          isOpcodeModifierWithVal(Tok, NextToken[0]);
2492 }
2493 
2494 // Check if the current token is an SP3 'neg' modifier.
2495 // Currently this modifier is allowed in the following context:
2496 //
2497 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2498 // 2. Before an 'abs' modifier: -abs(...)
2499 // 3. Before an SP3 'abs' modifier: -|...|
2500 //
2501 // In all other cases "-" is handled as a part
2502 // of an expression that follows the sign.
2503 //
// Note: When "-" is followed by an integer literal N,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2515 //
2516 bool
2517 AMDGPUAsmParser::parseSP3NegModifier() {
2518 
2519   AsmToken NextToken[2];
2520   peekTokens(NextToken);
2521 
2522   if (isToken(AsmToken::Minus) &&
2523       (isRegister(NextToken[0], NextToken[1]) ||
2524        NextToken[0].is(AsmToken::Pipe) ||
2525        isId(NextToken[0], "abs"))) {
2526     lex();
2527     return true;
2528   }
2529 
2530   return false;
2531 }
2532 
2533 OperandMatchResultTy
2534 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2535                                               bool AllowImm) {
2536   bool Neg, SP3Neg;
2537   bool Abs, SP3Abs;
2538   SMLoc Loc;
2539 
2540   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2541   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2542     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2543     return MatchOperand_ParseFail;
2544   }
2545 
2546   SP3Neg = parseSP3NegModifier();
2547 
2548   Loc = getLoc();
2549   Neg = trySkipId("neg");
2550   if (Neg && SP3Neg) {
2551     Error(Loc, "expected register or immediate");
2552     return MatchOperand_ParseFail;
2553   }
2554   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2555     return MatchOperand_ParseFail;
2556 
2557   Abs = trySkipId("abs");
2558   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2559     return MatchOperand_ParseFail;
2560 
2561   Loc = getLoc();
2562   SP3Abs = trySkipToken(AsmToken::Pipe);
2563   if (Abs && SP3Abs) {
2564     Error(Loc, "expected register or immediate");
2565     return MatchOperand_ParseFail;
2566   }
2567 
2568   OperandMatchResultTy Res;
2569   if (AllowImm) {
2570     Res = parseRegOrImm(Operands, SP3Abs);
2571   } else {
2572     Res = parseReg(Operands);
2573   }
2574   if (Res != MatchOperand_Success) {
2575     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2576   }
2577 
2578   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2579     return MatchOperand_ParseFail;
2580   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2581     return MatchOperand_ParseFail;
2582   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2583     return MatchOperand_ParseFail;
2584 
2585   AMDGPUOperand::Modifiers Mods;
2586   Mods.Abs = Abs || SP3Abs;
2587   Mods.Neg = Neg || SP3Neg;
2588 
2589   if (Mods.hasFPModifiers()) {
2590     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2591     if (Op.isExpr()) {
2592       Error(Op.getStartLoc(), "expected an absolute expression");
2593       return MatchOperand_ParseFail;
2594     }
2595     Op.setModifiers(Mods);
2596   }
2597   return MatchOperand_Success;
2598 }
2599 
2600 OperandMatchResultTy
2601 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2602                                                bool AllowImm) {
2603   bool Sext = trySkipId("sext");
2604   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2605     return MatchOperand_ParseFail;
2606 
2607   OperandMatchResultTy Res;
2608   if (AllowImm) {
2609     Res = parseRegOrImm(Operands);
2610   } else {
2611     Res = parseReg(Operands);
2612   }
2613   if (Res != MatchOperand_Success) {
2614     return Sext? MatchOperand_ParseFail : Res;
2615   }
2616 
2617   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2618     return MatchOperand_ParseFail;
2619 
2620   AMDGPUOperand::Modifiers Mods;
2621   Mods.Sext = Sext;
2622 
2623   if (Mods.hasIntModifiers()) {
2624     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2625     if (Op.isExpr()) {
2626       Error(Op.getStartLoc(), "expected an absolute expression");
2627       return MatchOperand_ParseFail;
2628     }
2629     Op.setModifiers(Mods);
2630   }
2631 
2632   return MatchOperand_Success;
2633 }
2634 
2635 OperandMatchResultTy
2636 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2637   return parseRegOrImmWithFPInputMods(Operands, false);
2638 }
2639 
2640 OperandMatchResultTy
2641 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2642   return parseRegOrImmWithIntInputMods(Operands, false);
2643 }
2644 
2645 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2646   auto Loc = getLoc();
2647   if (trySkipId("off")) {
2648     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2649                                                 AMDGPUOperand::ImmTyOff, false));
2650     return MatchOperand_Success;
2651   }
2652 
2653   if (!isRegister())
2654     return MatchOperand_NoMatch;
2655 
2656   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2657   if (Reg) {
2658     Operands.push_back(std::move(Reg));
2659     return MatchOperand_Success;
2660   }
2661 
2662   return MatchOperand_ParseFail;
2663 
2664 }
2665 
2666 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2667   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2668 
2669   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2670       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2671       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2672       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2673     return Match_InvalidOperand;
2674 
2675   if ((TSFlags & SIInstrFlags::VOP3) &&
2676       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2677       getForcedEncodingSize() != 64)
2678     return Match_PreferE32;
2679 
2680   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2681       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2682     // v_mac_f32/16 allow only dst_sel == DWORD;
2683     auto OpNum =
2684         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2685     const auto &Op = Inst.getOperand(OpNum);
2686     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2687       return Match_InvalidOperand;
2688     }
2689   }
2690 
2691   return Match_Success;
2692 }
2693 
2694 // What asm variants we should check
2695 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2696   if (getForcedEncodingSize() == 32) {
2697     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2698     return makeArrayRef(Variants);
2699   }
2700 
2701   if (isForcedVOP3()) {
2702     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2703     return makeArrayRef(Variants);
2704   }
2705 
2706   if (isForcedSDWA()) {
2707     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2708                                         AMDGPUAsmVariants::SDWA9};
2709     return makeArrayRef(Variants);
2710   }
2711 
2712   if (isForcedDPP()) {
2713     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2714     return makeArrayRef(Variants);
2715   }
2716 
2717   static const unsigned Variants[] = {
2718     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2719     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2720   };
2721 
2722   return makeArrayRef(Variants);
2723 }
2724 
2725 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2726   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2727   const unsigned Num = Desc.getNumImplicitUses();
2728   for (unsigned i = 0; i < Num; ++i) {
2729     unsigned Reg = Desc.ImplicitUses[i];
2730     switch (Reg) {
2731     case AMDGPU::FLAT_SCR:
2732     case AMDGPU::VCC:
2733     case AMDGPU::VCC_LO:
2734     case AMDGPU::VCC_HI:
2735     case AMDGPU::M0:
2736       return Reg;
2737     default:
2738       break;
2739     }
2740   }
2741   return AMDGPU::NoRegister;
2742 }
2743 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 has no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2748 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2749                                        unsigned OpIdx) const {
2750   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2751 
2752   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2753     return false;
2754   }
2755 
2756   const MCOperand &MO = Inst.getOperand(OpIdx);
2757 
2758   int64_t Val = MO.getImm();
2759   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2760 
2761   switch (OpSize) { // expected operand size
2762   case 8:
2763     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2764   case 4:
2765     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2766   case 2: {
2767     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2768     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2769         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2770         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2771         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2772         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2773         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2774       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2775     } else {
2776       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2777     }
2778   }
2779   default:
2780     llvm_unreachable("invalid operand size");
2781   }
2782 }
2783 
2784 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2785   if (!isGFX10())
2786     return 1;
2787 
2788   switch (Opcode) {
2789   // 64-bit shift instructions can use only one scalar value input
2790   case AMDGPU::V_LSHLREV_B64:
2791   case AMDGPU::V_LSHLREV_B64_gfx10:
2792   case AMDGPU::V_LSHL_B64:
2793   case AMDGPU::V_LSHRREV_B64:
2794   case AMDGPU::V_LSHRREV_B64_gfx10:
2795   case AMDGPU::V_LSHR_B64:
2796   case AMDGPU::V_ASHRREV_I64:
2797   case AMDGPU::V_ASHRREV_I64_gfx10:
2798   case AMDGPU::V_ASHR_I64:
2799     return 1;
2800   default:
2801     return 2;
2802   }
2803 }
2804 
2805 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2806   const MCOperand &MO = Inst.getOperand(OpIdx);
2807   if (MO.isImm()) {
2808     return !isInlineConstant(Inst, OpIdx);
2809   } else if (MO.isReg()) {
2810     auto Reg = MO.getReg();
2811     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2812     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2813   } else {
2814     return true;
2815   }
2816 }
2817 
2818 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2819   const unsigned Opcode = Inst.getOpcode();
2820   const MCInstrDesc &Desc = MII.get(Opcode);
2821   unsigned ConstantBusUseCount = 0;
2822   unsigned NumLiterals = 0;
2823   unsigned LiteralSize;
2824 
2825   if (Desc.TSFlags &
2826       (SIInstrFlags::VOPC |
2827        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2828        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2829        SIInstrFlags::SDWA)) {
2830     // Check special imm operands (used by madmk, etc)
2831     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2832       ++ConstantBusUseCount;
2833     }
2834 
2835     SmallDenseSet<unsigned> SGPRsUsed;
2836     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2837     if (SGPRUsed != AMDGPU::NoRegister) {
2838       SGPRsUsed.insert(SGPRUsed);
2839       ++ConstantBusUseCount;
2840     }
2841 
2842     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2843     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2844     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2845 
2846     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2847 
2848     for (int OpIdx : OpIndices) {
2849       if (OpIdx == -1) break;
2850 
2851       const MCOperand &MO = Inst.getOperand(OpIdx);
2852       if (usesConstantBus(Inst, OpIdx)) {
2853         if (MO.isReg()) {
2854           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
2856           //   s0, s[0:1]
2857           //   flat_scratch_lo, flat_scratch
2858           //   flat_scratch_lo, flat_scratch_hi
2859           // are theoretically valid but they are disabled anyway.
2860           // Note that this code mimics SIInstrInfo::verifyInstruction
2861           if (!SGPRsUsed.count(Reg)) {
2862             SGPRsUsed.insert(Reg);
2863             ++ConstantBusUseCount;
2864           }
2865         } else { // Expression or a literal
2866 
2867           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2868             continue; // special operand like VINTERP attr_chan
2869 
2870           // An instruction may use only one literal.
2871           // This has been validated on the previous step.
2872           // See validateVOP3Literal.
2873           // This literal may be used as more than one operand.
2874           // If all these operands are of the same size,
2875           // this literal counts as one scalar value.
2876           // Otherwise it counts as 2 scalar values.
2877           // See "GFX10 Shader Programming", section 3.6.2.3.
2878 
2879           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2880           if (Size < 4) Size = 4;
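          // Sub-dword literals still occupy a full 32-bit literal dword,
          // so they are counted as at least 4 bytes here.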
2881 
2882           if (NumLiterals == 0) {
2883             NumLiterals = 1;
2884             LiteralSize = Size;
2885           } else if (LiteralSize != Size) {
2886             NumLiterals = 2;
2887           }
2888         }
2889       }
2890     }
2891   }
2892   ConstantBusUseCount += NumLiterals;
2893 
2894   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2895 }
2896 
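// An instruction with an early-clobber vdst must not use a source register
// that overlaps the destination.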
2897 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2898   const unsigned Opcode = Inst.getOpcode();
2899   const MCInstrDesc &Desc = MII.get(Opcode);
2900 
2901   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2902   if (DstIdx == -1 ||
2903       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2904     return true;
2905   }
2906 
2907   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2908 
2909   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2910   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2911   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2912 
2913   assert(DstIdx != -1);
2914   const MCOperand &Dst = Inst.getOperand(DstIdx);
2915   assert(Dst.isReg());
2916   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2917 
2918   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2919 
2920   for (int SrcIdx : SrcIndices) {
2921     if (SrcIdx == -1) break;
2922     const MCOperand &Src = Inst.getOperand(SrcIdx);
2923     if (Src.isReg()) {
2924       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2925       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2926         return false;
2927       }
2928     }
2929   }
2930 
2931   return true;
2932 }
2933 
2934 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2935 
2936   const unsigned Opc = Inst.getOpcode();
2937   const MCInstrDesc &Desc = MII.get(Opc);
2938 
2939   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2940     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2941     assert(ClampIdx != -1);
2942     return Inst.getOperand(ClampIdx).getImm() == 0;
2943   }
2944 
2945   return true;
2946 }
2947 
2948 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2949 
2950   const unsigned Opc = Inst.getOpcode();
2951   const MCInstrDesc &Desc = MII.get(Opc);
2952 
2953   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2954     return true;
2955 
2956   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2957   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2958   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2959 
2960   assert(VDataIdx != -1);
2961   assert(DMaskIdx != -1);
2962   assert(TFEIdx != -1);
2963 
2964   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2965   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2966   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2967   if (DMask == 0)
2968     DMask = 1;
2969 
2970   unsigned DataSize =
2971     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2972   if (hasPackedD16()) {
2973     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2974     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2975       DataSize = (DataSize + 1) / 2;
2976   }
2977 
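  // E.g. dmask = 0b0111 requires 3 dwords of vdata, one more when tfe is
  // set, and half as many (rounded up) when packed d16 is in use.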
2978   return (VDataSize / 4) == DataSize + TFESize;
2979 }
2980 
2981 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2982   const unsigned Opc = Inst.getOpcode();
2983   const MCInstrDesc &Desc = MII.get(Opc);
2984 
2985   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2986     return true;
2987 
2988   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2989   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2990       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2991   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2992   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2993   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2994 
2995   assert(VAddr0Idx != -1);
2996   assert(SrsrcIdx != -1);
2997   assert(DimIdx != -1);
2998   assert(SrsrcIdx > VAddr0Idx);
2999 
3000   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3001   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3002   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
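  // With NSA, each address component is a separate 32-bit vaddr operand, so
  // the number of operands between vaddr0 and srsrc is the address size.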
3003   unsigned VAddrSize =
3004       IsNSA ? SrsrcIdx - VAddr0Idx
3005             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3006 
3007   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3008                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3009                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3010                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3011   if (!IsNSA) {
3012     if (AddrSize > 8)
3013       AddrSize = 16;
3014     else if (AddrSize > 4)
3015       AddrSize = 8;
3016   }
3017 
3018   return VAddrSize == AddrSize;
3019 }
3020 
3021 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3022 
3023   const unsigned Opc = Inst.getOpcode();
3024   const MCInstrDesc &Desc = MII.get(Opc);
3025 
3026   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3027     return true;
3028   if (!Desc.mayLoad() || !Desc.mayStore())
3029     return true; // Not atomic
3030 
3031   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3032   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3033 
3034   // This is an incomplete check because image_atomic_cmpswap
3035   // may only use 0x3 and 0xf while other atomic operations
3036   // may use 0x1 and 0x3. However these limitations are
3037   // verified when we check that dmask matches dst size.
3038   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3039 }
3040 
3041 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3042 
3043   const unsigned Opc = Inst.getOpcode();
3044   const MCInstrDesc &Desc = MII.get(Opc);
3045 
3046   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3047     return true;
3048 
3049   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3050   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3051 
3052   // GATHER4 instructions use dmask in a different fashion compared to
3053   // other MIMG instructions. The only useful DMASK values are
3054   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3055   // (red,red,red,red) etc.) The ISA document doesn't mention
3056   // this.
3057   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3058 }
3059 
3060 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3061 {
3062   switch (Opcode) {
3063   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3064   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3065   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3066     return true;
3067   default:
3068     return false;
3069   }
3070 }
3071 
// movrels* opcodes should only allow VGPRs as src0.
3073 // This is specified in .td description for vop1/vop3,
3074 // but sdwa is handled differently. See isSDWAOperand.
3075 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3076 
3077   const unsigned Opc = Inst.getOpcode();
3078   const MCInstrDesc &Desc = MII.get(Opc);
3079 
3080   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3081     return true;
3082 
3083   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3084   assert(Src0Idx != -1);
3085 
3086   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3087   if (!Src0.isReg())
3088     return false;
3089 
3090   auto Reg = Src0.getReg();
3091   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3092   return !isSGPR(mc2PseudoReg(Reg), TRI);
3093 }
3094 
3095 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3096 
3097   const unsigned Opc = Inst.getOpcode();
3098   const MCInstrDesc &Desc = MII.get(Opc);
3099 
3100   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3101     return true;
3102 
3103   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3104   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3105     if (isCI() || isSI())
3106       return false;
3107   }
3108 
3109   return true;
3110 }
3111 
3112 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3113   const unsigned Opc = Inst.getOpcode();
3114   const MCInstrDesc &Desc = MII.get(Opc);
3115 
3116   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3117     return true;
3118 
3119   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3120   if (DimIdx < 0)
3121     return true;
3122 
3123   int64_t Imm = Inst.getOperand(DimIdx).getImm();
3124   if (Imm < 0 || Imm >= 8)
3125     return false;
3126 
3127   return true;
3128 }
3129 
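// Returns true for "rev" opcodes, which swap the roles of src0 and src1
// (e.g. v_subrev_* computes src1 - src0 and v_lshlrev_* shifts src1 by src0).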
3130 static bool IsRevOpcode(const unsigned Opcode)
3131 {
3132   switch (Opcode) {
3133   case AMDGPU::V_SUBREV_F32_e32:
3134   case AMDGPU::V_SUBREV_F32_e64:
3135   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3136   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3137   case AMDGPU::V_SUBREV_F32_e32_vi:
3138   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3139   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3140   case AMDGPU::V_SUBREV_F32_e64_vi:
3141 
3142   case AMDGPU::V_SUBREV_I32_e32:
3143   case AMDGPU::V_SUBREV_I32_e64:
3144   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3145   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3146 
3147   case AMDGPU::V_SUBBREV_U32_e32:
3148   case AMDGPU::V_SUBBREV_U32_e64:
3149   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3150   case AMDGPU::V_SUBBREV_U32_e32_vi:
3151   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3152   case AMDGPU::V_SUBBREV_U32_e64_vi:
3153 
3154   case AMDGPU::V_SUBREV_U32_e32:
3155   case AMDGPU::V_SUBREV_U32_e64:
3156   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3157   case AMDGPU::V_SUBREV_U32_e32_vi:
3158   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3159   case AMDGPU::V_SUBREV_U32_e64_vi:
3160 
3161   case AMDGPU::V_SUBREV_F16_e32:
3162   case AMDGPU::V_SUBREV_F16_e64:
3163   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3164   case AMDGPU::V_SUBREV_F16_e32_vi:
3165   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3166   case AMDGPU::V_SUBREV_F16_e64_vi:
3167 
3168   case AMDGPU::V_SUBREV_U16_e32:
3169   case AMDGPU::V_SUBREV_U16_e64:
3170   case AMDGPU::V_SUBREV_U16_e32_vi:
3171   case AMDGPU::V_SUBREV_U16_e64_vi:
3172 
3173   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3174   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3175   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3176 
3177   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3178   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3179 
3180   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3181   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3182 
3183   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3184   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3185 
3186   case AMDGPU::V_LSHRREV_B32_e32:
3187   case AMDGPU::V_LSHRREV_B32_e64:
3188   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3189   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3190   case AMDGPU::V_LSHRREV_B32_e32_vi:
3191   case AMDGPU::V_LSHRREV_B32_e64_vi:
3192   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3193   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3194 
3195   case AMDGPU::V_ASHRREV_I32_e32:
3196   case AMDGPU::V_ASHRREV_I32_e64:
3197   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3198   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3199   case AMDGPU::V_ASHRREV_I32_e32_vi:
3200   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3201   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3202   case AMDGPU::V_ASHRREV_I32_e64_vi:
3203 
3204   case AMDGPU::V_LSHLREV_B32_e32:
3205   case AMDGPU::V_LSHLREV_B32_e64:
3206   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3207   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3208   case AMDGPU::V_LSHLREV_B32_e32_vi:
3209   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3210   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3211   case AMDGPU::V_LSHLREV_B32_e64_vi:
3212 
3213   case AMDGPU::V_LSHLREV_B16_e32:
3214   case AMDGPU::V_LSHLREV_B16_e64:
3215   case AMDGPU::V_LSHLREV_B16_e32_vi:
3216   case AMDGPU::V_LSHLREV_B16_e64_vi:
3217   case AMDGPU::V_LSHLREV_B16_gfx10:
3218 
3219   case AMDGPU::V_LSHRREV_B16_e32:
3220   case AMDGPU::V_LSHRREV_B16_e64:
3221   case AMDGPU::V_LSHRREV_B16_e32_vi:
3222   case AMDGPU::V_LSHRREV_B16_e64_vi:
3223   case AMDGPU::V_LSHRREV_B16_gfx10:
3224 
3225   case AMDGPU::V_ASHRREV_I16_e32:
3226   case AMDGPU::V_ASHRREV_I16_e64:
3227   case AMDGPU::V_ASHRREV_I16_e32_vi:
3228   case AMDGPU::V_ASHRREV_I16_e64_vi:
3229   case AMDGPU::V_ASHRREV_I16_gfx10:
3230 
3231   case AMDGPU::V_LSHLREV_B64:
3232   case AMDGPU::V_LSHLREV_B64_gfx10:
3233   case AMDGPU::V_LSHLREV_B64_vi:
3234 
3235   case AMDGPU::V_LSHRREV_B64:
3236   case AMDGPU::V_LSHRREV_B64_gfx10:
3237   case AMDGPU::V_LSHRREV_B64_vi:
3238 
3239   case AMDGPU::V_ASHRREV_I64:
3240   case AMDGPU::V_ASHRREV_I64_gfx10:
3241   case AMDGPU::V_ASHRREV_I64_vi:
3242 
3243   case AMDGPU::V_PK_LSHLREV_B16:
3244   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3245   case AMDGPU::V_PK_LSHLREV_B16_vi:
3246 
3247   case AMDGPU::V_PK_LSHRREV_B16:
3248   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3249   case AMDGPU::V_PK_LSHRREV_B16_vi:
3250   case AMDGPU::V_PK_ASHRREV_I16:
3251   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3252   case AMDGPU::V_PK_ASHRREV_I16_vi:
3253     return true;
3254   default:
3255     return false;
3256   }
3257 }
3258 
3259 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3260 
3261   using namespace SIInstrFlags;
3262   const unsigned Opcode = Inst.getOpcode();
3263   const MCInstrDesc &Desc = MII.get(Opcode);
3264 
3265   // lds_direct register is defined so that it can be used
3266   // with 9-bit operands only. Ignore encodings which do not accept these.
3267   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3268     return true;
3269 
3270   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3271   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3272   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3273 
3274   const int SrcIndices[] = { Src1Idx, Src2Idx };
3275 
3276   // lds_direct cannot be specified as either src1 or src2.
3277   for (int SrcIdx : SrcIndices) {
3278     if (SrcIdx == -1) break;
3279     const MCOperand &Src = Inst.getOperand(SrcIdx);
3280     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3281       return false;
3282     }
3283   }
3284 
3285   if (Src0Idx == -1)
3286     return true;
3287 
3288   const MCOperand &Src = Inst.getOperand(Src0Idx);
3289   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3290     return true;
3291 
3292   // lds_direct is specified as src0. Check additional limitations.
3293   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3294 }
3295 
3296 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3297   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3298     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3299     if (Op.isFlatOffset())
3300       return Op.getStartLoc();
3301   }
3302   return getLoc();
3303 }
3304 
3305 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3306                                          const OperandVector &Operands) {
3307   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3308   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3309     return true;
3310 
3311   auto Opcode = Inst.getOpcode();
3312   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3313   assert(OpNum != -1);
3314 
3315   const auto &Op = Inst.getOperand(OpNum);
3316   if (!hasFlatOffsets() && Op.getImm() != 0) {
3317     Error(getFlatOffsetLoc(Operands),
3318           "flat offset modifier is not supported on this GPU");
3319     return false;
3320   }
3321 
3322   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3323   // For FLAT segment the offset must be positive;
3324   // MSB is ignored and forced to zero.
3325   unsigned OffsetSize = isGFX9() ? 13 : 12;
3326   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3327     if (!isIntN(OffsetSize, Op.getImm())) {
3328       Error(getFlatOffsetLoc(Operands),
3329             isGFX9() ? "expected a 13-bit signed offset" :
3330                        "expected a 12-bit signed offset");
3331       return false;
3332     }
3333   } else {
3334     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3335       Error(getFlatOffsetLoc(Operands),
3336             isGFX9() ? "expected a 12-bit unsigned offset" :
3337                        "expected an 11-bit unsigned offset");
3338       return false;
3339     }
3340   }
3341 
3342   return true;
3343 }
3344 
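// SOP2/SOPC instructions may encode at most one 32-bit literal; repeated
// uses of the same literal value share a single encoding slot.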
3345 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3346   unsigned Opcode = Inst.getOpcode();
3347   const MCInstrDesc &Desc = MII.get(Opcode);
3348   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3349     return true;
3350 
3351   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3352   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3353 
3354   const int OpIndices[] = { Src0Idx, Src1Idx };
3355 
3356   unsigned NumExprs = 0;
3357   unsigned NumLiterals = 0;
3358   uint32_t LiteralValue;
3359 
3360   for (int OpIdx : OpIndices) {
3361     if (OpIdx == -1) break;
3362 
3363     const MCOperand &MO = Inst.getOperand(OpIdx);
3364     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3365     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3366       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3367         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3368         if (NumLiterals == 0 || LiteralValue != Value) {
3369           LiteralValue = Value;
3370           ++NumLiterals;
3371         }
3372       } else if (MO.isExpr()) {
3373         ++NumExprs;
3374       }
3375     }
3376   }
3377 
3378   return NumLiterals + NumExprs <= 1;
3379 }
3380 
3381 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3382   const unsigned Opc = Inst.getOpcode();
3383   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3384       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3385     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3386     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3387 
3388     if (OpSel & ~3)
3389       return false;
3390   }
3391   return true;
3392 }
3393 
3394 // Check if VCC register matches wavefront size
3395 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3396   auto FB = getFeatureBits();
3397   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3398     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3399 }
3400 
3401 // VOP3 literal is only allowed in GFX10+ and only one can be used
3402 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3403   unsigned Opcode = Inst.getOpcode();
3404   const MCInstrDesc &Desc = MII.get(Opcode);
3405   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3406     return true;
3407 
3408   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3409   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3410   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3411 
3412   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3413 
3414   unsigned NumExprs = 0;
3415   unsigned NumLiterals = 0;
3416   uint32_t LiteralValue;
3417 
3418   for (int OpIdx : OpIndices) {
3419     if (OpIdx == -1) break;
3420 
3421     const MCOperand &MO = Inst.getOperand(OpIdx);
3422     if (!MO.isImm() && !MO.isExpr())
3423       continue;
3424     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3425       continue;
3426 
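    // On subtargets with the MFMA inline-literal bug, src2 of MAI
    // instructions may not be an immediate operand at all.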
3427     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3428         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3429       return false;
3430 
3431     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3432       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3433       if (NumLiterals == 0 || LiteralValue != Value) {
3434         LiteralValue = Value;
3435         ++NumLiterals;
3436       }
3437     } else if (MO.isExpr()) {
3438       ++NumExprs;
3439     }
3440   }
3441   NumLiterals += NumExprs;
3442 
3443   return !NumLiterals ||
3444          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3445 }
3446 
3447 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3448                                           const SMLoc &IDLoc,
3449                                           const OperandVector &Operands) {
3450   if (!validateLdsDirect(Inst)) {
3451     Error(IDLoc,
3452       "invalid use of lds_direct");
3453     return false;
3454   }
3455   if (!validateSOPLiteral(Inst)) {
3456     Error(IDLoc,
3457       "only one literal operand is allowed");
3458     return false;
3459   }
3460   if (!validateVOP3Literal(Inst)) {
3461     Error(IDLoc,
3462       "invalid literal operand");
3463     return false;
3464   }
3465   if (!validateConstantBusLimitations(Inst)) {
3466     Error(IDLoc,
3467       "invalid operand (violates constant bus restrictions)");
3468     return false;
3469   }
3470   if (!validateEarlyClobberLimitations(Inst)) {
3471     Error(IDLoc,
3472       "destination must be different than all sources");
3473     return false;
3474   }
3475   if (!validateIntClampSupported(Inst)) {
3476     Error(IDLoc,
3477       "integer clamping is not supported on this GPU");
3478     return false;
3479   }
3480   if (!validateOpSel(Inst)) {
3481     Error(IDLoc,
3482       "invalid op_sel operand");
3483     return false;
3484   }
3485   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3486   if (!validateMIMGD16(Inst)) {
3487     Error(IDLoc,
3488       "d16 modifier is not supported on this GPU");
3489     return false;
3490   }
3491   if (!validateMIMGDim(Inst)) {
3492     Error(IDLoc, "dim modifier is required on this GPU");
3493     return false;
3494   }
3495   if (!validateMIMGDataSize(Inst)) {
3496     Error(IDLoc,
3497       "image data size does not match dmask and tfe");
3498     return false;
3499   }
3500   if (!validateMIMGAddrSize(Inst)) {
3501     Error(IDLoc,
3502       "image address size does not match dim and a16");
3503     return false;
3504   }
3505   if (!validateMIMGAtomicDMask(Inst)) {
3506     Error(IDLoc,
3507       "invalid atomic image dmask");
3508     return false;
3509   }
3510   if (!validateMIMGGatherDMask(Inst)) {
3511     Error(IDLoc,
3512       "invalid image_gather dmask: only one bit must be set");
3513     return false;
3514   }
3515   if (!validateMovrels(Inst)) {
3516     Error(IDLoc, "source operand must be a VGPR");
3517     return false;
3518   }
3519   if (!validateFlatOffset(Inst, Operands)) {
3520     return false;
3521   }
3522 
3523   return true;
3524 }
3525 
3526 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3527                                             const FeatureBitset &FBS,
3528                                             unsigned VariantID = 0);
3529 
3530 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3531                                               OperandVector &Operands,
3532                                               MCStreamer &Out,
3533                                               uint64_t &ErrorInfo,
3534                                               bool MatchingInlineAsm) {
3535   MCInst Inst;
3536   unsigned Result = Match_Success;
3537   for (auto Variant : getMatchedVariants()) {
3538     uint64_t EI;
3539     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3540                                   Variant);
3541     // We order match statuses from least to most specific and use the most
3542     // specific status as the final result:
3543     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3544     if ((R == Match_Success) ||
3545         (R == Match_PreferE32) ||
3546         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3547         (R == Match_InvalidOperand && Result != Match_MissingFeature
3548                                    && Result != Match_PreferE32) ||
3549         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3550                                    && Result != Match_MissingFeature
3551                                    && Result != Match_PreferE32)) {
3552       Result = R;
3553       ErrorInfo = EI;
3554     }
3555     if (R == Match_Success)
3556       break;
3557   }
3558 
3559   switch (Result) {
3560   default: break;
3561   case Match_Success:
3562     if (!validateInstruction(Inst, IDLoc, Operands)) {
3563       return true;
3564     }
3565     Inst.setLoc(IDLoc);
3566     Out.EmitInstruction(Inst, getSTI());
3567     return false;
3568 
3569   case Match_MissingFeature:
3570     return Error(IDLoc, "instruction not supported on this GPU");
3571 
3572   case Match_MnemonicFail: {
3573     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3574     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3575         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3576     return Error(IDLoc, "invalid instruction" + Suggestion,
3577                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3578   }
3579 
3580   case Match_InvalidOperand: {
3581     SMLoc ErrorLoc = IDLoc;
3582     if (ErrorInfo != ~0ULL) {
3583       if (ErrorInfo >= Operands.size()) {
3584         return Error(IDLoc, "too few operands for instruction");
3585       }
3586       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3587       if (ErrorLoc == SMLoc())
3588         ErrorLoc = IDLoc;
3589     }
3590     return Error(ErrorLoc, "invalid operand for instruction");
3591   }
3592 
3593   case Match_PreferE32:
3594     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3595                         "should be encoded as e32");
3596   }
3597   llvm_unreachable("Implement any new match types added!");
3598 }
3599 
3600 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3601   int64_t Tmp = -1;
3602   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3603     return true;
3604   }
3605   if (getParser().parseAbsoluteExpression(Tmp)) {
3606     return true;
3607   }
3608   Ret = static_cast<uint32_t>(Tmp);
3609   return false;
3610 }
3611 
3612 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3613                                                uint32_t &Minor) {
3614   if (ParseAsAbsoluteExpression(Major))
3615     return TokError("invalid major version");
3616 
3617   if (getLexer().isNot(AsmToken::Comma))
3618     return TokError("minor version number required, comma expected");
3619   Lex();
3620 
3621   if (ParseAsAbsoluteExpression(Minor))
3622     return TokError("invalid minor version");
3623 
3624   return false;
3625 }
3626 
3627 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3628   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3629     return TokError("directive only supported for amdgcn architecture");
3630 
3631   std::string Target;
3632 
3633   SMLoc TargetStart = getTok().getLoc();
3634   if (getParser().parseEscapedString(Target))
3635     return true;
3636   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3637 
3638   std::string ExpectedTarget;
3639   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3640   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3641 
3642   if (Target != ExpectedTargetOS.str())
3643     return getParser().Error(TargetRange.Start, "target must match options",
3644                              TargetRange);
3645 
3646   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3647   return false;
3648 }
3649 
3650 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3651   return getParser().Error(Range.Start, "value out of range", Range);
3652 }
3653 
3654 bool AMDGPUAsmParser::calculateGPRBlocks(
3655     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3656     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3657     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3658     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3659   // TODO(scott.linder): These calculations are duplicated from
3660   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3661   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3662 
3663   unsigned NumVGPRs = NextFreeVGPR;
3664   unsigned NumSGPRs = NextFreeSGPR;
3665 
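  // GFX10+ does not use the granulated SGPR count, so request zero SGPR
  // blocks.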
3666   if (Version.Major >= 10)
3667     NumSGPRs = 0;
3668   else {
3669     unsigned MaxAddressableNumSGPRs =
3670         IsaInfo::getAddressableNumSGPRs(&getSTI());
3671 
3672     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3673         NumSGPRs > MaxAddressableNumSGPRs)
3674       return OutOfRangeError(SGPRRange);
3675 
3676     NumSGPRs +=
3677         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3678 
3679     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3680         NumSGPRs > MaxAddressableNumSGPRs)
3681       return OutOfRangeError(SGPRRange);
3682 
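    // Targets with the SGPR init bug must program a fixed SGPR count
    // regardless of the number actually used.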
3683     if (Features.test(FeatureSGPRInitBug))
3684       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3685   }
3686 
3687   VGPRBlocks =
3688       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3689   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3690 
3691   return false;
3692 }
3693 
3694 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3695   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3696     return TokError("directive only supported for amdgcn architecture");
3697 
3698   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3699     return TokError("directive only supported for amdhsa OS");
3700 
3701   StringRef KernelName;
3702   if (getParser().parseIdentifier(KernelName))
3703     return true;
3704 
3705   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3706 
3707   StringSet<> Seen;
3708 
3709   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3710 
3711   SMRange VGPRRange;
3712   uint64_t NextFreeVGPR = 0;
3713   SMRange SGPRRange;
3714   uint64_t NextFreeSGPR = 0;
3715   unsigned UserSGPRCount = 0;
3716   bool ReserveVCC = true;
3717   bool ReserveFlatScr = true;
3718   bool ReserveXNACK = hasXNACK();
3719   Optional<bool> EnableWavefrontSize32;
3720 
3721   while (true) {
3722     while (getLexer().is(AsmToken::EndOfStatement))
3723       Lex();
3724 
3725     if (getLexer().isNot(AsmToken::Identifier))
3726       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3727 
3728     StringRef ID = getTok().getIdentifier();
3729     SMRange IDRange = getTok().getLocRange();
3730     Lex();
3731 
3732     if (ID == ".end_amdhsa_kernel")
3733       break;
3734 
3735     if (Seen.find(ID) != Seen.end())
3736       return TokError(".amdhsa_ directives cannot be repeated");
3737     Seen.insert(ID);
3738 
3739     SMLoc ValStart = getTok().getLoc();
3740     int64_t IVal;
3741     if (getParser().parseAbsoluteExpression(IVal))
3742       return true;
3743     SMLoc ValEnd = getTok().getLoc();
3744     SMRange ValRange = SMRange(ValStart, ValEnd);
3745 
3746     if (IVal < 0)
3747       return OutOfRangeError(ValRange);
3748 
3749     uint64_t Val = IVal;
3750 
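// Range-check VALUE against the width of the ENTRY bitfield, then store it
// into the ENTRY bitfield of FIELD.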
3751 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3752   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3753     return OutOfRangeError(RANGE);                                             \
3754   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3755 
3756     if (ID == ".amdhsa_group_segment_fixed_size") {
3757       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3758         return OutOfRangeError(ValRange);
3759       KD.group_segment_fixed_size = Val;
3760     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3761       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3762         return OutOfRangeError(ValRange);
3763       KD.private_segment_fixed_size = Val;
3764     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3765       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3766                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3767                        Val, ValRange);
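      // The private segment buffer is a 128-bit resource descriptor and
      // therefore occupies four user SGPRs.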
3768       if (Val)
3769         UserSGPRCount += 4;
3770     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3771       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3772                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3773                        ValRange);
3774       if (Val)
3775         UserSGPRCount += 2;
3776     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3777       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3778                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3779                        ValRange);
3780       if (Val)
3781         UserSGPRCount += 2;
3782     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3783       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3784                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3785                        Val, ValRange);
3786       if (Val)
3787         UserSGPRCount += 2;
3788     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3789       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3790                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3791                        ValRange);
3792       if (Val)
3793         UserSGPRCount += 2;
3794     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3795       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3796                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3797                        ValRange);
3798       if (Val)
3799         UserSGPRCount += 2;
3800     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3801       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3802                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3803                        Val, ValRange);
3804       if (Val)
3805         UserSGPRCount += 1;
3806     } else if (ID == ".amdhsa_wavefront_size32") {
3807       if (IVersion.Major < 10)
3808         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3809                                  IDRange);
3810       EnableWavefrontSize32 = Val;
3811       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3812                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3813                        Val, ValRange);
3814     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3815       PARSE_BITS_ENTRY(
3816           KD.compute_pgm_rsrc2,
3817           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3818           ValRange);
3819     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3821                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3822                        ValRange);
3823     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3824       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3825                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3826                        ValRange);
3827     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3828       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3829                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3830                        ValRange);
3831     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3832       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3833                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3834                        ValRange);
3835     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3836       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3837                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3838                        ValRange);
3839     } else if (ID == ".amdhsa_next_free_vgpr") {
3840       VGPRRange = ValRange;
3841       NextFreeVGPR = Val;
3842     } else if (ID == ".amdhsa_next_free_sgpr") {
3843       SGPRRange = ValRange;
3844       NextFreeSGPR = Val;
3845     } else if (ID == ".amdhsa_reserve_vcc") {
3846       if (!isUInt<1>(Val))
3847         return OutOfRangeError(ValRange);
3848       ReserveVCC = Val;
3849     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3850       if (IVersion.Major < 7)
3851         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3852                                  IDRange);
3853       if (!isUInt<1>(Val))
3854         return OutOfRangeError(ValRange);
3855       ReserveFlatScr = Val;
3856     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3857       if (IVersion.Major < 8)
3858         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3859                                  IDRange);
3860       if (!isUInt<1>(Val))
3861         return OutOfRangeError(ValRange);
3862       ReserveXNACK = Val;
3863     } else if (ID == ".amdhsa_float_round_mode_32") {
3864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3865                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3866     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3867       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3868                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3869     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3870       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3871                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3872     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3873       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3874                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3875                        ValRange);
3876     } else if (ID == ".amdhsa_dx10_clamp") {
3877       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3878                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3879     } else if (ID == ".amdhsa_ieee_mode") {
3880       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3881                        Val, ValRange);
3882     } else if (ID == ".amdhsa_fp16_overflow") {
3883       if (IVersion.Major < 9)
3884         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3885                                  IDRange);
3886       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3887                        ValRange);
3888     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3889       if (IVersion.Major < 10)
3890         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3891                                  IDRange);
3892       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3893                        ValRange);
3894     } else if (ID == ".amdhsa_memory_ordered") {
3895       if (IVersion.Major < 10)
3896         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3897                                  IDRange);
3898       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3899                        ValRange);
3900     } else if (ID == ".amdhsa_forward_progress") {
3901       if (IVersion.Major < 10)
3902         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3903                                  IDRange);
3904       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3905                        ValRange);
3906     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3907       PARSE_BITS_ENTRY(
3908           KD.compute_pgm_rsrc2,
3909           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3910           ValRange);
3911     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3912       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3913                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3914                        Val, ValRange);
3915     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3916       PARSE_BITS_ENTRY(
3917           KD.compute_pgm_rsrc2,
3918           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3919           ValRange);
3920     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3921       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3922                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3923                        Val, ValRange);
3924     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3925       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3926                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3927                        Val, ValRange);
3928     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3929       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3930                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3931                        Val, ValRange);
3932     } else if (ID == ".amdhsa_exception_int_div_zero") {
3933       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3934                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3935                        Val, ValRange);
3936     } else {
3937       return getParser().Error(IDRange.Start,
3938                                "unknown .amdhsa_kernel directive", IDRange);
3939     }
3940 
3941 #undef PARSE_BITS_ENTRY
3942   }
3943 
3944   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3945     return TokError(".amdhsa_next_free_vgpr directive is required");
3946 
3947   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3948     return TokError(".amdhsa_next_free_sgpr directive is required");
3949 
3950   unsigned VGPRBlocks;
3951   unsigned SGPRBlocks;
3952   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3953                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3954                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3955                          SGPRBlocks))
3956     return true;
3957 
3958   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3959           VGPRBlocks))
3960     return OutOfRangeError(VGPRRange);
3961   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3962                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3963 
3964   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3965           SGPRBlocks))
3966     return OutOfRangeError(SGPRRange);
3967   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3968                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3969                   SGPRBlocks);
3970 
3971   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3972     return TokError("too many user SGPRs enabled");
3973   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3974                   UserSGPRCount);
3975 
3976   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3977       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3978       ReserveFlatScr, ReserveXNACK);
3979   return false;
3980 }
3981 
3982 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3983   uint32_t Major;
3984   uint32_t Minor;
3985 
3986   if (ParseDirectiveMajorMinor(Major, Minor))
3987     return true;
3988 
3989   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3990   return false;
3991 }
3992 
3993 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3994   uint32_t Major;
3995   uint32_t Minor;
3996   uint32_t Stepping;
3997   StringRef VendorName;
3998   StringRef ArchName;
3999 
4000   // If this directive has no arguments, then use the ISA version for the
4001   // targeted GPU.
4002   if (getLexer().is(AsmToken::EndOfStatement)) {
4003     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4004     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4005                                                       ISA.Stepping,
4006                                                       "AMD", "AMDGPU");
4007     return false;
4008   }
4009 
4010   if (ParseDirectiveMajorMinor(Major, Minor))
4011     return true;
4012 
4013   if (getLexer().isNot(AsmToken::Comma))
4014     return TokError("stepping version number required, comma expected");
4015   Lex();
4016 
4017   if (ParseAsAbsoluteExpression(Stepping))
4018     return TokError("invalid stepping version");
4019 
4020   if (getLexer().isNot(AsmToken::Comma))
4021     return TokError("vendor name required, comma expected");
4022   Lex();
4023 
4024   if (getLexer().isNot(AsmToken::String))
4025     return TokError("invalid vendor name");
4026 
4027   VendorName = getLexer().getTok().getStringContents();
4028   Lex();
4029 
4030   if (getLexer().isNot(AsmToken::Comma))
4031     return TokError("arch name required, comma expected");
4032   Lex();
4033 
4034   if (getLexer().isNot(AsmToken::String))
4035     return TokError("invalid arch name");
4036 
4037   ArchName = getLexer().getTok().getStringContents();
4038   Lex();
4039 
4040   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4041                                                     VendorName, ArchName);
4042   return false;
4043 }
4044 
4045 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4046                                                amd_kernel_code_t &Header) {
4047   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4048   // assembly for backwards compatibility.
4049   if (ID == "max_scratch_backing_memory_byte_size") {
4050     Parser.eatToEndOfStatement();
4051     return false;
4052   }
4053 
4054   SmallString<40> ErrStr;
4055   raw_svector_ostream Err(ErrStr);
4056   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4057     return TokError(Err.str());
4058   }
4059   Lex();
4060 
4061   if (ID == "enable_wavefront_size32") {
4062     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4063       if (!isGFX10())
4064         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4065       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4066         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4067     } else {
4068       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4069         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4070     }
4071   }
4072 
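  // amd_kernel_code_t encodes wavefront_size as a log2 value:
  // 5 means wave32, 6 means wave64.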
4073   if (ID == "wavefront_size") {
4074     if (Header.wavefront_size == 5) {
4075       if (!isGFX10())
4076         return TokError("wavefront_size=5 is only allowed on GFX10+");
4077       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4078         return TokError("wavefront_size=5 requires +WavefrontSize32");
4079     } else if (Header.wavefront_size == 6) {
4080       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4081         return TokError("wavefront_size=6 requires +WavefrontSize64");
4082     }
4083   }
4084 
4085   if (ID == "enable_wgp_mode") {
4086     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4087       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4088   }
4089 
4090   if (ID == "enable_mem_ordered") {
4091     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4092       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4093   }
4094 
4095   if (ID == "enable_fwd_progress") {
4096     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4097       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4098   }
4099 
4100   return false;
4101 }
4102 
4103 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4104   amd_kernel_code_t Header;
4105   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4106 
4107   while (true) {
4108     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4109     // will set the current token to EndOfStatement.
4110     while(getLexer().is(AsmToken::EndOfStatement))
4111       Lex();
4112 
4113     if (getLexer().isNot(AsmToken::Identifier))
4114       return TokError("expected value identifier or .end_amd_kernel_code_t");
4115 
4116     StringRef ID = getLexer().getTok().getIdentifier();
4117     Lex();
4118 
4119     if (ID == ".end_amd_kernel_code_t")
4120       break;
4121 
4122     if (ParseAMDKernelCodeTValue(ID, Header))
4123       return true;
4124   }
4125 
4126   getTargetStreamer().EmitAMDKernelCodeT(Header);
4127 
4128   return false;
4129 }
4130 
4131 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4132   if (getLexer().isNot(AsmToken::Identifier))
4133     return TokError("expected symbol name");
4134 
4135   StringRef KernelName = Parser.getTok().getString();
4136 
4137   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4138                                            ELF::STT_AMDGPU_HSA_KERNEL);
4139   Lex();
4140   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4141     KernelScope.initialize(getContext());
4142   return false;
4143 }
4144 
4145 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4146   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4147     return Error(getParser().getTok().getLoc(),
4148                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4149                  "architectures");
4150   }
4151 
4152   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4153 
4154   std::string ISAVersionStringFromSTI;
4155   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4156   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4157 
4158   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4159     return Error(getParser().getTok().getLoc(),
4160                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4161                  "arguments specified through the command line");
4162   }
4163 
4164   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4165   Lex();
4166 
4167   return false;
4168 }
4169 
4170 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4171   const char *AssemblerDirectiveBegin;
4172   const char *AssemblerDirectiveEnd;
4173   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4174       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4175           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4176                             HSAMD::V3::AssemblerDirectiveEnd)
4177           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4178                             HSAMD::AssemblerDirectiveEnd);
4179 
4180   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4181     return Error(getParser().getTok().getLoc(),
4182                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4183                  "not available on non-amdhsa OSes")).str());
4184   }
4185 
4186   std::string HSAMetadataString;
4187   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4188                           HSAMetadataString))
4189     return true;
4190 
4191   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4192     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4193       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4194   } else {
4195     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4196       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4197   }
4198 
4199   return false;
4200 }
4201 
4202 /// Common code to parse out a block of text (typically YAML) between start and
4203 /// end directives.
4204 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4205                                           const char *AssemblerDirectiveEnd,
4206                                           std::string &CollectString) {
4207 
4208   raw_string_ostream CollectStream(CollectString);
4209 
4210   getLexer().setSkipSpace(false);
4211 
4212   bool FoundEnd = false;
4213   while (!getLexer().is(AsmToken::Eof)) {
4214     while (getLexer().is(AsmToken::Space)) {
4215       CollectStream << getLexer().getTok().getString();
4216       Lex();
4217     }
4218 
4219     if (getLexer().is(AsmToken::Identifier)) {
4220       StringRef ID = getLexer().getTok().getIdentifier();
4221       if (ID == AssemblerDirectiveEnd) {
4222         Lex();
4223         FoundEnd = true;
4224         break;
4225       }
4226     }
4227 
4228     CollectStream << Parser.parseStringToEndOfStatement()
4229                   << getContext().getAsmInfo()->getSeparatorString();
4230 
4231     Parser.eatToEndOfStatement();
4232   }
4233 
4234   getLexer().setSkipSpace(true);
4235 
4236   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4237     return TokError(Twine("expected directive ") +
4238                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4239   }
4240 
4241   CollectStream.flush();
4242   return false;
4243 }
4244 
4245 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4246 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4247   std::string String;
4248   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4249                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4250     return true;
4251 
4252   auto PALMetadata = getTargetStreamer().getPALMetadata();
4253   if (!PALMetadata->setFromString(String))
4254     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4255   return false;
4256 }
4257 
4258 /// Parse the assembler directive for old linear-format PAL metadata.
4259 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4260   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4261     return Error(getParser().getTok().getLoc(),
4262                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4263                  "not available on non-amdpal OSes")).str());
4264   }
4265 
4266   auto PALMetadata = getTargetStreamer().getPALMetadata();
4267   PALMetadata->setLegacy();
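  // The legacy format is a comma-separated list of register/value pairs.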
4268   for (;;) {
4269     uint32_t Key, Value;
4270     if (ParseAsAbsoluteExpression(Key)) {
4271       return TokError(Twine("invalid value in ") +
4272                       Twine(PALMD::AssemblerDirective));
4273     }
4274     if (getLexer().isNot(AsmToken::Comma)) {
4275       return TokError(Twine("expected an even number of values in ") +
4276                       Twine(PALMD::AssemblerDirective));
4277     }
4278     Lex();
4279     if (ParseAsAbsoluteExpression(Value)) {
4280       return TokError(Twine("invalid value in ") +
4281                       Twine(PALMD::AssemblerDirective));
4282     }
4283     PALMetadata->setRegister(Key, Value);
4284     if (getLexer().isNot(AsmToken::Comma))
4285       break;
4286     Lex();
4287   }
4288   return false;
4289 }
4290 
4291 /// ParseDirectiveAMDGPULDS
4292 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4293 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4294   if (getParser().checkForValidSection())
4295     return true;
4296 
4297   StringRef Name;
4298   SMLoc NameLoc = getLexer().getLoc();
4299   if (getParser().parseIdentifier(Name))
4300     return TokError("expected identifier in directive");
4301 
4302   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4303   if (parseToken(AsmToken::Comma, "expected ','"))
4304     return true;
4305 
4306   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4307 
4308   int64_t Size;
4309   SMLoc SizeLoc = getLexer().getLoc();
4310   if (getParser().parseAbsoluteExpression(Size))
4311     return true;
4312   if (Size < 0)
4313     return Error(SizeLoc, "size must be non-negative");
4314   if (Size > LocalMemorySize)
4315     return Error(SizeLoc, "size is too large");
4316 
4317   int64_t Align = 4;
4318   if (getLexer().is(AsmToken::Comma)) {
4319     Lex();
4320     SMLoc AlignLoc = getLexer().getLoc();
4321     if (getParser().parseAbsoluteExpression(Align))
4322       return true;
4323     if (Align < 0 || !isPowerOf2_64(Align))
4324       return Error(AlignLoc, "alignment must be a power of two");
4325 
4326     // Alignment larger than the size of LDS is possible in theory, as long
4327     // as the linker manages to place the symbol at address 0, but we do want
4328     // to make sure the alignment fits nicely into a 32-bit integer.
4329     if (Align >= 1u << 31)
4330       return Error(AlignLoc, "alignment is too large");
4331   }
4332 
4333   if (parseToken(AsmToken::EndOfStatement,
4334                  "unexpected token in '.amdgpu_lds' directive"))
4335     return true;
4336 
4337   Symbol->redefineIfPossible();
4338   if (!Symbol->isUndefined())
4339     return Error(NameLoc, "invalid symbol redefinition");
4340 
4341   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4342   return false;
4343 }
4344 
4345 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4346   StringRef IDVal = DirectiveID.getString();
4347 
4348   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4349     if (IDVal == ".amdgcn_target")
4350       return ParseDirectiveAMDGCNTarget();
4351 
4352     if (IDVal == ".amdhsa_kernel")
4353       return ParseDirectiveAMDHSAKernel();
4354 
4355     // TODO: Restructure/combine with PAL metadata directive.
4356     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4357       return ParseDirectiveHSAMetadata();
4358   } else {
4359     if (IDVal == ".hsa_code_object_version")
4360       return ParseDirectiveHSACodeObjectVersion();
4361 
4362     if (IDVal == ".hsa_code_object_isa")
4363       return ParseDirectiveHSACodeObjectISA();
4364 
4365     if (IDVal == ".amd_kernel_code_t")
4366       return ParseDirectiveAMDKernelCodeT();
4367 
4368     if (IDVal == ".amdgpu_hsa_kernel")
4369       return ParseDirectiveAMDGPUHsaKernel();
4370 
4371     if (IDVal == ".amd_amdgpu_isa")
4372       return ParseDirectiveISAVersion();
4373 
4374     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4375       return ParseDirectiveHSAMetadata();
4376   }
4377 
4378   if (IDVal == ".amdgpu_lds")
4379     return ParseDirectiveAMDGPULDS();
4380 
4381   if (IDVal == PALMD::AssemblerDirectiveBegin)
4382     return ParseDirectivePALMetadataBegin();
4383 
4384   if (IDVal == PALMD::AssemblerDirective)
4385     return ParseDirectivePALMetadata();
4386 
4387   return true;
4388 }
4389 
4390 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4391                                            unsigned RegNo) const {
4392 
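  // ttmp12..ttmp15 only exist on GFX9 and GFX10; earlier targets have
  // only 12 trap temporary registers (ttmp0..ttmp11).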
4393   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4394        R.isValid(); ++R) {
4395     if (*R == RegNo)
4396       return isGFX9() || isGFX10();
4397   }
4398 
4399   // GFX10 has 2 more SGPRs 104 and 105.
4400   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4401        R.isValid(); ++R) {
4402     if (*R == RegNo)
4403       return hasSGPR104_SGPR105();
4404   }
4405 
4406   switch (RegNo) {
4407   case AMDGPU::SRC_SHARED_BASE:
4408   case AMDGPU::SRC_SHARED_LIMIT:
4409   case AMDGPU::SRC_PRIVATE_BASE:
4410   case AMDGPU::SRC_PRIVATE_LIMIT:
4411   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4412     return !isCI() && !isSI() && !isVI();
4413   case AMDGPU::TBA:
4414   case AMDGPU::TBA_LO:
4415   case AMDGPU::TBA_HI:
4416   case AMDGPU::TMA:
4417   case AMDGPU::TMA_LO:
4418   case AMDGPU::TMA_HI:
4419     return !isGFX9() && !isGFX10();
4420   case AMDGPU::XNACK_MASK:
4421   case AMDGPU::XNACK_MASK_LO:
4422   case AMDGPU::XNACK_MASK_HI:
4423     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4424   case AMDGPU::SGPR_NULL:
4425     return isGFX10();
4426   default:
4427     break;
4428   }
4429 
4430   if (isCI())
4431     return true;
4432 
4433   if (isSI() || isGFX10()) {
4434     // No flat_scr on SI.
4435     // On GFX10 flat scratch is not a valid register operand and can only be
4436     // accessed with s_setreg/s_getreg.
4437     switch (RegNo) {
4438     case AMDGPU::FLAT_SCR:
4439     case AMDGPU::FLAT_SCR_LO:
4440     case AMDGPU::FLAT_SCR_HI:
4441       return false;
4442     default:
4443       return true;
4444     }
4445   }
4446 
4447   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4448   // SI/CI have.
4449   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4450        R.isValid(); ++R) {
4451     if (*R == RegNo)
4452       return hasSGPR102_SGPR103();
4453   }
4454 
4455   return true;
4456 }
4457 
4458 OperandMatchResultTy
4459 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4460                               OperandMode Mode) {
4461   // Try to parse with a custom parser
4462   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4463 
4464   // If we successfully parsed the operand or if there was an error parsing,
4465   // we are done.
4466   //
4467   // If we are parsing after we reach EndOfStatement then this means we
4468   // are appending default values to the Operands list.  This is only done
4469   // by custom parser, so we shouldn't continue on to the generic parsing.
4470   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4471       getLexer().is(AsmToken::EndOfStatement))
4472     return ResTy;
4473 
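  // In NSA (non-sequential address) form, GFX10 MIMG address operands are
  // written as a bracketed list of registers, e.g. [v0, v2, v5].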
4474   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4475     unsigned Prefix = Operands.size();
4476     SMLoc LBraceLoc = getTok().getLoc();
4477     Parser.Lex(); // eat the '['
4478 
4479     for (;;) {
4480       ResTy = parseReg(Operands);
4481       if (ResTy != MatchOperand_Success)
4482         return ResTy;
4483 
4484       if (getLexer().is(AsmToken::RBrac))
4485         break;
4486 
4487       if (getLexer().isNot(AsmToken::Comma))
4488         return MatchOperand_ParseFail;
4489       Parser.Lex();
4490     }
4491 
4492     if (Operands.size() - Prefix > 1) {
4493       Operands.insert(Operands.begin() + Prefix,
4494                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4495       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4496                                                     getTok().getLoc()));
4497     }
4498 
4499     Parser.Lex(); // eat the ']'
4500     return MatchOperand_Success;
4501   }
4502 
4503   return parseRegOrImm(Operands);
4504 }
4505 
4506 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4507   // Clear any forced encodings from the previous instruction.
4508   setForcedEncodingSize(0);
4509   setForcedDPP(false);
4510   setForcedSDWA(false);
4511 
4512   if (Name.endswith("_e64")) {
4513     setForcedEncodingSize(64);
4514     return Name.substr(0, Name.size() - 4);
4515   } else if (Name.endswith("_e32")) {
4516     setForcedEncodingSize(32);
4517     return Name.substr(0, Name.size() - 4);
4518   } else if (Name.endswith("_dpp")) {
4519     setForcedDPP(true);
4520     return Name.substr(0, Name.size() - 4);
4521   } else if (Name.endswith("_sdwa")) {
4522     setForcedSDWA(true);
4523     return Name.substr(0, Name.size() - 5);
4524   }
4525   return Name;
4526 }
4527 
4528 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4529                                        StringRef Name,
4530                                        SMLoc NameLoc, OperandVector &Operands) {
4531   // Add the instruction mnemonic
4532   Name = parseMnemonicSuffix(Name);
4533   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4534 
4535   bool IsMIMG = Name.startswith("image_");
4536 
4537   while (!getLexer().is(AsmToken::EndOfStatement)) {
4538     OperandMode Mode = OperandMode_Default;
4539     if (IsMIMG && isGFX10() && Operands.size() == 2)
4540       Mode = OperandMode_NSA;
4541     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4542 
4543     // Eat the comma or space if there is one.
4544     if (getLexer().is(AsmToken::Comma))
4545       Parser.Lex();
4546 
4547     switch (Res) {
4548       case MatchOperand_Success: break;
4549       case MatchOperand_ParseFail:
4550         // FIXME: use real operand location rather than the current location.
4551         Error(getLexer().getLoc(), "failed parsing operand.");
4552         while (!getLexer().is(AsmToken::EndOfStatement)) {
4553           Parser.Lex();
4554         }
4555         return true;
4556       case MatchOperand_NoMatch:
4557         // FIXME: use real operand location rather than the current location.
4558         Error(getLexer().getLoc(), "not a valid operand.");
4559         while (!getLexer().is(AsmToken::EndOfStatement)) {
4560           Parser.Lex();
4561         }
4562         return true;
4563     }
4564   }
4565 
4566   return false;
4567 }
4568 
4569 //===----------------------------------------------------------------------===//
4570 // Utility functions
4571 //===----------------------------------------------------------------------===//
4572 
4573 OperandMatchResultTy
4574 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4575 
4576   if (!trySkipId(Prefix, AsmToken::Colon))
4577     return MatchOperand_NoMatch;
4578 
4579   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4580 }
4581 
4582 OperandMatchResultTy
4583 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4584                                     AMDGPUOperand::ImmTy ImmTy,
4585                                     bool (*ConvertResult)(int64_t&)) {
4586   SMLoc S = getLoc();
4587   int64_t Value = 0;
4588 
4589   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4590   if (Res != MatchOperand_Success)
4591     return Res;
4592 
4593   if (ConvertResult && !ConvertResult(Value)) {
4594     Error(S, "invalid " + StringRef(Prefix) + " value.");
4595   }
4596 
4597   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4598   return MatchOperand_Success;
4599 }
4600 
4601 OperandMatchResultTy
4602 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4603                                              OperandVector &Operands,
4604                                              AMDGPUOperand::ImmTy ImmTy,
4605                                              bool (*ConvertResult)(int64_t&)) {
4606   SMLoc S = getLoc();
4607   if (!trySkipId(Prefix, AsmToken::Colon))
4608     return MatchOperand_NoMatch;
4609 
4610   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4611     return MatchOperand_ParseFail;
4612 
4613   unsigned Val = 0;
4614   const unsigned MaxSize = 4;
4615 
4616   // FIXME: How to verify the number of elements matches the number of src
4617   // operands?
4618   for (int I = 0; ; ++I) {
4619     int64_t Op;
4620     SMLoc Loc = getLoc();
4621     if (!parseExpr(Op))
4622       return MatchOperand_ParseFail;
4623 
4624     if (Op != 0 && Op != 1) {
4625       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4626       return MatchOperand_ParseFail;
4627     }
4628 
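         // Element I of the array supplies bit I of the result.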
4629     Val |= (Op << I);
4630 
4631     if (trySkipToken(AsmToken::RBrac))
4632       break;
4633 
4634     if (I + 1 == MaxSize) {
4635       Error(getLoc(), "expected a closing square bracket");
4636       return MatchOperand_ParseFail;
4637     }
4638 
4639     if (!skipToken(AsmToken::Comma, "expected a comma"))
4640       return MatchOperand_ParseFail;
4641   }
4642 
4643   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4644   return MatchOperand_Success;
4645 }
4646 
4647 OperandMatchResultTy
4648 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4649                                AMDGPUOperand::ImmTy ImmTy) {
4650   int64_t Bit = 0;
4651   SMLoc S = Parser.getTok().getLoc();
4652 
4653   // If we are at the end of the statement, this is a default argument, so we
4654   // use the default value (zero).
4655   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4656     switch(getLexer().getKind()) {
4657       case AsmToken::Identifier: {
4658         StringRef Tok = Parser.getTok().getString();
4659         if (Tok == Name) {
4660           if (Tok == "r128" && !hasMIMG_R128())
4661             Error(S, "r128 modifier is not supported on this GPU");
4662           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4663             Error(S, "a16 modifier is not supported on this GPU");
4664           Bit = 1;
4665           Parser.Lex();
4666         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4667           Bit = 0;
4668           Parser.Lex();
4669         } else {
4670           return MatchOperand_NoMatch;
4671         }
4672         break;
4673       }
4674       default:
4675         return MatchOperand_NoMatch;
4676     }
4677   }
4678 
4679   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4680     return MatchOperand_ParseFail;
4681 
4682   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4683     ImmTy = AMDGPUOperand::ImmTyR128A16;
4684 
4685   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4686   return MatchOperand_Success;
4687 }
4688 
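     // Add an optional immediate operand to Inst: use the value recorded in
     // OptionalIdx if the modifier was parsed, otherwise add Default.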
4689 static void addOptionalImmOperand(
4690   MCInst& Inst, const OperandVector& Operands,
4691   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4692   AMDGPUOperand::ImmTy ImmT,
4693   int64_t Default = 0) {
4694   auto i = OptionalIdx.find(ImmT);
4695   if (i != OptionalIdx.end()) {
4696     unsigned Idx = i->second;
4697     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4698   } else {
4699     Inst.addOperand(MCOperand::createImm(Default));
4700   }
4701 }
4702 
4703 OperandMatchResultTy
4704 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4705   if (getLexer().isNot(AsmToken::Identifier)) {
4706     return MatchOperand_NoMatch;
4707   }
4708   StringRef Tok = Parser.getTok().getString();
4709   if (Tok != Prefix) {
4710     return MatchOperand_NoMatch;
4711   }
4712 
4713   Parser.Lex();
4714   if (getLexer().isNot(AsmToken::Colon)) {
4715     return MatchOperand_ParseFail;
4716   }
4717 
4718   Parser.Lex();
4719   if (getLexer().isNot(AsmToken::Identifier)) {
4720     return MatchOperand_ParseFail;
4721   }
4722 
4723   Value = Parser.getTok().getString();
4724   return MatchOperand_Success;
4725 }
4726 
4727 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4728 // values to live in a joint format operand in the MCInst encoding.
4729 OperandMatchResultTy
4730 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4731   SMLoc S = Parser.getTok().getLoc();
4732   int64_t Dfmt = 0, Nfmt = 0;
4733   // dfmt and nfmt can appear in either order, and each is optional.
4734   bool GotDfmt = false, GotNfmt = false;
4735   while (!GotDfmt || !GotNfmt) {
4736     if (!GotDfmt) {
4737       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4738       if (Res != MatchOperand_NoMatch) {
4739         if (Res != MatchOperand_Success)
4740           return Res;
4741         if (Dfmt >= 16) {
4742           Error(Parser.getTok().getLoc(), "out of range dfmt");
4743           return MatchOperand_ParseFail;
4744         }
4745         GotDfmt = true;
4746         Parser.Lex();
4747         continue;
4748       }
4749     }
4750     if (!GotNfmt) {
4751       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4752       if (Res != MatchOperand_NoMatch) {
4753         if (Res != MatchOperand_Success)
4754           return Res;
4755         if (Nfmt >= 8) {
4756           Error(Parser.getTok().getLoc(), "out of range nfmt");
4757           return MatchOperand_ParseFail;
4758         }
4759         GotNfmt = true;
4760         Parser.Lex();
4761         continue;
4762       }
4763     }
4764     break;
4765   }
4766   if (!GotDfmt && !GotNfmt)
4767     return MatchOperand_NoMatch;
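       // Pack the combined format operand: dfmt occupies bits [3:0], nfmt bits [6:4].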
4768   auto Format = Dfmt | Nfmt << 4;
4769   Operands.push_back(
4770       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4771   return MatchOperand_Success;
4772 }
4773 
4774 //===----------------------------------------------------------------------===//
4775 // ds
4776 //===----------------------------------------------------------------------===//
4777 
4778 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4779                                     const OperandVector &Operands) {
4780   OptionalImmIndexMap OptionalIdx;
4781 
4782   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4783     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4784 
4785     // Add the register arguments
4786     if (Op.isReg()) {
4787       Op.addRegOperands(Inst, 1);
4788       continue;
4789     }
4790 
4791     // Handle optional arguments
4792     OptionalIdx[Op.getImmTy()] = i;
4793   }
4794 
4795   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4796   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4797   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4798 
4799   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4800 }
4801 
4802 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4803                                 bool IsGdsHardcoded) {
4804   OptionalImmIndexMap OptionalIdx;
4805 
4806   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4807     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4808 
4809     // Add the register arguments
4810     if (Op.isReg()) {
4811       Op.addRegOperands(Inst, 1);
4812       continue;
4813     }
4814 
4815     if (Op.isToken() && Op.getToken() == "gds") {
4816       IsGdsHardcoded = true;
4817       continue;
4818     }
4819 
4820     // Handle optional arguments
4821     OptionalIdx[Op.getImmTy()] = i;
4822   }
4823 
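       // ds_swizzle_b32 reuses the offset field for its swizzle pattern, so select
       // the dedicated swizzle operand type for those opcodes.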
4824   AMDGPUOperand::ImmTy OffsetType =
4825     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4826      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4827      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4828                                                       AMDGPUOperand::ImmTyOffset;
4829 
4830   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4831 
4832   if (!IsGdsHardcoded) {
4833     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4834   }
4835   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4836 }
4837 
4838 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4839   OptionalImmIndexMap OptionalIdx;
4840 
4841   unsigned OperandIdx[4];
4842   unsigned EnMask = 0;
4843   int SrcIdx = 0;
4844 
4845   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4846     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4847 
4848     // Add the register arguments
4849     if (Op.isReg()) {
4850       assert(SrcIdx < 4);
4851       OperandIdx[SrcIdx] = Inst.size();
4852       Op.addRegOperands(Inst, 1);
4853       ++SrcIdx;
4854       continue;
4855     }
4856 
4857     if (Op.isOff()) {
4858       assert(SrcIdx < 4);
4859       OperandIdx[SrcIdx] = Inst.size();
4860       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4861       ++SrcIdx;
4862       continue;
4863     }
4864 
4865     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4866       Op.addImmOperands(Inst, 1);
4867       continue;
4868     }
4869 
4870     if (Op.isToken() && Op.getToken() == "done")
4871       continue;
4872 
4873     // Handle optional arguments
4874     OptionalIdx[Op.getImmTy()] = i;
4875   }
4876 
4877   assert(SrcIdx == 4);
4878 
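       // Compressed exports use only the first two source slots: move the third
       // source into the second slot and mark the remaining slots as unused.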
4879   bool Compr = false;
4880   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4881     Compr = true;
4882     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4883     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4884     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4885   }
4886 
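       // Build the enable mask: one bit per active source for normal exports,
       // two bits per packed source for compressed exports.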
4887   for (auto i = 0; i < SrcIdx; ++i) {
4888     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4889       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4890     }
4891   }
4892 
4893   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4894   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4895 
4896   Inst.addOperand(MCOperand::createImm(EnMask));
4897 }
4898 
4899 //===----------------------------------------------------------------------===//
4900 // s_waitcnt
4901 //===----------------------------------------------------------------------===//
4902 
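     // Encode CntVal into the corresponding counter field of IntVal. If the value
     // does not fit, either saturate the field (when Saturate is set) or report
     // failure by returning true.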
4903 static bool
4904 encodeCnt(
4905   const AMDGPU::IsaVersion ISA,
4906   int64_t &IntVal,
4907   int64_t CntVal,
4908   bool Saturate,
4909   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4910   unsigned (*decode)(const IsaVersion &Version, unsigned))
4911 {
4912   bool Failed = false;
4913 
4914   IntVal = encode(ISA, IntVal, CntVal);
4915   if (CntVal != decode(ISA, IntVal)) {
4916     if (Saturate) {
4917       IntVal = encode(ISA, IntVal, -1);
4918     } else {
4919       Failed = true;
4920     }
4921   }
4922   return Failed;
4923 }
4924 
4925 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4926 
4927   SMLoc CntLoc = getLoc();
4928   StringRef CntName = getTokenStr();
4929 
4930   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4931       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4932     return false;
4933 
4934   int64_t CntVal;
4935   SMLoc ValLoc = getLoc();
4936   if (!parseExpr(CntVal))
4937     return false;
4938 
4939   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4940 
4941   bool Failed = true;
4942   bool Sat = CntName.endswith("_sat");
4943 
4944   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4945     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4946   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4947     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4948   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4949     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4950   } else {
4951     Error(CntLoc, "invalid counter name " + CntName);
4952     return false;
4953   }
4954 
4955   if (Failed) {
4956     Error(ValLoc, "value is too large for " + CntName);
4957     return false;
4958   }
4959 
4960   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4961     return false;
4962 
4963   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4964     if (isToken(AsmToken::EndOfStatement)) {
4965       Error(getLoc(), "expected a counter name");
4966       return false;
4967     }
4968   }
4969 
4970   return true;
4971 }
4972 
4973 OperandMatchResultTy
4974 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4975   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
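       // Start with every counter field at its maximum (i.e. no wait); each parsed
       // counter overwrites its own field.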
4976   int64_t Waitcnt = getWaitcntBitMask(ISA);
4977   SMLoc S = getLoc();
4978 
4979   // If parse failed, do not return error code
4980   // to avoid excessive error messages.
4981   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4982     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4983   } else {
4984     parseExpr(Waitcnt);
4985   }
4986 
4987   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4988   return MatchOperand_Success;
4989 }
4990 
4991 bool
4992 AMDGPUOperand::isSWaitCnt() const {
4993   return isImm();
4994 }
4995 
4996 //===----------------------------------------------------------------------===//
4997 // hwreg
4998 //===----------------------------------------------------------------------===//
4999 
5000 bool
5001 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5002                                 int64_t &Offset,
5003                                 int64_t &Width) {
5004   using namespace llvm::AMDGPU::Hwreg;
5005 
5006   // The register may be specified by name or using a numeric code
5007   if (isToken(AsmToken::Identifier) &&
5008       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5009     HwReg.IsSymbolic = true;
5010     lex(); // skip register name
5011   } else if (!parseExpr(HwReg.Id)) {
5012     return false;
5013   }
5014 
5015   if (trySkipToken(AsmToken::RParen))
5016     return true;
5017 
5018   // parse optional params
5019   return
5020     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5021     parseExpr(Offset) &&
5022     skipToken(AsmToken::Comma, "expected a comma") &&
5023     parseExpr(Width) &&
5024     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5025 }
5026 
5027 bool
5028 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5029                                const int64_t Offset,
5030                                const int64_t Width,
5031                                const SMLoc Loc) {
5032 
5033   using namespace llvm::AMDGPU::Hwreg;
5034 
5035   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5036     Error(Loc, "specified hardware register is not supported on this GPU");
5037     return false;
5038   } else if (!isValidHwreg(HwReg.Id)) {
5039     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5040     return false;
5041   } else if (!isValidHwregOffset(Offset)) {
5042     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5043     return false;
5044   } else if (!isValidHwregWidth(Width)) {
5045     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5046     return false;
5047   }
5048   return true;
5049 }
5050 
5051 OperandMatchResultTy
5052 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5053   using namespace llvm::AMDGPU::Hwreg;
5054 
5055   int64_t ImmVal = 0;
5056   SMLoc Loc = getLoc();
5057 
5058   // If parse failed, do not return error code
5059   // to avoid excessive error messages.
5060   if (trySkipId("hwreg", AsmToken::LParen)) {
5061     OperandInfoTy HwReg(ID_UNKNOWN_);
5062     int64_t Offset = OFFSET_DEFAULT_;
5063     int64_t Width = WIDTH_DEFAULT_;
5064     if (parseHwregBody(HwReg, Offset, Width) &&
5065         validateHwreg(HwReg, Offset, Width, Loc)) {
5066       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5067     }
5068   } else if (parseExpr(ImmVal)) {
5069     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5070       Error(Loc, "invalid immediate: only 16-bit values are legal");
5071   }
5072 
5073   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5074   return MatchOperand_Success;
5075 }
5076 
5077 bool AMDGPUOperand::isHwreg() const {
5078   return isImmTy(ImmTyHwreg);
5079 }
5080 
5081 //===----------------------------------------------------------------------===//
5082 // sendmsg
5083 //===----------------------------------------------------------------------===//
5084 
5085 bool
5086 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5087                                   OperandInfoTy &Op,
5088                                   OperandInfoTy &Stream) {
5089   using namespace llvm::AMDGPU::SendMsg;
5090 
5091   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5092     Msg.IsSymbolic = true;
5093     lex(); // skip message name
5094   } else if (!parseExpr(Msg.Id)) {
5095     return false;
5096   }
5097 
5098   if (trySkipToken(AsmToken::Comma)) {
5099     Op.IsDefined = true;
5100     if (isToken(AsmToken::Identifier) &&
5101         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5102       lex(); // skip operation name
5103     } else if (!parseExpr(Op.Id)) {
5104       return false;
5105     }
5106 
5107     if (trySkipToken(AsmToken::Comma)) {
5108       Stream.IsDefined = true;
5109       if (!parseExpr(Stream.Id))
5110         return false;
5111     }
5112   }
5113 
5114   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5115 }
5116 
5117 bool
5118 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5119                                  const OperandInfoTy &Op,
5120                                  const OperandInfoTy &Stream,
5121                                  const SMLoc S) {
5122   using namespace llvm::AMDGPU::SendMsg;
5123 
5124   // Validation strictness depends on whether the message is specified
5125   // in a symbolic or in a numeric form. In the latter case
5126   // only the possibility of encoding is checked.
5127   bool Strict = Msg.IsSymbolic;
5128 
5129   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5130     Error(S, "invalid message id");
5131     return false;
5132   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5133     Error(S, Op.IsDefined ?
5134              "message does not support operations" :
5135              "missing message operation");
5136     return false;
5137   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5138     Error(S, "invalid operation id");
5139     return false;
5140   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5141     Error(S, "message operation does not support streams");
5142     return false;
5143   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5144     Error(S, "invalid message stream id");
5145     return false;
5146   }
5147   return true;
5148 }
5149 
5150 OperandMatchResultTy
5151 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5152   using namespace llvm::AMDGPU::SendMsg;
5153 
5154   int64_t ImmVal = 0;
5155   SMLoc Loc = getLoc();
5156 
5157   // If parse failed, do not return error code
5158   // to avoid excessive error messages.
5159   if (trySkipId("sendmsg", AsmToken::LParen)) {
5160     OperandInfoTy Msg(ID_UNKNOWN_);
5161     OperandInfoTy Op(OP_NONE_);
5162     OperandInfoTy Stream(STREAM_ID_NONE_);
5163     if (parseSendMsgBody(Msg, Op, Stream) &&
5164         validateSendMsg(Msg, Op, Stream, Loc)) {
5165       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5166     }
5167   } else if (parseExpr(ImmVal)) {
5168     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5169       Error(Loc, "invalid immediate: only 16-bit values are legal");
5170   }
5171 
5172   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5173   return MatchOperand_Success;
5174 }
5175 
5176 bool AMDGPUOperand::isSendMsg() const {
5177   return isImmTy(ImmTySendMsg);
5178 }
5179 
5180 //===----------------------------------------------------------------------===//
5181 // v_interp
5182 //===----------------------------------------------------------------------===//
5183 
5184 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5185   if (getLexer().getKind() != AsmToken::Identifier)
5186     return MatchOperand_NoMatch;
5187 
5188   StringRef Str = Parser.getTok().getString();
5189   int Slot = StringSwitch<int>(Str)
5190     .Case("p10", 0)
5191     .Case("p20", 1)
5192     .Case("p0", 2)
5193     .Default(-1);
5194 
5195   SMLoc S = Parser.getTok().getLoc();
5196   if (Slot == -1)
5197     return MatchOperand_ParseFail;
5198 
5199   Parser.Lex();
5200   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5201                                               AMDGPUOperand::ImmTyInterpSlot));
5202   return MatchOperand_Success;
5203 }
5204 
5205 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5206   if (getLexer().getKind() != AsmToken::Identifier)
5207     return MatchOperand_NoMatch;
5208 
5209   StringRef Str = Parser.getTok().getString();
5210   if (!Str.startswith("attr"))
5211     return MatchOperand_NoMatch;
5212 
5213   StringRef Chan = Str.take_back(2);
5214   int AttrChan = StringSwitch<int>(Chan)
5215     .Case(".x", 0)
5216     .Case(".y", 1)
5217     .Case(".z", 2)
5218     .Case(".w", 3)
5219     .Default(-1);
5220   if (AttrChan == -1)
5221     return MatchOperand_ParseFail;
5222 
5223   Str = Str.drop_back(2).drop_front(4);
5224 
5225   uint8_t Attr;
5226   if (Str.getAsInteger(10, Attr))
5227     return MatchOperand_ParseFail;
5228 
5229   SMLoc S = Parser.getTok().getLoc();
5230   Parser.Lex();
5231   if (Attr > 63) {
5232     Error(S, "out of bounds attr");
5233     return MatchOperand_Success;
5234   }
5235 
5236   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5237 
5238   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5239                                               AMDGPUOperand::ImmTyInterpAttr));
5240   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5241                                               AMDGPUOperand::ImmTyAttrChan));
5242   return MatchOperand_Success;
5243 }
5244 
5245 //===----------------------------------------------------------------------===//
5246 // exp
5247 //===----------------------------------------------------------------------===//
5248 
5249 void AMDGPUAsmParser::errorExpTgt() {
5250   Error(Parser.getTok().getLoc(), "invalid exp target");
5251 }
5252 
5253 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5254                                                       uint8_t &Val) {
5255   if (Str == "null") {
5256     Val = 9;
5257     return MatchOperand_Success;
5258   }
5259 
5260   if (Str.startswith("mrt")) {
5261     Str = Str.drop_front(3);
5262     if (Str == "z") { // == mrtz
5263       Val = 8;
5264       return MatchOperand_Success;
5265     }
5266 
5267     if (Str.getAsInteger(10, Val))
5268       return MatchOperand_ParseFail;
5269 
5270     if (Val > 7)
5271       errorExpTgt();
5272 
5273     return MatchOperand_Success;
5274   }
5275 
5276   if (Str.startswith("pos")) {
5277     Str = Str.drop_front(3);
5278     if (Str.getAsInteger(10, Val))
5279       return MatchOperand_ParseFail;
5280 
5281     if (Val > 4 || (Val == 4 && !isGFX10()))
5282       errorExpTgt();
5283 
5284     Val += 12;
5285     return MatchOperand_Success;
5286   }
5287 
5288   if (isGFX10() && Str == "prim") {
5289     Val = 20;
5290     return MatchOperand_Success;
5291   }
5292 
5293   if (Str.startswith("param")) {
5294     Str = Str.drop_front(5);
5295     if (Str.getAsInteger(10, Val))
5296       return MatchOperand_ParseFail;
5297 
5298     if (Val >= 32)
5299       errorExpTgt();
5300 
5301     Val += 32;
5302     return MatchOperand_Success;
5303   }
5304 
5305   if (Str.startswith("invalid_target_")) {
5306     Str = Str.drop_front(15);
5307     if (Str.getAsInteger(10, Val))
5308       return MatchOperand_ParseFail;
5309 
5310     errorExpTgt();
5311     return MatchOperand_Success;
5312   }
5313 
5314   return MatchOperand_NoMatch;
5315 }
5316 
5317 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5318   uint8_t Val;
5319   StringRef Str = Parser.getTok().getString();
5320 
5321   auto Res = parseExpTgtImpl(Str, Val);
5322   if (Res != MatchOperand_Success)
5323     return Res;
5324 
5325   SMLoc S = Parser.getTok().getLoc();
5326   Parser.Lex();
5327 
5328   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5329                                               AMDGPUOperand::ImmTyExpTgt));
5330   return MatchOperand_Success;
5331 }
5332 
5333 //===----------------------------------------------------------------------===//
5334 // parser helpers
5335 //===----------------------------------------------------------------------===//
5336 
5337 bool
5338 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5339   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5340 }
5341 
5342 bool
5343 AMDGPUAsmParser::isId(const StringRef Id) const {
5344   return isId(getToken(), Id);
5345 }
5346 
5347 bool
5348 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5349   return getTokenKind() == Kind;
5350 }
5351 
5352 bool
5353 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5354   if (isId(Id)) {
5355     lex();
5356     return true;
5357   }
5358   return false;
5359 }
5360 
5361 bool
5362 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5363   if (isId(Id) && peekToken().is(Kind)) {
5364     lex();
5365     lex();
5366     return true;
5367   }
5368   return false;
5369 }
5370 
5371 bool
5372 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5373   if (isToken(Kind)) {
5374     lex();
5375     return true;
5376   }
5377   return false;
5378 }
5379 
5380 bool
5381 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5382                            const StringRef ErrMsg) {
5383   if (!trySkipToken(Kind)) {
5384     Error(getLoc(), ErrMsg);
5385     return false;
5386   }
5387   return true;
5388 }
5389 
5390 bool
5391 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5392   return !getParser().parseAbsoluteExpression(Imm);
5393 }
5394 
5395 bool
5396 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5397   SMLoc S = getLoc();
5398 
5399   const MCExpr *Expr;
5400   if (Parser.parseExpression(Expr))
5401     return false;
5402 
5403   int64_t IntVal;
5404   if (Expr->evaluateAsAbsolute(IntVal)) {
5405     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5406   } else {
5407     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5408   }
5409   return true;
5410 }
5411 
5412 bool
5413 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5414   if (isToken(AsmToken::String)) {
5415     Val = getToken().getStringContents();
5416     lex();
5417     return true;
5418   } else {
5419     Error(getLoc(), ErrMsg);
5420     return false;
5421   }
5422 }
5423 
5424 AsmToken
5425 AMDGPUAsmParser::getToken() const {
5426   return Parser.getTok();
5427 }
5428 
5429 AsmToken
5430 AMDGPUAsmParser::peekToken() {
5431   return getLexer().peekTok();
5432 }
5433 
5434 void
5435 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5436   auto TokCount = getLexer().peekTokens(Tokens);
5437 
5438   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5439     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5440 }
5441 
5442 AsmToken::TokenKind
5443 AMDGPUAsmParser::getTokenKind() const {
5444   return getLexer().getKind();
5445 }
5446 
5447 SMLoc
5448 AMDGPUAsmParser::getLoc() const {
5449   return getToken().getLoc();
5450 }
5451 
5452 StringRef
5453 AMDGPUAsmParser::getTokenStr() const {
5454   return getToken().getString();
5455 }
5456 
5457 void
5458 AMDGPUAsmParser::lex() {
5459   Parser.Lex();
5460 }
5461 
5462 //===----------------------------------------------------------------------===//
5463 // swizzle
5464 //===----------------------------------------------------------------------===//
5465 
5466 LLVM_READNONE
5467 static unsigned
5468 encodeBitmaskPerm(const unsigned AndMask,
5469                   const unsigned OrMask,
5470                   const unsigned XorMask) {
5471   using namespace llvm::AMDGPU::Swizzle;
5472 
5473   return BITMASK_PERM_ENC |
5474          (AndMask << BITMASK_AND_SHIFT) |
5475          (OrMask  << BITMASK_OR_SHIFT)  |
5476          (XorMask << BITMASK_XOR_SHIFT);
5477 }
5478 
5479 bool
5480 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5481                                       const unsigned MinVal,
5482                                       const unsigned MaxVal,
5483                                       const StringRef ErrMsg) {
5484   for (unsigned i = 0; i < OpNum; ++i) {
5485     if (!skipToken(AsmToken::Comma, "expected a comma")){
5486       return false;
5487     }
5488     SMLoc ExprLoc = Parser.getTok().getLoc();
5489     if (!parseExpr(Op[i])) {
5490       return false;
5491     }
5492     if (Op[i] < MinVal || Op[i] > MaxVal) {
5493       Error(ExprLoc, ErrMsg);
5494       return false;
5495     }
5496   }
5497 
5498   return true;
5499 }
5500 
5501 bool
5502 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5503   using namespace llvm::AMDGPU::Swizzle;
5504 
5505   int64_t Lane[LANE_NUM];
5506   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5507                            "expected a 2-bit lane id")) {
5508     Imm = QUAD_PERM_ENC;
5509     for (unsigned I = 0; I < LANE_NUM; ++I) {
5510       Imm |= Lane[I] << (LANE_SHIFT * I);
5511     }
5512     return true;
5513   }
5514   return false;
5515 }
5516 
5517 bool
5518 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5519   using namespace llvm::AMDGPU::Swizzle;
5520 
5521   SMLoc S = Parser.getTok().getLoc();
5522   int64_t GroupSize;
5523   int64_t LaneIdx;
5524 
5525   if (!parseSwizzleOperands(1, &GroupSize,
5526                             2, 32,
5527                             "group size must be in the interval [2,32]")) {
5528     return false;
5529   }
5530   if (!isPowerOf2_64(GroupSize)) {
5531     Error(S, "group size must be a power of two");
5532     return false;
5533   }
5534   if (parseSwizzleOperands(1, &LaneIdx,
5535                            0, GroupSize - 1,
5536                            "lane id must be in the interval [0,group size - 1]")) {
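         // The AND mask clears the low log2(GroupSize) bits of each lane id and the
         // OR mask substitutes LaneIdx, so every lane in a group reads the same lane.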
5537     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5538     return true;
5539   }
5540   return false;
5541 }
5542 
5543 bool
5544 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5545   using namespace llvm::AMDGPU::Swizzle;
5546 
5547   SMLoc S = Parser.getTok().getLoc();
5548   int64_t GroupSize;
5549 
5550   if (!parseSwizzleOperands(1, &GroupSize,
5551       2, 32, "group size must be in the interval [2,32]")) {
5552     return false;
5553   }
5554   if (!isPowerOf2_64(GroupSize)) {
5555     Error(S, "group size must be a power of two");
5556     return false;
5557   }
5558 
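       // XOR-ing the lane id with (GroupSize - 1) reverses the lane order within
       // each group.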
5559   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5560   return true;
5561 }
5562 
5563 bool
5564 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5565   using namespace llvm::AMDGPU::Swizzle;
5566 
5567   SMLoc S = Parser.getTok().getLoc();
5568   int64_t GroupSize;
5569 
5570   if (!parseSwizzleOperands(1, &GroupSize,
5571       1, 16, "group size must be in the interval [1,16]")) {
5572     return false;
5573   }
5574   if (!isPowerOf2_64(GroupSize)) {
5575     Error(S, "group size must be a power of two");
5576     return false;
5577   }
5578 
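       // XOR-ing the lane id with GroupSize swaps adjacent groups of GroupSize lanes.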
5579   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5580   return true;
5581 }
5582 
5583 bool
5584 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5585   using namespace llvm::AMDGPU::Swizzle;
5586 
5587   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5588     return false;
5589   }
5590 
5591   StringRef Ctl;
5592   SMLoc StrLoc = Parser.getTok().getLoc();
5593   if (!parseString(Ctl)) {
5594     return false;
5595   }
5596   if (Ctl.size() != BITMASK_WIDTH) {
5597     Error(StrLoc, "expected a 5-character mask");
5598     return false;
5599   }
5600 
5601   unsigned AndMask = 0;
5602   unsigned OrMask = 0;
5603   unsigned XorMask = 0;
5604 
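       // Each mask character controls one bit of the lane id: '0' forces it to 0,
       // '1' forces it to 1, 'p' preserves it, and 'i' inverts it.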
5605   for (size_t i = 0; i < Ctl.size(); ++i) {
5606     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5607     switch(Ctl[i]) {
5608     default:
5609       Error(StrLoc, "invalid mask");
5610       return false;
5611     case '0':
5612       break;
5613     case '1':
5614       OrMask |= Mask;
5615       break;
5616     case 'p':
5617       AndMask |= Mask;
5618       break;
5619     case 'i':
5620       AndMask |= Mask;
5621       XorMask |= Mask;
5622       break;
5623     }
5624   }
5625 
5626   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5627   return true;
5628 }
5629 
5630 bool
5631 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5632 
5633   SMLoc OffsetLoc = Parser.getTok().getLoc();
5634 
5635   if (!parseExpr(Imm)) {
5636     return false;
5637   }
5638   if (!isUInt<16>(Imm)) {
5639     Error(OffsetLoc, "expected a 16-bit offset");
5640     return false;
5641   }
5642   return true;
5643 }
5644 
5645 bool
5646 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5647   using namespace llvm::AMDGPU::Swizzle;
5648 
5649   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5650 
5651     SMLoc ModeLoc = Parser.getTok().getLoc();
5652     bool Ok = false;
5653 
5654     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5655       Ok = parseSwizzleQuadPerm(Imm);
5656     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5657       Ok = parseSwizzleBitmaskPerm(Imm);
5658     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5659       Ok = parseSwizzleBroadcast(Imm);
5660     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5661       Ok = parseSwizzleSwap(Imm);
5662     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5663       Ok = parseSwizzleReverse(Imm);
5664     } else {
5665       Error(ModeLoc, "expected a swizzle mode");
5666     }
5667 
5668     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5669   }
5670 
5671   return false;
5672 }
5673 
5674 OperandMatchResultTy
5675 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5676   SMLoc S = Parser.getTok().getLoc();
5677   int64_t Imm = 0;
5678 
5679   if (trySkipId("offset")) {
5680 
5681     bool Ok = false;
5682     if (skipToken(AsmToken::Colon, "expected a colon")) {
5683       if (trySkipId("swizzle")) {
5684         Ok = parseSwizzleMacro(Imm);
5685       } else {
5686         Ok = parseSwizzleOffset(Imm);
5687       }
5688     }
5689 
5690     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5691 
5692     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5693   } else {
5694     // Swizzle "offset" operand is optional.
5695     // If it is omitted, try parsing other optional operands.
5696     return parseOptionalOpr(Operands);
5697   }
5698 }
5699 
5700 bool
5701 AMDGPUOperand::isSwizzle() const {
5702   return isImmTy(ImmTySwizzle);
5703 }
5704 
5705 //===----------------------------------------------------------------------===//
5706 // VGPR Index Mode
5707 //===----------------------------------------------------------------------===//
5708 
5709 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5710 
5711   using namespace llvm::AMDGPU::VGPRIndexMode;
5712 
5713   if (trySkipToken(AsmToken::RParen)) {
5714     return OFF;
5715   }
5716 
5717   int64_t Imm = 0;
5718 
5719   while (true) {
5720     unsigned Mode = 0;
5721     SMLoc S = Parser.getTok().getLoc();
5722 
5723     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5724       if (trySkipId(IdSymbolic[ModeId])) {
5725         Mode = 1 << ModeId;
5726         break;
5727       }
5728     }
5729 
5730     if (Mode == 0) {
5731       Error(S, (Imm == 0)?
5732                "expected a VGPR index mode or a closing parenthesis" :
5733                "expected a VGPR index mode");
5734       break;
5735     }
5736 
5737     if (Imm & Mode) {
5738       Error(S, "duplicate VGPR index mode");
5739       break;
5740     }
5741     Imm |= Mode;
5742 
5743     if (trySkipToken(AsmToken::RParen))
5744       break;
5745     if (!skipToken(AsmToken::Comma,
5746                    "expected a comma or a closing parenthesis"))
5747       break;
5748   }
5749 
5750   return Imm;
5751 }
5752 
5753 OperandMatchResultTy
5754 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5755 
5756   int64_t Imm = 0;
5757   SMLoc S = Parser.getTok().getLoc();
5758 
5759   if (getLexer().getKind() == AsmToken::Identifier &&
5760       Parser.getTok().getString() == "gpr_idx" &&
5761       getLexer().peekTok().is(AsmToken::LParen)) {
5762 
5763     Parser.Lex();
5764     Parser.Lex();
5765 
5766     // If parse failed, trigger an error but do not return error code
5767     // to avoid excessive error messages.
5768     Imm = parseGPRIdxMacro();
5769 
5770   } else {
5771     if (getParser().parseAbsoluteExpression(Imm))
5772       return MatchOperand_NoMatch;
5773     if (Imm < 0 || !isUInt<4>(Imm)) {
5774       Error(S, "invalid immediate: only 4-bit values are legal");
5775     }
5776   }
5777 
5778   Operands.push_back(
5779       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5780   return MatchOperand_Success;
5781 }
5782 
5783 bool AMDGPUOperand::isGPRIdxMode() const {
5784   return isImmTy(ImmTyGprIdxMode);
5785 }
5786 
5787 //===----------------------------------------------------------------------===//
5788 // sopp branch targets
5789 //===----------------------------------------------------------------------===//
5790 
5791 OperandMatchResultTy
5792 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5793 
5794   // Make sure we are not parsing something
5795   // that looks like a label or an expression but is not.
5796   // This will improve error messages.
5797   if (isRegister() || isModifier())
5798     return MatchOperand_NoMatch;
5799 
5800   if (parseExpr(Operands)) {
5801 
5802     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5803     assert(Opr.isImm() || Opr.isExpr());
5804     SMLoc Loc = Opr.getStartLoc();
5805 
5806     // Currently we do not support arbitrary expressions as branch targets.
5807     // Only labels and absolute expressions are accepted.
5808     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5809       Error(Loc, "expected an absolute expression or a label");
5810     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5811       Error(Loc, "expected a 16-bit signed jump offset");
5812     }
5813   }
5814 
5815   return MatchOperand_Success; // avoid excessive error messages
5816 }
5817 
5818 //===----------------------------------------------------------------------===//
5819 // Boolean holding registers
5820 //===----------------------------------------------------------------------===//
5821 
5822 OperandMatchResultTy
5823 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5824   return parseReg(Operands);
5825 }
5826 
5827 //===----------------------------------------------------------------------===//
5828 // mubuf
5829 //===----------------------------------------------------------------------===//
5830 
5831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5832   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5833 }
5834 
5835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5836   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5837 }
5838 
5839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5840   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5841 }
5842 
5843 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5844                                const OperandVector &Operands,
5845                                bool IsAtomic,
5846                                bool IsAtomicReturn,
5847                                bool IsLds) {
5848   bool IsLdsOpcode = IsLds;
5849   bool HasLdsModifier = false;
5850   OptionalImmIndexMap OptionalIdx;
5851   assert(IsAtomicReturn ? IsAtomic : true);
5852   unsigned FirstOperandIdx = 1;
5853 
5854   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5855     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5856 
5857     // Add the register arguments
5858     if (Op.isReg()) {
5859       Op.addRegOperands(Inst, 1);
5860       // Insert a tied src for the atomic return dst.
5861       // This cannot be postponed as subsequent calls to
5862       // addImmOperands rely on the correct number of MC operands.
5863       if (IsAtomicReturn && i == FirstOperandIdx)
5864         Op.addRegOperands(Inst, 1);
5865       continue;
5866     }
5867 
5868     // Handle the case where soffset is an immediate
5869     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5870       Op.addImmOperands(Inst, 1);
5871       continue;
5872     }
5873 
5874     HasLdsModifier |= Op.isLDS();
5875 
5876     // Handle tokens like 'offen' which are sometimes hard-coded into the
5877     // asm string.  There are no MCInst operands for these.
5878     if (Op.isToken()) {
5879       continue;
5880     }
5881     assert(Op.isImm());
5882 
5883     // Handle optional arguments
5884     OptionalIdx[Op.getImmTy()] = i;
5885   }
5886 
5887   // This is a workaround for an llvm quirk which may result in an
5888   // incorrect instruction selection. Lds and non-lds versions of
5889   // MUBUF instructions are identical except that lds versions
5890   // have a mandatory 'lds' modifier. However, this modifier follows
5891   // optional modifiers, and the llvm asm matcher regards this 'lds'
5892   // modifier as an optional one. As a result, an lds version
5893   // of an opcode may be selected even if it has no 'lds' modifier.
5894   if (IsLdsOpcode && !HasLdsModifier) {
5895     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5896     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5897       Inst.setOpcode(NoLdsOpcode);
5898       IsLdsOpcode = false;
5899     }
5900   }
5901 
5902   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5903   if (!IsAtomic) { // glc is hard-coded.
5904     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5905   }
5906   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5907 
5908   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5909     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5910   }
5911 
5912   if (isGFX10())
5913     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5914 }
5915 
5916 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5917   OptionalImmIndexMap OptionalIdx;
5918 
5919   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5920     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5921 
5922     // Add the register arguments
5923     if (Op.isReg()) {
5924       Op.addRegOperands(Inst, 1);
5925       continue;
5926     }
5927 
5928     // Handle the case where soffset is an immediate
5929     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5930       Op.addImmOperands(Inst, 1);
5931       continue;
5932     }
5933 
5934     // Handle tokens like 'offen' which are sometimes hard-coded into the
5935     // asm string.  There are no MCInst operands for these.
5936     if (Op.isToken()) {
5937       continue;
5938     }
5939     assert(Op.isImm());
5940 
5941     // Handle optional arguments
5942     OptionalIdx[Op.getImmTy()] = i;
5943   }
5944 
5945   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5946                         AMDGPUOperand::ImmTyOffset);
5947   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5948   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5949   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5950   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5951 
5952   if (isGFX10())
5953     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5954 }
5955 
5956 //===----------------------------------------------------------------------===//
5957 // mimg
5958 //===----------------------------------------------------------------------===//
5959 
5960 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5961                               bool IsAtomic) {
5962   unsigned I = 1;
5963   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5964   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5965     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5966   }
5967 
5968   if (IsAtomic) {
5969     // Add src, same as dst
5970     assert(Desc.getNumDefs() == 1);
5971     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5972   }
5973 
5974   OptionalImmIndexMap OptionalIdx;
5975 
5976   for (unsigned E = Operands.size(); I != E; ++I) {
5977     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5978 
5979     // Add the register arguments
5980     if (Op.isReg()) {
5981       Op.addRegOperands(Inst, 1);
5982     } else if (Op.isImmModifier()) {
5983       OptionalIdx[Op.getImmTy()] = I;
5984     } else if (!Op.isToken()) {
5985       llvm_unreachable("unexpected operand type");
5986     }
5987   }
5988 
5989   bool IsGFX10 = isGFX10();
5990 
5991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5992   if (IsGFX10)
5993     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5994   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5995   if (IsGFX10)
5996     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5997   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5998   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5999   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6000   if (IsGFX10)
6001     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6002   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6003   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6004   if (!IsGFX10)
6005     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6006   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6007 }
6008 
6009 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6010   cvtMIMG(Inst, Operands, true);
6011 }
6012 
6013 //===----------------------------------------------------------------------===//
6014 // smrd
6015 //===----------------------------------------------------------------------===//
6016 
6017 bool AMDGPUOperand::isSMRDOffset8() const {
6018   return isImm() && isUInt<8>(getImm());
6019 }
6020 
6021 bool AMDGPUOperand::isSMRDOffset20() const {
6022   return isImm() && isUInt<20>(getImm());
6023 }
6024 
6025 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6026   // 32-bit literals are only supported on CI, and we only want to use them
6027   // when the offset does not fit in 8 bits.
6028   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6029 }
6030 
6031 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6032   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6033 }
6034 
6035 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6036   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6037 }
6038 
6039 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6040   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6041 }
6042 
6043 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6044   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6045 }
6046 
6047 //===----------------------------------------------------------------------===//
6048 // vop3
6049 //===----------------------------------------------------------------------===//
6050 
6051 static bool ConvertOmodMul(int64_t &Mul) {
6052   if (Mul != 1 && Mul != 2 && Mul != 4)
6053     return false;
6054 
6055   Mul >>= 1;
6056   return true;
6057 }
6058 
6059 static bool ConvertOmodDiv(int64_t &Div) {
6060   if (Div == 1) {
6061     Div = 0;
6062     return true;
6063   }
6064 
6065   if (Div == 2) {
6066     Div = 3;
6067     return true;
6068   }
6069 
6070   return false;
6071 }
6072 
6073 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6074   if (BoundCtrl == 0) {
6075     BoundCtrl = 1;
6076     return true;
6077   }
6078 
6079   if (BoundCtrl == -1) {
6080     BoundCtrl = 0;
6081     return true;
6082   }
6083 
6084   return false;
6085 }
6086 
6087 // Note: the order in this table matches the order of operands in AsmString.
6088 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6089   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6090   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6091   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6092   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6093   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6094   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6095   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6096   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6097   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6098   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6099   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6100   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6101   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6102   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6103   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6104   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6105   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6106   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6107   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6108   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6109   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6110   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6111   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6112   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6113   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6114   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6115   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6116   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6117   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6118   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6119   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6120   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6121   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6122   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6123   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6124   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6125   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6126   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6127   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6128   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6129   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6130   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6131   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6132   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6133 };
6134 
6135 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6136 
6137   OperandMatchResultTy res = parseOptionalOpr(Operands);
6138 
6139   // This is a hack to enable hardcoded mandatory operands which follow
6140   // optional operands.
6141   //
6142   // The current design assumes that all operands after the first optional operand
6143   // are also optional. However, the implementation of some instructions violates
6144   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6145   //
6146   // To alleviate this problem, we have to (implicitly) parse extra operands
6147   // to make sure the autogenerated parser of custom operands never hits hardcoded
6148   // mandatory operands.
6149 
6150   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6151     if (res != MatchOperand_Success ||
6152         isToken(AsmToken::EndOfStatement))
6153       break;
6154 
6155     trySkipToken(AsmToken::Comma);
6156     res = parseOptionalOpr(Operands);
6157   }
6158 
6159   return res;
6160 }
6161 
6162 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6163   OperandMatchResultTy res;
6164   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6165     // try to parse any optional operand here
6166     if (Op.IsBit) {
6167       res = parseNamedBit(Op.Name, Operands, Op.Type);
6168     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6169       res = parseOModOperand(Operands);
6170     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6171                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6172                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6173       res = parseSDWASel(Operands, Op.Name, Op.Type);
6174     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6175       res = parseSDWADstUnused(Operands);
6176     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6177                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6178                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6179                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6180       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6181                                         Op.ConvertResult);
6182     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6183       res = parseDim(Operands);
6184     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6185       res = parseDfmtNfmt(Operands);
6186     } else {
6187       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6188     }
6189     if (res != MatchOperand_NoMatch) {
6190       return res;
6191     }
6192   }
6193   return MatchOperand_NoMatch;
6194 }
6195 
6196 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6197   StringRef Name = Parser.getTok().getString();
6198   if (Name == "mul") {
6199     return parseIntWithPrefix("mul", Operands,
6200                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6201   }
6202 
6203   if (Name == "div") {
6204     return parseIntWithPrefix("div", Operands,
6205                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6206   }
6207 
6208   return MatchOperand_NoMatch;
6209 }
6210 
6211 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6212   cvtVOP3P(Inst, Operands);
6213 
6214   int Opc = Inst.getOpcode();
6215 
6216   int SrcNum;
6217   const int Ops[] = { AMDGPU::OpName::src0,
6218                       AMDGPU::OpName::src1,
6219                       AMDGPU::OpName::src2 };
6220   for (SrcNum = 0;
6221        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6222        ++SrcNum);
6223   assert(SrcNum > 0);
6224 
6225   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6226   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6227 
6228   if ((OpSel & (1 << SrcNum)) != 0) {
6229     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6230     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6231     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6232   }
6233 }
6234 
6235 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6236       // 1. This is an input modifiers operand
6237   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6238       // 2. This is not the last operand
6239       && Desc.NumOperands > (OpNum + 1)
6240       // 3. The next operand has a register class
6241       && Desc.OpInfo[OpNum + 1].RegClass != -1
6242       // 4. The next operand is not tied to any other operand
6243       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6244 }
6245 
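// Converter for VOP3 interpolation instructions. Interp slot, attribute and
// channel operands are emitted as plain immediates, sources may carry FP
// input modifiers, and the optional high/clamp/omod operands are appended
// when the opcode defines them.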
6246 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6247 {
6248   OptionalImmIndexMap OptionalIdx;
6249   unsigned Opc = Inst.getOpcode();
6250 
6251   unsigned I = 1;
6252   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6253   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6254     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6255   }
6256 
6257   for (unsigned E = Operands.size(); I != E; ++I) {
6258     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6259     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6260       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6261     } else if (Op.isInterpSlot() ||
6262                Op.isInterpAttr() ||
6263                Op.isAttrChan()) {
6264       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6265     } else if (Op.isImmModifier()) {
6266       OptionalIdx[Op.getImmTy()] = I;
6267     } else {
6268       llvm_unreachable("unhandled operand type");
6269     }
6270   }
6271 
6272   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6273     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6274   }
6275 
6276   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6277     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6278   }
6279 
6280   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6281     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6282   }
6283 }
6284 
6285 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6286                               OptionalImmIndexMap &OptionalIdx) {
6287   unsigned Opc = Inst.getOpcode();
6288 
6289   unsigned I = 1;
6290   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6291   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6292     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6293   }
6294 
6295   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6296     // This instruction has src modifiers
6297     for (unsigned E = Operands.size(); I != E; ++I) {
6298       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6299       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6300         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6301       } else if (Op.isImmModifier()) {
6302         OptionalIdx[Op.getImmTy()] = I;
6303       } else if (Op.isRegOrImm()) {
6304         Op.addRegOrImmOperands(Inst, 1);
6305       } else {
6306         llvm_unreachable("unhandled operand type");
6307       }
6308     }
6309   } else {
6310     // No src modifiers
6311     for (unsigned E = Operands.size(); I != E; ++I) {
6312       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6313       if (Op.isMod()) {
6314         OptionalIdx[Op.getImmTy()] = I;
6315       } else {
6316         Op.addRegOrImmOperands(Inst, 1);
6317       }
6318     }
6319   }
6320 
6321   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6322     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6323   }
6324 
6325   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6326     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6327   }
6328 
6329   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6330   // they have a src2 register operand that is tied to the dst operand.
6331   // We don't allow modifiers for this operand in the assembler, so
6332   // src2_modifiers should be 0.
6333   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6334       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6335       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6336       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6337       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6338       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6339       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6340     auto it = Inst.begin();
6341     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6342     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6343     ++it;
6344     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6345   }
6346 }
6347 
6348 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6349   OptionalImmIndexMap OptionalIdx;
6350   cvtVOP3(Inst, Operands, OptionalIdx);
6351 }
6352 
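// Converter for packed (VOP3P) instructions. After the common VOP3
// conversion, the parsed op_sel, op_sel_hi, neg_lo and neg_hi masks are
// folded into the per-source *_modifiers operands as the OP_SEL_0, OP_SEL_1,
// NEG and NEG_HI bits. op_sel_hi defaults to all ones for packed opcodes, and
// a tied vdst_in operand, if present, is fed from the destination register.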
6353 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6354                                const OperandVector &Operands) {
6355   OptionalImmIndexMap OptIdx;
6356   const int Opc = Inst.getOpcode();
6357   const MCInstrDesc &Desc = MII.get(Opc);
6358 
6359   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6360 
6361   cvtVOP3(Inst, Operands, OptIdx);
6362 
6363   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6364     assert(!IsPacked);
6365     Inst.addOperand(Inst.getOperand(0));
6366   }
6367 
6368   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6369   // instruction, and then figure out where to actually put the modifiers.
6370 
6371   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6372 
6373   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6374   if (OpSelHiIdx != -1) {
6375     int DefaultVal = IsPacked ? -1 : 0;
6376     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6377                           DefaultVal);
6378   }
6379 
6380   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6381   if (NegLoIdx != -1) {
6382     assert(IsPacked);
6383     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6384     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6385   }
6386 
6387   const int Ops[] = { AMDGPU::OpName::src0,
6388                       AMDGPU::OpName::src1,
6389                       AMDGPU::OpName::src2 };
6390   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6391                          AMDGPU::OpName::src1_modifiers,
6392                          AMDGPU::OpName::src2_modifiers };
6393 
6394   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6395 
6396   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6397   unsigned OpSelHi = 0;
6398   unsigned NegLo = 0;
6399   unsigned NegHi = 0;
6400 
6401   if (OpSelHiIdx != -1) {
6402     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6403   }
6404 
6405   if (NegLoIdx != -1) {
6406     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6407     NegLo = Inst.getOperand(NegLoIdx).getImm();
6408     NegHi = Inst.getOperand(NegHiIdx).getImm();
6409   }
6410 
6411   for (int J = 0; J < 3; ++J) {
6412     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6413     if (OpIdx == -1)
6414       break;
6415 
6416     uint32_t ModVal = 0;
6417 
6418     if ((OpSel & (1 << J)) != 0)
6419       ModVal |= SISrcMods::OP_SEL_0;
6420 
6421     if ((OpSelHi & (1 << J)) != 0)
6422       ModVal |= SISrcMods::OP_SEL_1;
6423 
6424     if ((NegLo & (1 << J)) != 0)
6425       ModVal |= SISrcMods::NEG;
6426 
6427     if ((NegHi & (1 << J)) != 0)
6428       ModVal |= SISrcMods::NEG_HI;
6429 
6430     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6431 
6432     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6433   }
6434 }
6435 
6436 //===----------------------------------------------------------------------===//
6437 // dpp
6438 //===----------------------------------------------------------------------===//
6439 
6440 bool AMDGPUOperand::isDPP8() const {
6441   return isImmTy(ImmTyDPP8);
6442 }
6443 
6444 bool AMDGPUOperand::isDPPCtrl() const {
6445   using namespace AMDGPU::DPP;
6446 
6447   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6448   if (result) {
6449     int64_t Imm = getImm();
6450     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6451            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6452            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6453            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6454            (Imm == DppCtrl::WAVE_SHL1) ||
6455            (Imm == DppCtrl::WAVE_ROL1) ||
6456            (Imm == DppCtrl::WAVE_SHR1) ||
6457            (Imm == DppCtrl::WAVE_ROR1) ||
6458            (Imm == DppCtrl::ROW_MIRROR) ||
6459            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6460            (Imm == DppCtrl::BCAST15) ||
6461            (Imm == DppCtrl::BCAST31) ||
6462            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6463            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6464   }
6465   return false;
6466 }
6467 
6468 //===----------------------------------------------------------------------===//
6469 // mAI
6470 //===----------------------------------------------------------------------===//
6471 
6472 bool AMDGPUOperand::isBLGP() const {
6473   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6474 }
6475 
6476 bool AMDGPUOperand::isCBSZ() const {
6477   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6478 }
6479 
6480 bool AMDGPUOperand::isABID() const {
6481   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6482 }
6483 
6484 bool AMDGPUOperand::isS16Imm() const {
6485   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6486 }
6487 
6488 bool AMDGPUOperand::isU16Imm() const {
6489   return isImm() && isUInt<16>(getImm());
6490 }
6491 
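// Parse the image dimension operand of GFX10 MIMG instructions. Both the
// full resource name and its suffix are accepted, e.g. dim:SQ_RSRC_IMG_2D or
// dim:2D.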
6492 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6493   if (!isGFX10())
6494     return MatchOperand_NoMatch;
6495 
6496   SMLoc S = Parser.getTok().getLoc();
6497 
6498   if (getLexer().isNot(AsmToken::Identifier))
6499     return MatchOperand_NoMatch;
6500   if (getLexer().getTok().getString() != "dim")
6501     return MatchOperand_NoMatch;
6502 
6503   Parser.Lex();
6504   if (getLexer().isNot(AsmToken::Colon))
6505     return MatchOperand_ParseFail;
6506 
6507   Parser.Lex();
6508 
6509   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6510   // integer.
6511   std::string Token;
6512   if (getLexer().is(AsmToken::Integer)) {
6513     SMLoc Loc = getLexer().getTok().getEndLoc();
6514     Token = std::string(getLexer().getTok().getString());
6515     Parser.Lex();
6516     if (getLexer().getTok().getLoc() != Loc)
6517       return MatchOperand_ParseFail;
6518   }
6519   if (getLexer().isNot(AsmToken::Identifier))
6520     return MatchOperand_ParseFail;
6521   Token += getLexer().getTok().getString();
6522 
6523   StringRef DimId = Token;
6524   if (DimId.startswith("SQ_RSRC_IMG_"))
6525     DimId = DimId.substr(12);
6526 
6527   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6528   if (!DimInfo)
6529     return MatchOperand_ParseFail;
6530 
6531   Parser.Lex();
6532 
6533   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6534                                               AMDGPUOperand::ImmTyDim));
6535   return MatchOperand_Success;
6536 }
6537 
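// Parse a GFX10 dpp8 lane-select operand of the form
// dpp8:[s0,s1,s2,s3,s4,s5,s6,s7], where each selector is in the range 0..7.
// The eight selectors are packed three bits each into a single immediate.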
6538 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6539   SMLoc S = Parser.getTok().getLoc();
6540   StringRef Prefix;
6541 
6542   if (getLexer().getKind() == AsmToken::Identifier) {
6543     Prefix = Parser.getTok().getString();
6544   } else {
6545     return MatchOperand_NoMatch;
6546   }
6547 
6548   if (Prefix != "dpp8")
6549     return parseDPPCtrl(Operands);
6550   if (!isGFX10())
6551     return MatchOperand_NoMatch;
6552 
6553   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6554 
6555   int64_t Sels[8];
6556 
6557   Parser.Lex();
6558   if (getLexer().isNot(AsmToken::Colon))
6559     return MatchOperand_ParseFail;
6560 
6561   Parser.Lex();
6562   if (getLexer().isNot(AsmToken::LBrac))
6563     return MatchOperand_ParseFail;
6564 
6565   Parser.Lex();
6566   if (getParser().parseAbsoluteExpression(Sels[0]))
6567     return MatchOperand_ParseFail;
6568   if (0 > Sels[0] || 7 < Sels[0])
6569     return MatchOperand_ParseFail;
6570 
6571   for (size_t i = 1; i < 8; ++i) {
6572     if (getLexer().isNot(AsmToken::Comma))
6573       return MatchOperand_ParseFail;
6574 
6575     Parser.Lex();
6576     if (getParser().parseAbsoluteExpression(Sels[i]))
6577       return MatchOperand_ParseFail;
6578     if (0 > Sels[i] || 7 < Sels[i])
6579       return MatchOperand_ParseFail;
6580   }
6581 
6582   if (getLexer().isNot(AsmToken::RBrac))
6583     return MatchOperand_ParseFail;
6584   Parser.Lex();
6585 
6586   unsigned DPP8 = 0;
6587   for (size_t i = 0; i < 8; ++i)
6588     DPP8 |= (Sels[i] << (i * 3));
6589 
6590   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6591   return MatchOperand_Success;
6592 }
6593 
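// Parse a classic dpp_ctrl operand. Accepted forms are quad_perm:[a,b,c,d]
// with each element in 0..3, row_shl:n / row_shr:n / row_ror:n with n in
// 1..15, row_mirror and row_half_mirror, wave_shl:1 / wave_rol:1 /
// wave_shr:1 / wave_ror:1 and row_bcast:15 / row_bcast:31 (VI/GFX9 only),
// and row_share:n / row_xmask:n with n in 0..15 (GFX10 only).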
6594 OperandMatchResultTy
6595 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6596   using namespace AMDGPU::DPP;
6597 
6598   SMLoc S = Parser.getTok().getLoc();
6599   StringRef Prefix;
6600   int64_t Int;
6601 
6602   if (getLexer().getKind() == AsmToken::Identifier) {
6603     Prefix = Parser.getTok().getString();
6604   } else {
6605     return MatchOperand_NoMatch;
6606   }
6607 
6608   if (Prefix == "row_mirror") {
6609     Int = DppCtrl::ROW_MIRROR;
6610     Parser.Lex();
6611   } else if (Prefix == "row_half_mirror") {
6612     Int = DppCtrl::ROW_HALF_MIRROR;
6613     Parser.Lex();
6614   } else {
6615     // Check to prevent parseDPPCtrl from eating invalid tokens
6616     if (Prefix != "quad_perm"
6617         && Prefix != "row_shl"
6618         && Prefix != "row_shr"
6619         && Prefix != "row_ror"
6620         && Prefix != "wave_shl"
6621         && Prefix != "wave_rol"
6622         && Prefix != "wave_shr"
6623         && Prefix != "wave_ror"
6624         && Prefix != "row_bcast"
6625         && Prefix != "row_share"
6626         && Prefix != "row_xmask") {
6627       return MatchOperand_NoMatch;
6628     }
6629 
6630     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6631       return MatchOperand_NoMatch;
6632 
6633     if (!isVI() && !isGFX9() &&
6634         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6635          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6636          Prefix == "row_bcast"))
6637       return MatchOperand_NoMatch;
6638 
6639     Parser.Lex();
6640     if (getLexer().isNot(AsmToken::Colon))
6641       return MatchOperand_ParseFail;
6642 
6643     if (Prefix == "quad_perm") {
6644       // quad_perm:[%d,%d,%d,%d]
6645       Parser.Lex();
6646       if (getLexer().isNot(AsmToken::LBrac))
6647         return MatchOperand_ParseFail;
6648       Parser.Lex();
6649 
6650       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6651         return MatchOperand_ParseFail;
6652 
6653       for (int i = 0; i < 3; ++i) {
6654         if (getLexer().isNot(AsmToken::Comma))
6655           return MatchOperand_ParseFail;
6656         Parser.Lex();
6657 
6658         int64_t Temp;
6659         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6660           return MatchOperand_ParseFail;
6661         const int shift = i*2 + 2;
6662         Int += (Temp << shift);
6663       }
6664 
6665       if (getLexer().isNot(AsmToken::RBrac))
6666         return MatchOperand_ParseFail;
6667       Parser.Lex();
6668     } else {
6669       // sel:%d
6670       Parser.Lex();
6671       if (getParser().parseAbsoluteExpression(Int))
6672         return MatchOperand_ParseFail;
6673 
6674       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6675         Int |= DppCtrl::ROW_SHL0;
6676       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6677         Int |= DppCtrl::ROW_SHR0;
6678       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6679         Int |= DppCtrl::ROW_ROR0;
6680       } else if (Prefix == "wave_shl" && 1 == Int) {
6681         Int = DppCtrl::WAVE_SHL1;
6682       } else if (Prefix == "wave_rol" && 1 == Int) {
6683         Int = DppCtrl::WAVE_ROL1;
6684       } else if (Prefix == "wave_shr" && 1 == Int) {
6685         Int = DppCtrl::WAVE_SHR1;
6686       } else if (Prefix == "wave_ror" && 1 == Int) {
6687         Int = DppCtrl::WAVE_ROR1;
6688       } else if (Prefix == "row_bcast") {
6689         if (Int == 15) {
6690           Int = DppCtrl::BCAST15;
6691         } else if (Int == 31) {
6692           Int = DppCtrl::BCAST31;
6693         } else {
6694           return MatchOperand_ParseFail;
6695         }
6696       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6697         Int |= DppCtrl::ROW_SHARE_FIRST;
6698       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6699         Int |= DppCtrl::ROW_XMASK_FIRST;
6700       } else {
6701         return MatchOperand_ParseFail;
6702       }
6703     }
6704   }
6705 
6706   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6707   return MatchOperand_Success;
6708 }
6709 
6710 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6711   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6712 }
6713 
6714 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6715   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6716 }
6717 
6718 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6719   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6720 }
6721 
6722 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6723   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6724 }
6725 
6726 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6727   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6728 }
6729 
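// Converter shared by DPP and DPP8 instructions. Tied operands (old/src2 of
// MAC-like opcodes) are duplicated from the corresponding already-emitted
// operand, the "vcc" token of VOP2b opcodes is skipped, and the
// encoding-specific operands are appended: the fi immediate for DPP8, or the
// optional row_mask/bank_mask/bound_ctrl/fi operands (with their defaults)
// for classic DPP.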
6730 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6731   OptionalImmIndexMap OptionalIdx;
6732 
6733   unsigned I = 1;
6734   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6735   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6736     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6737   }
6738 
6739   int Fi = 0;
6740   for (unsigned E = Operands.size(); I != E; ++I) {
6741     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6742                                             MCOI::TIED_TO);
6743     if (TiedTo != -1) {
6744       assert((unsigned)TiedTo < Inst.getNumOperands());
6745       // Handle tied 'old' or 'src2' for MAC instructions.
6746       Inst.addOperand(Inst.getOperand(TiedTo));
6747     }
6748     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6749     // Add the register arguments
6750     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6751       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
6752       // Skip it.
6753       continue;
6754     }
6755 
6756     if (IsDPP8) {
6757       if (Op.isDPP8()) {
6758         Op.addImmOperands(Inst, 1);
6759       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6760         Op.addRegWithFPInputModsOperands(Inst, 2);
6761       } else if (Op.isFI()) {
6762         Fi = Op.getImm();
6763       } else if (Op.isReg()) {
6764         Op.addRegOperands(Inst, 1);
6765       } else {
6766         llvm_unreachable("Invalid operand type");
6767       }
6768     } else {
6769       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6770         Op.addRegWithFPInputModsOperands(Inst, 2);
6771       } else if (Op.isDPPCtrl()) {
6772         Op.addImmOperands(Inst, 1);
6773       } else if (Op.isImm()) {
6774         // Handle optional arguments
6775         OptionalIdx[Op.getImmTy()] = I;
6776       } else {
6777         llvm_unreachable("Invalid operand type");
6778       }
6779     }
6780   }
6781 
6782   if (IsDPP8) {
6783     using namespace llvm::AMDGPU::DPP;
6784     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6785   } else {
6786     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6787     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6788     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6789     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6790       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6791     }
6792   }
6793 }
6794 
6795 //===----------------------------------------------------------------------===//
6796 // sdwa
6797 //===----------------------------------------------------------------------===//
6798 
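// Parse an SDWA select operand, e.g. dst_sel:DWORD, src0_sel:BYTE_0 or
// src1_sel:WORD_1. Prefix is the operand name to match and Type the
// immediate type of the operand to create.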
6799 OperandMatchResultTy
6800 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6801                               AMDGPUOperand::ImmTy Type) {
6802   using namespace llvm::AMDGPU::SDWA;
6803 
6804   SMLoc S = Parser.getTok().getLoc();
6805   StringRef Value;
6806   OperandMatchResultTy res;
6807 
6808   res = parseStringWithPrefix(Prefix, Value);
6809   if (res != MatchOperand_Success) {
6810     return res;
6811   }
6812 
6813   int64_t Int;
6814   Int = StringSwitch<int64_t>(Value)
6815         .Case("BYTE_0", SdwaSel::BYTE_0)
6816         .Case("BYTE_1", SdwaSel::BYTE_1)
6817         .Case("BYTE_2", SdwaSel::BYTE_2)
6818         .Case("BYTE_3", SdwaSel::BYTE_3)
6819         .Case("WORD_0", SdwaSel::WORD_0)
6820         .Case("WORD_1", SdwaSel::WORD_1)
6821         .Case("DWORD", SdwaSel::DWORD)
6822         .Default(0xffffffff);
6823   Parser.Lex(); // eat last token
6824 
6825   if (Int == 0xffffffff) {
6826     return MatchOperand_ParseFail;
6827   }
6828 
6829   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6830   return MatchOperand_Success;
6831 }
6832 
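// Parse the SDWA dst_unused operand, e.g. dst_unused:UNUSED_PAD or
// dst_unused:UNUSED_PRESERVE.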
6833 OperandMatchResultTy
6834 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6835   using namespace llvm::AMDGPU::SDWA;
6836 
6837   SMLoc S = Parser.getTok().getLoc();
6838   StringRef Value;
6839   OperandMatchResultTy res;
6840 
6841   res = parseStringWithPrefix("dst_unused", Value);
6842   if (res != MatchOperand_Success) {
6843     return res;
6844   }
6845 
6846   int64_t Int;
6847   Int = StringSwitch<int64_t>(Value)
6848         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6849         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6850         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6851         .Default(0xffffffff);
6852   Parser.Lex(); // eat last token
6853 
6854   if (Int == 0xffffffff) {
6855     return MatchOperand_ParseFail;
6856   }
6857 
6858   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6859   return MatchOperand_Success;
6860 }
6861 
6862 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6863   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6864 }
6865 
6866 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6867   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6868 }
6869 
6870 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6871   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6872 }
6873 
6874 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6875   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6876 }
6877 
6878 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6879   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6880 }
6881 
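// Common converter for SDWA instructions. Depending on SkipDstVcc/SkipSrcVcc,
// the "vcc" operand of VOP2b/VOPC opcodes is skipped where required, sources
// with input modifiers are emitted as (modifiers, operand) pairs, and the
// optional clamp/omod/dst_sel/dst_unused/src*_sel operands expected by the
// VOP1, VOP2 or VOPC SDWA encoding are appended with their defaults.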
6882 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6883                               uint64_t BasicInstType,
6884                               bool SkipDstVcc,
6885                               bool SkipSrcVcc) {
6886   using namespace llvm::AMDGPU::SDWA;
6887 
6888   OptionalImmIndexMap OptionalIdx;
6889   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6890   bool SkippedVcc = false;
6891 
6892   unsigned I = 1;
6893   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6894   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6895     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6896   }
6897 
6898   for (unsigned E = Operands.size(); I != E; ++I) {
6899     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6900     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6901         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6902       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
6903       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6904       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
6905       // Skip VCC only if we didn't skip it on the previous iteration.
6906       // Note that src0 and src1 occupy 2 slots each because of modifiers.
6907       if (BasicInstType == SIInstrFlags::VOP2 &&
6908           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6909            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6910         SkippedVcc = true;
6911         continue;
6912       } else if (BasicInstType == SIInstrFlags::VOPC &&
6913                  Inst.getNumOperands() == 0) {
6914         SkippedVcc = true;
6915         continue;
6916       }
6917     }
6918     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6919       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6920     } else if (Op.isImm()) {
6921       // Handle optional arguments
6922       OptionalIdx[Op.getImmTy()] = I;
6923     } else {
6924       llvm_unreachable("Invalid operand type");
6925     }
6926     SkippedVcc = false;
6927   }
6928 
6929   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6930       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6931       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6932     // V_NOP_sdwa_{vi,gfx9,gfx10} has no optional sdwa arguments.
6933     switch (BasicInstType) {
6934     case SIInstrFlags::VOP1:
6935       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6936       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6937         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6938       }
6939       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6940       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6941       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6942       break;
6943 
6944     case SIInstrFlags::VOP2:
6945       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6946       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6947         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6948       }
6949       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6950       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6951       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6952       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6953       break;
6954 
6955     case SIInstrFlags::VOPC:
6956       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6957         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6958       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6959       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6960       break;
6961 
6962     default:
6963       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6964     }
6965   }
6966 
6967   // Special case v_mac_{f16, f32}:
6968   // they have a src2 register operand that is tied to the dst operand.
6969   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6970       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6971     auto it = Inst.begin();
6972     std::advance(
6973       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6974     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6975   }
6976 }
6977 
6978 //===----------------------------------------------------------------------===//
6979 // mAI
6980 //===----------------------------------------------------------------------===//
6981 
6982 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6983   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6984 }
6985 
6986 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6987   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6988 }
6989 
6990 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6991   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6992 }
6993 
6994 /// Force static initialization.
6995 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
6996   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6997   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6998 }
6999 
7000 #define GET_REGISTER_MATCHER
7001 #define GET_MATCHER_IMPLEMENTATION
7002 #define GET_MNEMONIC_SPELL_CHECKER
7003 #include "AMDGPUGenAsmMatcher.inc"
7004 
7005 // This function should be defined after the auto-generated include so that we
7006 // have the MatchClassKind enum defined.
7007 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7008                                                      unsigned Kind) {
7009   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7010   // But MatchInstructionImpl() expects a token and fails to validate the
7011   // operand. This method checks whether we were given an immediate operand but
7012   // are expected to produce the corresponding token.
7013   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7014   switch (Kind) {
7015   case MCK_addr64:
7016     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7017   case MCK_gds:
7018     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7019   case MCK_lds:
7020     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7021   case MCK_glc:
7022     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7023   case MCK_idxen:
7024     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7025   case MCK_offen:
7026     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7027   case MCK_SSrcB32:
7028     // When operands have expression values, they will return true for isToken,
7029     // because it is not possible to distinguish between a token and an
7030     // expression at parse time. MatchInstructionImpl() will always try to
7031     // match an operand as a token, when isToken returns true, and when the
7032     // name of the expression is not a valid token, the match will fail,
7033     // so we need to handle it here.
7034     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7035   case MCK_SSrcF32:
7036     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7037   case MCK_SoppBrTarget:
7038     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7039   case MCK_VReg32OrOff:
7040     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7041   case MCK_InterpSlot:
7042     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7043   case MCK_Attr:
7044     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7045   case MCK_AttrChan:
7046     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7047   case MCK_SReg_64:
7048   case MCK_SReg_64_XEXEC:
7049     // Null is defined as a 32-bit register but
7050     // it should also be enabled with 64-bit operands.
7051     // The following code enables it for SReg_64 operands
7052     // used as source and destination. Remaining source
7053     // operands are handled in isInlinableImm.
7054     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7055   default:
7056     return Match_InvalidOperand;
7057   }
7058 }
7059 
7060 //===----------------------------------------------------------------------===//
7061 // endpgm
7062 //===----------------------------------------------------------------------===//
7063 
7064 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7065   SMLoc S = Parser.getTok().getLoc();
7066   int64_t Imm = 0;
7067 
7068   if (!parseExpr(Imm)) {
7069     // The operand is optional; if not present, default to 0.
7070     Imm = 0;
7071   }
7072 
7073   if (!isUInt<16>(Imm)) {
7074     Error(S, "expected a 16-bit value");
7075     return MatchOperand_ParseFail;
7076   }
7077 
7078   Operands.push_back(
7079       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7080   return MatchOperand_Success;
7081 }
7082 
7083 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7084