1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcF16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMRDOffset20() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   ImmTy getImmTy() const {
693     assert(isImm());
694     return Imm.Type;
695   }
696 
697   unsigned getReg() const override {
698     assert(isRegKind());
699     return Reg.RegNo;
700   }
701 
702   SMLoc getStartLoc() const override {
703     return StartLoc;
704   }
705 
706   SMLoc getEndLoc() const override {
707     return EndLoc;
708   }
709 
710   SMRange getLocRange() const {
711     return SMRange(StartLoc, EndLoc);
712   }
713 
714   Modifiers getModifiers() const {
715     assert(isRegKind() || isImmTy(ImmTyNone));
716     return isRegKind() ? Reg.Mods : Imm.Mods;
717   }
718 
719   void setModifiers(Modifiers Mods) {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     if (isRegKind())
722       Reg.Mods = Mods;
723     else
724       Imm.Mods = Mods;
725   }
726 
727   bool hasModifiers() const {
728     return getModifiers().hasModifiers();
729   }
730 
731   bool hasFPModifiers() const {
732     return getModifiers().hasFPModifiers();
733   }
734 
735   bool hasIntModifiers() const {
736     return getModifiers().hasIntModifiers();
737   }
738 
739   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
740 
741   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
742 
743   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
744 
745   template <unsigned Bitwidth>
746   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
747 
748   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
749     addKImmFPOperands<16>(Inst, N);
750   }
751 
752   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
753     addKImmFPOperands<32>(Inst, N);
754   }
755 
756   void addRegOperands(MCInst &Inst, unsigned N) const;
757 
758   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
759     addRegOperands(Inst, N);
760   }
761 
762   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
763     if (isRegKind())
764       addRegOperands(Inst, N);
765     else if (isExpr())
766       Inst.addOperand(MCOperand::createExpr(Expr));
767     else
768       addImmOperands(Inst, N);
769   }
770 
771   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
772     Modifiers Mods = getModifiers();
773     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
774     if (isRegKind()) {
775       addRegOperands(Inst, N);
776     } else {
777       addImmOperands(Inst, N, false);
778     }
779   }
780 
781   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
782     assert(!hasIntModifiers());
783     addRegOrImmWithInputModsOperands(Inst, N);
784   }
785 
786   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasFPModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
792     Modifiers Mods = getModifiers();
793     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
794     assert(isRegKind());
795     addRegOperands(Inst, N);
796   }
797 
798   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
799     assert(!hasIntModifiers());
800     addRegWithInputModsOperands(Inst, N);
801   }
802 
803   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasFPModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
809     if (isImm())
810       addImmOperands(Inst, N);
811     else {
812       assert(isExpr());
813       Inst.addOperand(MCOperand::createExpr(Expr));
814     }
815   }
816 
817   static void printImmTy(raw_ostream& OS, ImmTy Type) {
818     switch (Type) {
819     case ImmTyNone: OS << "None"; break;
820     case ImmTyGDS: OS << "GDS"; break;
821     case ImmTyLDS: OS << "LDS"; break;
822     case ImmTyOffen: OS << "Offen"; break;
823     case ImmTyIdxen: OS << "Idxen"; break;
824     case ImmTyAddr64: OS << "Addr64"; break;
825     case ImmTyOffset: OS << "Offset"; break;
826     case ImmTyInstOffset: OS << "InstOffset"; break;
827     case ImmTyOffset0: OS << "Offset0"; break;
828     case ImmTyOffset1: OS << "Offset1"; break;
829     case ImmTyDLC: OS << "DLC"; break;
830     case ImmTyGLC: OS << "GLC"; break;
831     case ImmTySLC: OS << "SLC"; break;
832     case ImmTySWZ: OS << "SWZ"; break;
833     case ImmTyTFE: OS << "TFE"; break;
834     case ImmTyD16: OS << "D16"; break;
835     case ImmTyFORMAT: OS << "FORMAT"; break;
836     case ImmTyClampSI: OS << "ClampSI"; break;
837     case ImmTyOModSI: OS << "OModSI"; break;
838     case ImmTyDPP8: OS << "DPP8"; break;
839     case ImmTyDppCtrl: OS << "DppCtrl"; break;
840     case ImmTyDppRowMask: OS << "DppRowMask"; break;
841     case ImmTyDppBankMask: OS << "DppBankMask"; break;
842     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
843     case ImmTyDppFi: OS << "FI"; break;
844     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
845     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
846     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
847     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
848     case ImmTyDMask: OS << "DMask"; break;
849     case ImmTyDim: OS << "Dim"; break;
850     case ImmTyUNorm: OS << "UNorm"; break;
851     case ImmTyDA: OS << "DA"; break;
852     case ImmTyR128A16: OS << "R128A16"; break;
853     case ImmTyA16: OS << "A16"; break;
854     case ImmTyLWE: OS << "LWE"; break;
855     case ImmTyOff: OS << "Off"; break;
856     case ImmTyExpTgt: OS << "ExpTgt"; break;
857     case ImmTyExpCompr: OS << "ExpCompr"; break;
858     case ImmTyExpVM: OS << "ExpVM"; break;
859     case ImmTyHwreg: OS << "Hwreg"; break;
860     case ImmTySendMsg: OS << "SendMsg"; break;
861     case ImmTyInterpSlot: OS << "InterpSlot"; break;
862     case ImmTyInterpAttr: OS << "InterpAttr"; break;
863     case ImmTyAttrChan: OS << "AttrChan"; break;
864     case ImmTyOpSel: OS << "OpSel"; break;
865     case ImmTyOpSelHi: OS << "OpSelHi"; break;
866     case ImmTyNegLo: OS << "NegLo"; break;
867     case ImmTyNegHi: OS << "NegHi"; break;
868     case ImmTySwizzle: OS << "Swizzle"; break;
869     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
870     case ImmTyHigh: OS << "High"; break;
871     case ImmTyBLGP: OS << "BLGP"; break;
872     case ImmTyCBSZ: OS << "CBSZ"; break;
873     case ImmTyABID: OS << "ABID"; break;
874     case ImmTyEndpgm: OS << "Endpgm"; break;
875     }
876   }
877 
878   void print(raw_ostream &OS) const override {
879     switch (Kind) {
880     case Register:
881       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
882       break;
883     case Immediate:
884       OS << '<' << getImm();
885       if (getImmTy() != ImmTyNone) {
886         OS << " type: "; printImmTy(OS, getImmTy());
887       }
888       OS << " mods: " << Imm.Mods << '>';
889       break;
890     case Token:
891       OS << '\'' << getToken() << '\'';
892       break;
893     case Expression:
894       OS << "<expr " << *Expr << '>';
895       break;
896     }
897   }
898 
899   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
900                                       int64_t Val, SMLoc Loc,
901                                       ImmTy Type = ImmTyNone,
902                                       bool IsFPImm = false) {
903     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
904     Op->Imm.Val = Val;
905     Op->Imm.IsFPImm = IsFPImm;
906     Op->Imm.Type = Type;
907     Op->Imm.Mods = Modifiers();
908     Op->StartLoc = Loc;
909     Op->EndLoc = Loc;
910     return Op;
911   }
912 
913   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
914                                         StringRef Str, SMLoc Loc,
915                                         bool HasExplicitEncodingSize = true) {
916     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
917     Res->Tok.Data = Str.data();
918     Res->Tok.Length = Str.size();
919     Res->StartLoc = Loc;
920     Res->EndLoc = Loc;
921     return Res;
922   }
923 
924   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
925                                       unsigned RegNo, SMLoc S,
926                                       SMLoc E) {
927     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
928     Op->Reg.RegNo = RegNo;
929     Op->Reg.Mods = Modifiers();
930     Op->StartLoc = S;
931     Op->EndLoc = E;
932     return Op;
933   }
934 
935   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
936                                        const class MCExpr *Expr, SMLoc S) {
937     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
938     Op->Expr = Expr;
939     Op->StartLoc = S;
940     Op->EndLoc = S;
941     return Op;
942   }
943 };
944 
945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
946   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
947   return OS;
948 }
949 
950 //===----------------------------------------------------------------------===//
951 // AsmParser
952 //===----------------------------------------------------------------------===//
953 
954 // Holds info related to the current kernel, e.g. count of SGPRs used.
955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
956 // .amdgpu_hsa_kernel or at EOF.
957 class KernelScopeInfo {
958   int SgprIndexUnusedMin = -1;
959   int VgprIndexUnusedMin = -1;
960   MCContext *Ctx = nullptr;
961 
962   void usesSgprAt(int i) {
963     if (i >= SgprIndexUnusedMin) {
964       SgprIndexUnusedMin = ++i;
965       if (Ctx) {
966         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
967         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
968       }
969     }
970   }
971 
972   void usesVgprAt(int i) {
973     if (i >= VgprIndexUnusedMin) {
974       VgprIndexUnusedMin = ++i;
975       if (Ctx) {
976         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
977         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
978       }
979     }
980   }
981 
982 public:
983   KernelScopeInfo() = default;
984 
985   void initialize(MCContext &Context) {
986     Ctx = &Context;
987     usesSgprAt(SgprIndexUnusedMin = -1);
988     usesVgprAt(VgprIndexUnusedMin = -1);
989   }
990 
991   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
992     switch (RegKind) {
993       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
994       case IS_AGPR: // fall through
995       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
996       default: break;
997     }
998   }
999 };
1000 
1001 class AMDGPUAsmParser : public MCTargetAsmParser {
1002   MCAsmParser &Parser;
1003 
1004   // Number of extra operands parsed after the first optional operand.
1005   // This may be necessary to skip hardcoded mandatory operands.
1006   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1007 
1008   unsigned ForcedEncodingSize = 0;
1009   bool ForcedDPP = false;
1010   bool ForcedSDWA = false;
1011   KernelScopeInfo KernelScope;
1012 
1013   /// @name Auto-generated Match Functions
1014   /// {
1015 
1016 #define GET_ASSEMBLER_HEADER
1017 #include "AMDGPUGenAsmMatcher.inc"
1018 
1019   /// }
1020 
1021 private:
1022   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1023   bool OutOfRangeError(SMRange Range);
1024   /// Calculate VGPR/SGPR blocks required for given target, reserved
1025   /// registers, and user-specified NextFreeXGPR values.
1026   ///
1027   /// \param Features [in] Target features, used for bug corrections.
1028   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1029   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1030   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1031   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1032   /// descriptor field, if valid.
1033   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1034   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1035   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1036   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1037   /// \param VGPRBlocks [out] Result VGPR block count.
1038   /// \param SGPRBlocks [out] Result SGPR block count.
1039   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1040                           bool FlatScrUsed, bool XNACKUsed,
1041                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1042                           SMRange VGPRRange, unsigned NextFreeSGPR,
1043                           SMRange SGPRRange, unsigned &VGPRBlocks,
1044                           unsigned &SGPRBlocks);
1045   bool ParseDirectiveAMDGCNTarget();
1046   bool ParseDirectiveAMDHSAKernel();
1047   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1048   bool ParseDirectiveHSACodeObjectVersion();
1049   bool ParseDirectiveHSACodeObjectISA();
1050   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1051   bool ParseDirectiveAMDKernelCodeT();
1052   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1053   bool ParseDirectiveAMDGPUHsaKernel();
1054 
1055   bool ParseDirectiveISAVersion();
1056   bool ParseDirectiveHSAMetadata();
1057   bool ParseDirectivePALMetadataBegin();
1058   bool ParseDirectivePALMetadata();
1059   bool ParseDirectiveAMDGPULDS();
1060 
1061   /// Common code to parse out a block of text (typically YAML) between start and
1062   /// end directives.
1063   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1064                            const char *AssemblerDirectiveEnd,
1065                            std::string &CollectString);
1066 
1067   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1068                              RegisterKind RegKind, unsigned Reg1);
1069   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1070                            unsigned &RegNum, unsigned &RegWidth,
1071                            bool RestoreOnFailure = false);
1072   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1073                            unsigned &RegNum, unsigned &RegWidth,
1074                            SmallVectorImpl<AsmToken> &Tokens);
1075   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1076                            unsigned &RegWidth,
1077                            SmallVectorImpl<AsmToken> &Tokens);
1078   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1079                            unsigned &RegWidth,
1080                            SmallVectorImpl<AsmToken> &Tokens);
1081   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1082                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1083   bool ParseRegRange(unsigned& Num, unsigned& Width);
1084   unsigned getRegularReg(RegisterKind RegKind,
1085                          unsigned RegNum,
1086                          unsigned RegWidth);
1087 
1088   bool isRegister();
1089   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1090   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1091   void initializeGprCountSymbol(RegisterKind RegKind);
1092   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1093                              unsigned RegWidth);
1094   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1095                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1096   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1097                  bool IsGdsHardcoded);
1098 
1099 public:
1100   enum AMDGPUMatchResultTy {
1101     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1102   };
1103   enum OperandMode {
1104     OperandMode_Default,
1105     OperandMode_NSA,
1106   };
1107 
1108   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1109 
1110   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1111                const MCInstrInfo &MII,
1112                const MCTargetOptions &Options)
1113       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1114     MCAsmParserExtension::Initialize(Parser);
1115 
1116     if (getFeatureBits().none()) {
1117       // Set default features.
1118       copySTI().ToggleFeature("southern-islands");
1119     }
1120 
1121     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1122 
1123     {
1124       // TODO: make those pre-defined variables read-only.
1125       // Currently there is none suitable machinery in the core llvm-mc for this.
1126       // MCSymbol::isRedefinable is intended for another purpose, and
1127       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1128       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1129       MCContext &Ctx = getContext();
1130       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1131         MCSymbol *Sym =
1132             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1134         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1135         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1136         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1138       } else {
1139         MCSymbol *Sym =
1140             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1144         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1145         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1146       }
1147       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1148         initializeGprCountSymbol(IS_VGPR);
1149         initializeGprCountSymbol(IS_SGPR);
1150       } else
1151         KernelScope.initialize(getContext());
1152     }
1153   }
1154 
1155   bool hasXNACK() const {
1156     return AMDGPU::hasXNACK(getSTI());
1157   }
1158 
1159   bool hasMIMG_R128() const {
1160     return AMDGPU::hasMIMG_R128(getSTI());
1161   }
1162 
1163   bool hasPackedD16() const {
1164     return AMDGPU::hasPackedD16(getSTI());
1165   }
1166 
1167   bool hasGFX10A16() const {
1168     return AMDGPU::hasGFX10A16(getSTI());
1169   }
1170 
1171   bool isSI() const {
1172     return AMDGPU::isSI(getSTI());
1173   }
1174 
1175   bool isCI() const {
1176     return AMDGPU::isCI(getSTI());
1177   }
1178 
1179   bool isVI() const {
1180     return AMDGPU::isVI(getSTI());
1181   }
1182 
1183   bool isGFX9() const {
1184     return AMDGPU::isGFX9(getSTI());
1185   }
1186 
1187   bool isGFX10() const {
1188     return AMDGPU::isGFX10(getSTI());
1189   }
1190 
1191   bool hasInv2PiInlineImm() const {
1192     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1193   }
1194 
1195   bool hasFlatOffsets() const {
1196     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1197   }
1198 
1199   bool hasSGPR102_SGPR103() const {
1200     return !isVI() && !isGFX9();
1201   }
1202 
1203   bool hasSGPR104_SGPR105() const {
1204     return isGFX10();
1205   }
1206 
1207   bool hasIntClamp() const {
1208     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1209   }
1210 
1211   AMDGPUTargetStreamer &getTargetStreamer() {
1212     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1213     return static_cast<AMDGPUTargetStreamer &>(TS);
1214   }
1215 
1216   const MCRegisterInfo *getMRI() const {
1217     // We need this const_cast because for some reason getContext() is not const
1218     // in MCAsmParser.
1219     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1220   }
1221 
1222   const MCInstrInfo *getMII() const {
1223     return &MII;
1224   }
1225 
1226   const FeatureBitset &getFeatureBits() const {
1227     return getSTI().getFeatureBits();
1228   }
1229 
1230   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1231   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1232   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1233 
1234   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1235   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1236   bool isForcedDPP() const { return ForcedDPP; }
1237   bool isForcedSDWA() const { return ForcedSDWA; }
1238   ArrayRef<unsigned> getMatchedVariants() const;
1239 
1240   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1241   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1242                      bool RestoreOnFailure);
1243   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1244   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1245                                         SMLoc &EndLoc) override;
1246   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1247   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1248                                       unsigned Kind) override;
1249   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1250                                OperandVector &Operands, MCStreamer &Out,
1251                                uint64_t &ErrorInfo,
1252                                bool MatchingInlineAsm) override;
1253   bool ParseDirective(AsmToken DirectiveID) override;
1254   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1255                                     OperandMode Mode = OperandMode_Default);
1256   StringRef parseMnemonicSuffix(StringRef Name);
1257   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1258                         SMLoc NameLoc, OperandVector &Operands) override;
1259   //bool ProcessInstruction(MCInst &Inst);
1260 
1261   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1262 
1263   OperandMatchResultTy
1264   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1265                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1266                      bool (*ConvertResult)(int64_t &) = nullptr);
1267 
1268   OperandMatchResultTy
1269   parseOperandArrayWithPrefix(const char *Prefix,
1270                               OperandVector &Operands,
1271                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1272                               bool (*ConvertResult)(int64_t&) = nullptr);
1273 
1274   OperandMatchResultTy
1275   parseNamedBit(const char *Name, OperandVector &Operands,
1276                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1277   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1278                                              StringRef &Value);
1279 
1280   bool isModifier();
1281   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1282   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1283   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1284   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1285   bool parseSP3NegModifier();
1286   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1287   OperandMatchResultTy parseReg(OperandVector &Operands);
1288   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1289   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1290   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1291   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1292   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1293   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1294   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1295 
1296   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1297   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1298   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1299   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1300 
1301   bool parseCnt(int64_t &IntVal);
1302   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1303   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1304 
1305 private:
1306   struct OperandInfoTy {
1307     int64_t Id;
1308     bool IsSymbolic = false;
1309     bool IsDefined = false;
1310 
1311     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1312   };
1313 
1314   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1315   bool validateSendMsg(const OperandInfoTy &Msg,
1316                        const OperandInfoTy &Op,
1317                        const OperandInfoTy &Stream,
1318                        const SMLoc Loc);
1319 
1320   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1321   bool validateHwreg(const OperandInfoTy &HwReg,
1322                      const int64_t Offset,
1323                      const int64_t Width,
1324                      const SMLoc Loc);
1325 
1326   void errorExpTgt();
1327   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1328   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1329 
1330   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1331   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1332   bool validateSOPLiteral(const MCInst &Inst) const;
1333   bool validateConstantBusLimitations(const MCInst &Inst);
1334   bool validateEarlyClobberLimitations(const MCInst &Inst);
1335   bool validateIntClampSupported(const MCInst &Inst);
1336   bool validateMIMGAtomicDMask(const MCInst &Inst);
1337   bool validateMIMGGatherDMask(const MCInst &Inst);
1338   bool validateMovrels(const MCInst &Inst);
1339   bool validateMIMGDataSize(const MCInst &Inst);
1340   bool validateMIMGAddrSize(const MCInst &Inst);
1341   bool validateMIMGD16(const MCInst &Inst);
1342   bool validateMIMGDim(const MCInst &Inst);
1343   bool validateLdsDirect(const MCInst &Inst);
1344   bool validateOpSel(const MCInst &Inst);
1345   bool validateVccOperand(unsigned Reg) const;
1346   bool validateVOP3Literal(const MCInst &Inst) const;
1347   unsigned getConstantBusLimit(unsigned Opcode) const;
1348   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1349   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1350   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1351 
1352   bool isId(const StringRef Id) const;
1353   bool isId(const AsmToken &Token, const StringRef Id) const;
1354   bool isToken(const AsmToken::TokenKind Kind) const;
1355   bool trySkipId(const StringRef Id);
1356   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1357   bool trySkipToken(const AsmToken::TokenKind Kind);
1358   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1359   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1360   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1361   AsmToken::TokenKind getTokenKind() const;
1362   bool parseExpr(int64_t &Imm);
1363   bool parseExpr(OperandVector &Operands);
1364   StringRef getTokenStr() const;
1365   AsmToken peekToken();
1366   AsmToken getToken() const;
1367   SMLoc getLoc() const;
1368   void lex();
1369 
1370 public:
1371   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1372   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1373 
1374   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1375   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1376   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1377   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1378   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1379   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1380 
1381   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1382                             const unsigned MinVal,
1383                             const unsigned MaxVal,
1384                             const StringRef ErrMsg);
1385   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1386   bool parseSwizzleOffset(int64_t &Imm);
1387   bool parseSwizzleMacro(int64_t &Imm);
1388   bool parseSwizzleQuadPerm(int64_t &Imm);
1389   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1390   bool parseSwizzleBroadcast(int64_t &Imm);
1391   bool parseSwizzleSwap(int64_t &Imm);
1392   bool parseSwizzleReverse(int64_t &Imm);
1393 
1394   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1395   int64_t parseGPRIdxMacro();
1396 
1397   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1398   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1399   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1400   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1401   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1402 
1403   AMDGPUOperand::Ptr defaultDLC() const;
1404   AMDGPUOperand::Ptr defaultGLC() const;
1405   AMDGPUOperand::Ptr defaultSLC() const;
1406 
1407   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1408   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1409   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1410   AMDGPUOperand::Ptr defaultFlatOffset() const;
1411 
1412   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1413 
1414   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1415                OptionalImmIndexMap &OptionalIdx);
1416   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1417   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1418   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1419 
1420   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1421 
1422   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1423                bool IsAtomic = false);
1424   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1425 
1426   OperandMatchResultTy parseDim(OperandVector &Operands);
1427   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1428   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1429   AMDGPUOperand::Ptr defaultRowMask() const;
1430   AMDGPUOperand::Ptr defaultBankMask() const;
1431   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1432   AMDGPUOperand::Ptr defaultFI() const;
1433   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1434   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1435 
1436   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1437                                     AMDGPUOperand::ImmTy Type);
1438   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1439   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1440   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1441   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1442   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1443   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1444   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1445                uint64_t BasicInstType,
1446                bool SkipDstVcc = false,
1447                bool SkipSrcVcc = false);
1448 
1449   AMDGPUOperand::Ptr defaultBLGP() const;
1450   AMDGPUOperand::Ptr defaultCBSZ() const;
1451   AMDGPUOperand::Ptr defaultABID() const;
1452 
1453   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1454   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1455 };
1456 
1457 struct OptionalOperand {
1458   const char *Name;
1459   AMDGPUOperand::ImmTy Type;
1460   bool IsBit;
1461   bool (*ConvertResult)(int64_t&);
1462 };
1463 
1464 } // end anonymous namespace
1465 
1466 // May be called with integer type with equivalent bitwidth.
1467 static const fltSemantics *getFltSemantics(unsigned Size) {
1468   switch (Size) {
1469   case 4:
1470     return &APFloat::IEEEsingle();
1471   case 8:
1472     return &APFloat::IEEEdouble();
1473   case 2:
1474     return &APFloat::IEEEhalf();
1475   default:
1476     llvm_unreachable("unsupported fp type");
1477   }
1478 }
1479 
1480 static const fltSemantics *getFltSemantics(MVT VT) {
1481   return getFltSemantics(VT.getSizeInBits() / 8);
1482 }
1483 
1484 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1485   switch (OperandType) {
1486   case AMDGPU::OPERAND_REG_IMM_INT32:
1487   case AMDGPU::OPERAND_REG_IMM_FP32:
1488   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1489   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1490   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1491   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1492     return &APFloat::IEEEsingle();
1493   case AMDGPU::OPERAND_REG_IMM_INT64:
1494   case AMDGPU::OPERAND_REG_IMM_FP64:
1495   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1496   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1497     return &APFloat::IEEEdouble();
1498   case AMDGPU::OPERAND_REG_IMM_INT16:
1499   case AMDGPU::OPERAND_REG_IMM_FP16:
1500   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1501   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1502   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1503   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1504   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1505   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1506   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1507   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1508   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1509   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1510     return &APFloat::IEEEhalf();
1511   default:
1512     llvm_unreachable("unsupported fp type");
1513   }
1514 }
1515 
1516 //===----------------------------------------------------------------------===//
1517 // Operand
1518 //===----------------------------------------------------------------------===//
1519 
1520 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1521   bool Lost;
1522 
1523   // Convert literal to single precision
1524   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1525                                                APFloat::rmNearestTiesToEven,
1526                                                &Lost);
1527   // We allow precision lost but not overflow or underflow
1528   if (Status != APFloat::opOK &&
1529       Lost &&
1530       ((Status & APFloat::opOverflow)  != 0 ||
1531        (Status & APFloat::opUnderflow) != 0)) {
1532     return false;
1533   }
1534 
1535   return true;
1536 }
1537 
1538 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1539   return isUIntN(Size, Val) || isIntN(Size, Val);
1540 }
1541 
1542 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1543 
1544   // This is a hack to enable named inline values like
1545   // shared_base with both 32-bit and 64-bit operands.
1546   // Note that these values are defined as
1547   // 32-bit operands only.
1548   if (isInlineValue()) {
1549     return true;
1550   }
1551 
1552   if (!isImmTy(ImmTyNone)) {
1553     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1554     return false;
1555   }
1556   // TODO: We should avoid using host float here. It would be better to
1557   // check the float bit values which is what a few other places do.
1558   // We've had bot failures before due to weird NaN support on mips hosts.
1559 
1560   APInt Literal(64, Imm.Val);
1561 
1562   if (Imm.IsFPImm) { // We got fp literal token
1563     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1564       return AMDGPU::isInlinableLiteral64(Imm.Val,
1565                                           AsmParser->hasInv2PiInlineImm());
1566     }
1567 
1568     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1569     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1570       return false;
1571 
1572     if (type.getScalarSizeInBits() == 16) {
1573       return AMDGPU::isInlinableLiteral16(
1574         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1575         AsmParser->hasInv2PiInlineImm());
1576     }
1577 
1578     // Check if single precision literal is inlinable
1579     return AMDGPU::isInlinableLiteral32(
1580       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1581       AsmParser->hasInv2PiInlineImm());
1582   }
1583 
1584   // We got int literal token.
1585   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1586     return AMDGPU::isInlinableLiteral64(Imm.Val,
1587                                         AsmParser->hasInv2PiInlineImm());
1588   }
1589 
1590   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1591     return false;
1592   }
1593 
1594   if (type.getScalarSizeInBits() == 16) {
1595     return AMDGPU::isInlinableLiteral16(
1596       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1597       AsmParser->hasInv2PiInlineImm());
1598   }
1599 
1600   return AMDGPU::isInlinableLiteral32(
1601     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1602     AsmParser->hasInv2PiInlineImm());
1603 }
1604 
1605 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1606   // Check that this immediate can be added as literal
1607   if (!isImmTy(ImmTyNone)) {
1608     return false;
1609   }
1610 
1611   if (!Imm.IsFPImm) {
1612     // We got int literal token.
1613 
1614     if (type == MVT::f64 && hasFPModifiers()) {
1615       // FP modifiers cannot be applied to integer literals while preserving the
1616       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1617       // To avoid ambiguity, disable these cases.
1618       return false;
1619     }
1620 
1621     unsigned Size = type.getSizeInBits();
1622     if (Size == 64)
1623       Size = 32;
1624 
1625     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1626     // types.
1627     return isSafeTruncation(Imm.Val, Size);
1628   }
1629 
1630   // We got fp literal token
1631   if (type == MVT::f64) { // Expected 64-bit fp operand
1632     // The low 32 bits of the literal would be set to zero, but we accept such literals.
1633     return true;
1634   }
1635 
1636   if (type == MVT::i64) { // Expected 64-bit int operand
1637     // We don't allow fp literals in 64-bit integer instructions. It is
1638     // unclear how we should encode them.
1639     return false;
1640   }
1641 
1642   // We allow fp literals with f16x2 operands assuming that the specified
1643   // literal goes into the lower half and the upper half is zero. We also
1644   // require that the literal can be losslessly converted to f16.
1645   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1646                      (type == MVT::v2i16)? MVT::i16 : type;
1647 
1648   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1649   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1650 }
1651 
1652 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1653   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1654 }
1655 
1656 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1657   if (AsmParser->isVI())
1658     return isVReg32();
1659   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1660     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1661   else
1662     return false;
1663 }
1664 
1665 bool AMDGPUOperand::isSDWAFP16Operand() const {
1666   return isSDWAOperand(MVT::f16);
1667 }
1668 
1669 bool AMDGPUOperand::isSDWAFP32Operand() const {
1670   return isSDWAOperand(MVT::f32);
1671 }
1672 
1673 bool AMDGPUOperand::isSDWAInt16Operand() const {
1674   return isSDWAOperand(MVT::i16);
1675 }
1676 
1677 bool AMDGPUOperand::isSDWAInt32Operand() const {
1678   return isSDWAOperand(MVT::i32);
1679 }
1680 
1681 bool AMDGPUOperand::isBoolReg() const {
1682   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1683          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1684 }
1685 
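// Fold the 'abs'/'neg' input modifiers directly into a floating-point literal
// by clearing or flipping its sign bit.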
1686 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1687 {
1688   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1689   assert(Size == 2 || Size == 4 || Size == 8);
1690 
1691   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1692 
1693   if (Imm.Mods.Abs) {
1694     Val &= ~FpSignMask;
1695   }
1696   if (Imm.Mods.Neg) {
1697     Val ^= FpSignMask;
1698   }
1699 
1700   return Val;
1701 }
1702 
1703 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1704   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1705                              Inst.getNumOperands())) {
1706     addLiteralImmOperand(Inst, Imm.Val,
1707                          ApplyModifiers &
1708                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1709   } else {
1710     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1711     Inst.addOperand(MCOperand::createImm(Imm.Val));
1712   }
1713 }
1714 
1715 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1716   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1717   auto OpNum = Inst.getNumOperands();
1718   // Check that this operand accepts literals
1719   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1720 
1721   if (ApplyModifiers) {
1722     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1723     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1724     Val = applyInputFPModifiers(Val, Size);
1725   }
1726 
1727   APInt Literal(64, Val);
1728   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1729 
1730   if (Imm.IsFPImm) { // We got fp literal token
1731     switch (OpTy) {
1732     case AMDGPU::OPERAND_REG_IMM_INT64:
1733     case AMDGPU::OPERAND_REG_IMM_FP64:
1734     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1735     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1736       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1737                                        AsmParser->hasInv2PiInlineImm())) {
1738         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1739         return;
1740       }
1741 
1742       // Non-inlineable
1743       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1744         // For fp operands we check if the low 32 bits are zero
1745         if (Literal.getLoBits(32) != 0) {
1746           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1747           "Can't encode literal as exact 64-bit floating-point operand. "
1748           "Low 32-bits will be set to zero");
1749         }
1750 
1751         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1752         return;
1753       }
1754 
1755       // We don't allow fp literals in 64-bit integer instructions. It is
1756       // unclear how we should encode them. This case should be checked earlier
1757       // in predicate methods (isLiteralImm())
1758       llvm_unreachable("fp literal in 64-bit integer instruction.");
1759 
1760     case AMDGPU::OPERAND_REG_IMM_INT32:
1761     case AMDGPU::OPERAND_REG_IMM_FP32:
1762     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1763     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1764     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1765     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1766     case AMDGPU::OPERAND_REG_IMM_INT16:
1767     case AMDGPU::OPERAND_REG_IMM_FP16:
1768     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1769     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1770     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1771     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1772     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1773     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1774     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1775     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1776     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1777     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1778       bool lost;
1779       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1780       // Convert the literal to the operand's floating-point type
1781       FPLiteral.convert(*getOpFltSemantics(OpTy),
1782                         APFloat::rmNearestTiesToEven, &lost);
1783       // We allow precision loss but not overflow or underflow. This should be
1784       // checked earlier in isLiteralImm()
1785 
1786       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1787       Inst.addOperand(MCOperand::createImm(ImmVal));
1788       return;
1789     }
1790     default:
1791       llvm_unreachable("invalid operand size");
1792     }
1793 
1794     return;
1795   }
1796 
1797   // We got int literal token.
1798   // Only sign extend inline immediates.
1799   switch (OpTy) {
1800   case AMDGPU::OPERAND_REG_IMM_INT32:
1801   case AMDGPU::OPERAND_REG_IMM_FP32:
1802   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1803   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1806   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1807   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1808     if (isSafeTruncation(Val, 32) &&
1809         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1810                                      AsmParser->hasInv2PiInlineImm())) {
1811       Inst.addOperand(MCOperand::createImm(Val));
1812       return;
1813     }
1814 
1815     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1816     return;
1817 
1818   case AMDGPU::OPERAND_REG_IMM_INT64:
1819   case AMDGPU::OPERAND_REG_IMM_FP64:
1820   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1821   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1822     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1823       Inst.addOperand(MCOperand::createImm(Val));
1824       return;
1825     }
1826 
1827     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1828     return;
1829 
1830   case AMDGPU::OPERAND_REG_IMM_INT16:
1831   case AMDGPU::OPERAND_REG_IMM_FP16:
1832   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1833   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1834   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1835   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1836     if (isSafeTruncation(Val, 16) &&
1837         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1838                                      AsmParser->hasInv2PiInlineImm())) {
1839       Inst.addOperand(MCOperand::createImm(Val));
1840       return;
1841     }
1842 
1843     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1844     return;
1845 
1846   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1847   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1848   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1849   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1850     assert(isSafeTruncation(Val, 16));
1851     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1852                                         AsmParser->hasInv2PiInlineImm()));
1853 
1854     Inst.addOperand(MCOperand::createImm(Val));
1855     return;
1856   }
1857   default:
1858     llvm_unreachable("invalid operand size");
1859   }
1860 }
1861 
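// Encode a k-immediate operand (a literal carried inside the instruction, used
// by madmk etc.): integer tokens are truncated to Bitwidth, while fp tokens are
// converted from double to the fp format of the given Bitwidth.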
1862 template <unsigned Bitwidth>
1863 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1864   APInt Literal(64, Imm.Val);
1865 
1866   if (!Imm.IsFPImm) {
1867     // We got int literal token.
1868     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1869     return;
1870   }
1871 
1872   bool Lost;
1873   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1874   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1875                     APFloat::rmNearestTiesToEven, &Lost);
1876   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1877 }
1878 
1879 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1880   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1881 }
1882 
1883 static bool isInlineValue(unsigned Reg) {
1884   switch (Reg) {
1885   case AMDGPU::SRC_SHARED_BASE:
1886   case AMDGPU::SRC_SHARED_LIMIT:
1887   case AMDGPU::SRC_PRIVATE_BASE:
1888   case AMDGPU::SRC_PRIVATE_LIMIT:
1889   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1890     return true;
1891   case AMDGPU::SRC_VCCZ:
1892   case AMDGPU::SRC_EXECZ:
1893   case AMDGPU::SRC_SCC:
1894     return true;
1895   case AMDGPU::SGPR_NULL:
1896     return true;
1897   default:
1898     return false;
1899   }
1900 }
1901 
1902 bool AMDGPUOperand::isInlineValue() const {
1903   return isRegKind() && ::isInlineValue(getReg());
1904 }
1905 
1906 //===----------------------------------------------------------------------===//
1907 // AsmParser
1908 //===----------------------------------------------------------------------===//
1909 
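// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or return -1 if no such class exists.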
1910 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1911   if (Is == IS_VGPR) {
1912     switch (RegWidth) {
1913       default: return -1;
1914       case 1: return AMDGPU::VGPR_32RegClassID;
1915       case 2: return AMDGPU::VReg_64RegClassID;
1916       case 3: return AMDGPU::VReg_96RegClassID;
1917       case 4: return AMDGPU::VReg_128RegClassID;
1918       case 5: return AMDGPU::VReg_160RegClassID;
1919       case 6: return AMDGPU::VReg_192RegClassID;
1920       case 8: return AMDGPU::VReg_256RegClassID;
1921       case 16: return AMDGPU::VReg_512RegClassID;
1922       case 32: return AMDGPU::VReg_1024RegClassID;
1923     }
1924   } else if (Is == IS_TTMP) {
1925     switch (RegWidth) {
1926       default: return -1;
1927       case 1: return AMDGPU::TTMP_32RegClassID;
1928       case 2: return AMDGPU::TTMP_64RegClassID;
1929       case 4: return AMDGPU::TTMP_128RegClassID;
1930       case 8: return AMDGPU::TTMP_256RegClassID;
1931       case 16: return AMDGPU::TTMP_512RegClassID;
1932     }
1933   } else if (Is == IS_SGPR) {
1934     switch (RegWidth) {
1935       default: return -1;
1936       case 1: return AMDGPU::SGPR_32RegClassID;
1937       case 2: return AMDGPU::SGPR_64RegClassID;
1938       case 3: return AMDGPU::SGPR_96RegClassID;
1939       case 4: return AMDGPU::SGPR_128RegClassID;
1940       case 5: return AMDGPU::SGPR_160RegClassID;
1941       case 6: return AMDGPU::SGPR_192RegClassID;
1942       case 8: return AMDGPU::SGPR_256RegClassID;
1943       case 16: return AMDGPU::SGPR_512RegClassID;
1944     }
1945   } else if (Is == IS_AGPR) {
1946     switch (RegWidth) {
1947       default: return -1;
1948       case 1: return AMDGPU::AGPR_32RegClassID;
1949       case 2: return AMDGPU::AReg_64RegClassID;
1950       case 3: return AMDGPU::AReg_96RegClassID;
1951       case 4: return AMDGPU::AReg_128RegClassID;
1952       case 5: return AMDGPU::AReg_160RegClassID;
1953       case 6: return AMDGPU::AReg_192RegClassID;
1954       case 8: return AMDGPU::AReg_256RegClassID;
1955       case 16: return AMDGPU::AReg_512RegClassID;
1956       case 32: return AMDGPU::AReg_1024RegClassID;
1957     }
1958   }
1959   return -1;
1960 }
1961 
1962 static unsigned getSpecialRegForName(StringRef RegName) {
1963   return StringSwitch<unsigned>(RegName)
1964     .Case("exec", AMDGPU::EXEC)
1965     .Case("vcc", AMDGPU::VCC)
1966     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1967     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1968     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1969     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1970     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1971     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1972     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1973     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1974     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1975     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1976     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1977     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1978     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1979     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1980     .Case("m0", AMDGPU::M0)
1981     .Case("vccz", AMDGPU::SRC_VCCZ)
1982     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1983     .Case("execz", AMDGPU::SRC_EXECZ)
1984     .Case("src_execz", AMDGPU::SRC_EXECZ)
1985     .Case("scc", AMDGPU::SRC_SCC)
1986     .Case("src_scc", AMDGPU::SRC_SCC)
1987     .Case("tba", AMDGPU::TBA)
1988     .Case("tma", AMDGPU::TMA)
1989     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1990     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1991     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1992     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1993     .Case("vcc_lo", AMDGPU::VCC_LO)
1994     .Case("vcc_hi", AMDGPU::VCC_HI)
1995     .Case("exec_lo", AMDGPU::EXEC_LO)
1996     .Case("exec_hi", AMDGPU::EXEC_HI)
1997     .Case("tma_lo", AMDGPU::TMA_LO)
1998     .Case("tma_hi", AMDGPU::TMA_HI)
1999     .Case("tba_lo", AMDGPU::TBA_LO)
2000     .Case("tba_hi", AMDGPU::TBA_HI)
2001     .Case("pc", AMDGPU::PC_REG)
2002     .Case("null", AMDGPU::SGPR_NULL)
2003     .Default(AMDGPU::NoRegister);
2004 }
2005 
2006 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2007                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2008   auto R = parseRegister();
2009   if (!R) return true;
2010   assert(R->isReg());
2011   RegNo = R->getReg();
2012   StartLoc = R->getStartLoc();
2013   EndLoc = R->getEndLoc();
2014   return false;
2015 }
2016 
2017 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2018                                     SMLoc &EndLoc) {
2019   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2020 }
2021 
2022 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2023                                                        SMLoc &StartLoc,
2024                                                        SMLoc &EndLoc) {
2025   bool Result =
2026       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2027   bool PendingErrors = getParser().hasPendingError();
2028   getParser().clearPendingErrors();
2029   if (PendingErrors)
2030     return MatchOperand_ParseFail;
2031   if (Result)
2032     return MatchOperand_NoMatch;
2033   return MatchOperand_Success;
2034 }
2035 
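// Extend a register list such as [s0,s1,s2,s3] with the next parsed register:
// halves of special registers must form a known pair (e.g. exec_lo/exec_hi),
// and regular registers must be consecutive.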
2036 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2037                                             RegisterKind RegKind, unsigned Reg1) {
2038   switch (RegKind) {
2039   case IS_SPECIAL:
2040     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2041       Reg = AMDGPU::EXEC;
2042       RegWidth = 2;
2043       return true;
2044     }
2045     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2046       Reg = AMDGPU::FLAT_SCR;
2047       RegWidth = 2;
2048       return true;
2049     }
2050     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2051       Reg = AMDGPU::XNACK_MASK;
2052       RegWidth = 2;
2053       return true;
2054     }
2055     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2056       Reg = AMDGPU::VCC;
2057       RegWidth = 2;
2058       return true;
2059     }
2060     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2061       Reg = AMDGPU::TBA;
2062       RegWidth = 2;
2063       return true;
2064     }
2065     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2066       Reg = AMDGPU::TMA;
2067       RegWidth = 2;
2068       return true;
2069     }
2070     return false;
2071   case IS_VGPR:
2072   case IS_SGPR:
2073   case IS_AGPR:
2074   case IS_TTMP:
2075     if (Reg1 != Reg + RegWidth) {
2076       return false;
2077     }
2078     RegWidth++;
2079     return true;
2080   default:
2081     llvm_unreachable("unexpected register kind");
2082   }
2083 }
2084 
2085 struct RegInfo {
2086   StringLiteral Name;
2087   RegisterKind Kind;
2088 };
2089 
2090 static constexpr RegInfo RegularRegisters[] = {
2091   {{"v"},    IS_VGPR},
2092   {{"s"},    IS_SGPR},
2093   {{"ttmp"}, IS_TTMP},
2094   {{"acc"},  IS_AGPR},
2095   {{"a"},    IS_AGPR},
2096 };
2097 
2098 static bool isRegularReg(RegisterKind Kind) {
2099   return Kind == IS_VGPR ||
2100          Kind == IS_SGPR ||
2101          Kind == IS_TTMP ||
2102          Kind == IS_AGPR;
2103 }
2104 
2105 static const RegInfo* getRegularRegInfo(StringRef Str) {
2106   for (const RegInfo &Reg : RegularRegisters)
2107     if (Str.startswith(Reg.Name))
2108       return &Reg;
2109   return nullptr;
2110 }
2111 
2112 static bool getRegNum(StringRef Str, unsigned& Num) {
2113   return !Str.getAsInteger(10, Num);
2114 }
2115 
2116 bool
2117 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2118                             const AsmToken &NextToken) const {
2119 
2120   // A list of consecutive registers: [s0,s1,s2,s3]
2121   if (Token.is(AsmToken::LBrac))
2122     return true;
2123 
2124   if (!Token.is(AsmToken::Identifier))
2125     return false;
2126 
2127   // A single register like s0 or a range of registers like s[0:1]
2128 
2129   StringRef Str = Token.getString();
2130   const RegInfo *Reg = getRegularRegInfo(Str);
2131   if (Reg) {
2132     StringRef RegName = Reg->Name;
2133     StringRef RegSuffix = Str.substr(RegName.size());
2134     if (!RegSuffix.empty()) {
2135       unsigned Num;
2136       // A single register with an index: rXX
2137       if (getRegNum(RegSuffix, Num))
2138         return true;
2139     } else {
2140       // A range of registers: r[XX:YY].
2141       if (NextToken.is(AsmToken::LBrac))
2142         return true;
2143     }
2144   }
2145 
2146   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2147 }
2148 
2149 bool
2150 AMDGPUAsmParser::isRegister()
2151 {
2152   return isRegister(getToken(), peekToken());
2153 }
2154 
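// Compute the MC register for a regular register operand from its kind,
// starting index and width. SGPR and TTMP tuples must be suitably aligned;
// NoRegister is returned for an invalid alignment or width.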
2155 unsigned
2156 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2157                                unsigned RegNum,
2158                                unsigned RegWidth) {
2159 
2160   assert(isRegularReg(RegKind));
2161 
2162   unsigned AlignSize = 1;
2163   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2164     // SGPR and TTMP registers must be aligned.
2165     // Max required alignment is 4 dwords.
2166     AlignSize = std::min(RegWidth, 4u);
2167   }
2168 
2169   if (RegNum % AlignSize != 0)
2170     return AMDGPU::NoRegister;
2171 
2172   unsigned RegIdx = RegNum / AlignSize;
2173   int RCID = getRegClass(RegKind, RegWidth);
2174   if (RCID == -1)
2175     return AMDGPU::NoRegister;
2176 
2177   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2178   const MCRegisterClass RC = TRI->getRegClass(RCID);
2179   if (RegIdx >= RC.getNumRegs())
2180     return AMDGPU::NoRegister;
2181 
2182   return RC.getRegister(RegIdx);
2183 }
2184 
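// Parse a register index or range in brackets: "[XX]" or "[XX:YY]".
// For example, the range "[4:7]" yields Num = 4 and Width = 4.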
2185 bool
2186 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2187   int64_t RegLo, RegHi;
2188   if (!trySkipToken(AsmToken::LBrac))
2189     return false;
2190 
2191   if (!parseExpr(RegLo))
2192     return false;
2193 
2194   if (trySkipToken(AsmToken::Colon)) {
2195     if (!parseExpr(RegHi))
2196       return false;
2197   } else {
2198     RegHi = RegLo;
2199   }
2200 
2201   if (!trySkipToken(AsmToken::RBrac))
2202     return false;
2203 
2204   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2205     return false;
2206 
2207   Num = static_cast<unsigned>(RegLo);
2208   Width = (RegHi - RegLo) + 1;
2209   return true;
2210 }
2211 
2212 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2213                                           unsigned &RegNum, unsigned &RegWidth,
2214                                           SmallVectorImpl<AsmToken> &Tokens) {
2215   assert(isToken(AsmToken::Identifier));
2216   unsigned Reg = getSpecialRegForName(getTokenStr());
2217   if (Reg) {
2218     RegNum = 0;
2219     RegWidth = 1;
2220     RegKind = IS_SPECIAL;
2221     Tokens.push_back(getToken());
2222     lex(); // skip register name
2223   }
2224   return Reg;
2225 }
2226 
2227 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2228                                           unsigned &RegNum, unsigned &RegWidth,
2229                                           SmallVectorImpl<AsmToken> &Tokens) {
2230   assert(isToken(AsmToken::Identifier));
2231   StringRef RegName = getTokenStr();
2232 
2233   const RegInfo *RI = getRegularRegInfo(RegName);
2234   if (!RI)
2235     return AMDGPU::NoRegister;
2236   Tokens.push_back(getToken());
2237   lex(); // skip register name
2238 
2239   RegKind = RI->Kind;
2240   StringRef RegSuffix = RegName.substr(RI->Name.size());
2241   if (!RegSuffix.empty()) {
2242     // Single 32-bit register: vXX.
2243     if (!getRegNum(RegSuffix, RegNum))
2244       return AMDGPU::NoRegister;
2245     RegWidth = 1;
2246   } else {
2247     // Range of registers: v[XX:YY]. ":YY" is optional.
2248     if (!ParseRegRange(RegNum, RegWidth))
2249       return AMDGPU::NoRegister;
2250   }
2251 
2252   return getRegularReg(RegKind, RegNum, RegWidth);
2253 }
2254 
2255 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2256                                        unsigned &RegWidth,
2257                                        SmallVectorImpl<AsmToken> &Tokens) {
2258   unsigned Reg = AMDGPU::NoRegister;
2259 
2260   if (!trySkipToken(AsmToken::LBrac))
2261     return AMDGPU::NoRegister;
2262 
2263   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2264 
2265   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2266     return AMDGPU::NoRegister;
2267   if (RegWidth != 1)
2268     return AMDGPU::NoRegister;
2269 
2270   for (; trySkipToken(AsmToken::Comma); ) {
2271     RegisterKind NextRegKind;
2272     unsigned NextReg, NextRegNum, NextRegWidth;
2273 
2274     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2275                              Tokens))
2276       return AMDGPU::NoRegister;
2277     if (NextRegWidth != 1)
2278       return AMDGPU::NoRegister;
2279     if (NextRegKind != RegKind)
2280       return AMDGPU::NoRegister;
2281     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2282       return AMDGPU::NoRegister;
2283   }
2284 
2285   if (!trySkipToken(AsmToken::RBrac))
2286     return AMDGPU::NoRegister;
2287 
2288   if (isRegularReg(RegKind))
2289     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2290 
2291   return Reg;
2292 }
2293 
2294 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2295                                           unsigned &RegNum, unsigned &RegWidth,
2296                                           SmallVectorImpl<AsmToken> &Tokens) {
2297   Reg = AMDGPU::NoRegister;
2298 
2299   if (isToken(AsmToken::Identifier)) {
2300     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2301     if (Reg == AMDGPU::NoRegister)
2302       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2303   } else {
2304     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2305   }
2306 
2307   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2308   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2309 }
2310 
2311 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2312                                           unsigned &RegNum, unsigned &RegWidth,
2313                                           bool RestoreOnFailure) {
2314   Reg = AMDGPU::NoRegister;
2315 
2316   SmallVector<AsmToken, 1> Tokens;
2317   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2318     if (RestoreOnFailure) {
2319       while (!Tokens.empty()) {
2320         getLexer().UnLex(Tokens.pop_back_val());
2321       }
2322     }
2323     return true;
2324   }
2325   return false;
2326 }
2327 
2328 Optional<StringRef>
2329 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2330   switch (RegKind) {
2331   case IS_VGPR:
2332     return StringRef(".amdgcn.next_free_vgpr");
2333   case IS_SGPR:
2334     return StringRef(".amdgcn.next_free_sgpr");
2335   default:
2336     return None;
2337   }
2338 }
2339 
2340 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2341   auto SymbolName = getGprCountSymbolName(RegKind);
2342   assert(SymbolName && "initializing invalid register kind");
2343   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2344   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2345 }
2346 
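// Raise the .amdgcn.next_free_{v,s}gpr symbol so that it covers the highest
// register index used so far (DwordRegIndex + RegWidth).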
2347 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2348                                             unsigned DwordRegIndex,
2349                                             unsigned RegWidth) {
2350   // Symbols are only defined for GCN targets
2351   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2352     return true;
2353 
2354   auto SymbolName = getGprCountSymbolName(RegKind);
2355   if (!SymbolName)
2356     return true;
2357   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2358 
2359   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2360   int64_t OldCount;
2361 
2362   if (!Sym->isVariable())
2363     return !Error(getParser().getTok().getLoc(),
2364                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2365   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2366     return !Error(
2367         getParser().getTok().getLoc(),
2368         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2369 
2370   if (OldCount <= NewMax)
2371     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2372 
2373   return true;
2374 }
2375 
2376 std::unique_ptr<AMDGPUOperand>
2377 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2378   const auto &Tok = Parser.getTok();
2379   SMLoc StartLoc = Tok.getLoc();
2380   SMLoc EndLoc = Tok.getEndLoc();
2381   RegisterKind RegKind;
2382   unsigned Reg, RegNum, RegWidth;
2383 
2384   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2385     // FIXME: improve error messages (bug 41303).
2386     Error(StartLoc, "not a valid operand.");
2387     return nullptr;
2388   }
2389   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2390     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2391       return nullptr;
2392   } else
2393     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2394   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2395 }
2396 
2397 OperandMatchResultTy
2398 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2399   // TODO: add syntactic sugar for 1/(2*PI)
2400 
2401   assert(!isRegister());
2402   assert(!isModifier());
2403 
2404   const auto& Tok = getToken();
2405   const auto& NextTok = peekToken();
2406   bool IsReal = Tok.is(AsmToken::Real);
2407   SMLoc S = getLoc();
2408   bool Negate = false;
2409 
2410   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2411     lex();
2412     IsReal = true;
2413     Negate = true;
2414   }
2415 
2416   if (IsReal) {
2417     // Floating-point expressions are not supported.
2418     // Only floating-point literals with an
2419     // optional sign are accepted here.
2420 
2421     StringRef Num = getTokenStr();
2422     lex();
2423 
2424     APFloat RealVal(APFloat::IEEEdouble());
2425     auto roundMode = APFloat::rmNearestTiesToEven;
2426     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2427       return MatchOperand_ParseFail;
2428     }
2429     if (Negate)
2430       RealVal.changeSign();
2431 
2432     Operands.push_back(
2433       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2434                                AMDGPUOperand::ImmTyNone, true));
2435 
2436     return MatchOperand_Success;
2437 
2438   } else {
2439     int64_t IntVal;
2440     const MCExpr *Expr;
2441     SMLoc S = getLoc();
2442 
2443     if (HasSP3AbsModifier) {
2444       // This is a workaround for handling expressions
2445       // as arguments of SP3 'abs' modifier, for example:
2446       //     |1.0|
2447       //     |-1|
2448       //     |1+x|
2449       // This syntax is not compatible with the syntax of standard
2450       // MC expressions (due to the trailing '|').
2451       SMLoc EndLoc;
2452       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2453         return MatchOperand_ParseFail;
2454     } else {
2455       if (Parser.parseExpression(Expr))
2456         return MatchOperand_ParseFail;
2457     }
2458 
2459     if (Expr->evaluateAsAbsolute(IntVal)) {
2460       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2461     } else {
2462       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2463     }
2464 
2465     return MatchOperand_Success;
2466   }
2467 
2468   return MatchOperand_NoMatch;
2469 }
2470 
2471 OperandMatchResultTy
2472 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2473   if (!isRegister())
2474     return MatchOperand_NoMatch;
2475 
2476   if (auto R = parseRegister()) {
2477     assert(R->isReg());
2478     Operands.push_back(std::move(R));
2479     return MatchOperand_Success;
2480   }
2481   return MatchOperand_ParseFail;
2482 }
2483 
2484 OperandMatchResultTy
2485 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2486   auto res = parseReg(Operands);
2487   if (res != MatchOperand_NoMatch) {
2488     return res;
2489   } else if (isModifier()) {
2490     return MatchOperand_NoMatch;
2491   } else {
2492     return parseImm(Operands, HasSP3AbsMod);
2493   }
2494 }
2495 
2496 bool
2497 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2498   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2499     const auto &str = Token.getString();
2500     return str == "abs" || str == "neg" || str == "sext";
2501   }
2502   return false;
2503 }
2504 
2505 bool
2506 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2507   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2508 }
2509 
2510 bool
2511 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2512   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2513 }
2514 
2515 bool
2516 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2517   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2518 }
2519 
2520 // Check if this is an operand modifier or an opcode modifier
2521 // which may look like an expression but is not. We should
2522 // avoid parsing these modifiers as expressions. Currently
2523 // recognized sequences are:
2524 //   |...|
2525 //   abs(...)
2526 //   neg(...)
2527 //   sext(...)
2528 //   -reg
2529 //   -|...|
2530 //   -abs(...)
2531 //   name:...
2532 // Note that simple opcode modifiers like 'gds' may be parsed as
2533 // expressions; this is a special case. See getExpressionAsToken.
2534 //
2535 bool
2536 AMDGPUAsmParser::isModifier() {
2537 
2538   AsmToken Tok = getToken();
2539   AsmToken NextToken[2];
2540   peekTokens(NextToken);
2541 
2542   return isOperandModifier(Tok, NextToken[0]) ||
2543          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2544          isOpcodeModifierWithVal(Tok, NextToken[0]);
2545 }
2546 
2547 // Check if the current token is an SP3 'neg' modifier.
2548 // Currently this modifier is allowed in the following context:
2549 //
2550 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2551 // 2. Before an 'abs' modifier: -abs(...)
2552 // 3. Before an SP3 'abs' modifier: -|...|
2553 //
2554 // In all other cases "-" is handled as part
2555 // of an expression that follows the sign.
2556 //
2557 // Note: When "-" is followed by an integer literal,
2558 // this is interpreted as integer negation rather
2559 // than a floating-point NEG modifier applied to the literal.
2560 // Besides being counter-intuitive, such use of a floating-point
2561 // NEG modifier would have resulted in a different meaning
2562 // of integer literals used with VOP1/2/C and VOP3,
2563 // for example:
2564 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2565 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2566 // Negative fp literals with a preceding "-" are
2567 // handled likewise for uniformity.
2568 //
2569 bool
2570 AMDGPUAsmParser::parseSP3NegModifier() {
2571 
2572   AsmToken NextToken[2];
2573   peekTokens(NextToken);
2574 
2575   if (isToken(AsmToken::Minus) &&
2576       (isRegister(NextToken[0], NextToken[1]) ||
2577        NextToken[0].is(AsmToken::Pipe) ||
2578        isId(NextToken[0], "abs"))) {
2579     lex();
2580     return true;
2581   }
2582 
2583   return false;
2584 }
2585 
2586 OperandMatchResultTy
2587 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2588                                               bool AllowImm) {
2589   bool Neg, SP3Neg;
2590   bool Abs, SP3Abs;
2591   SMLoc Loc;
2592 
2593   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2594   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2595     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2596     return MatchOperand_ParseFail;
2597   }
2598 
2599   SP3Neg = parseSP3NegModifier();
2600 
2601   Loc = getLoc();
2602   Neg = trySkipId("neg");
2603   if (Neg && SP3Neg) {
2604     Error(Loc, "expected register or immediate");
2605     return MatchOperand_ParseFail;
2606   }
2607   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2608     return MatchOperand_ParseFail;
2609 
2610   Abs = trySkipId("abs");
2611   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2612     return MatchOperand_ParseFail;
2613 
2614   Loc = getLoc();
2615   SP3Abs = trySkipToken(AsmToken::Pipe);
2616   if (Abs && SP3Abs) {
2617     Error(Loc, "expected register or immediate");
2618     return MatchOperand_ParseFail;
2619   }
2620 
2621   OperandMatchResultTy Res;
2622   if (AllowImm) {
2623     Res = parseRegOrImm(Operands, SP3Abs);
2624   } else {
2625     Res = parseReg(Operands);
2626   }
2627   if (Res != MatchOperand_Success) {
2628     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2629   }
2630 
2631   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2632     return MatchOperand_ParseFail;
2633   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2634     return MatchOperand_ParseFail;
2635   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2636     return MatchOperand_ParseFail;
2637 
2638   AMDGPUOperand::Modifiers Mods;
2639   Mods.Abs = Abs || SP3Abs;
2640   Mods.Neg = Neg || SP3Neg;
2641 
2642   if (Mods.hasFPModifiers()) {
2643     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2644     if (Op.isExpr()) {
2645       Error(Op.getStartLoc(), "expected an absolute expression");
2646       return MatchOperand_ParseFail;
2647     }
2648     Op.setModifiers(Mods);
2649   }
2650   return MatchOperand_Success;
2651 }
2652 
2653 OperandMatchResultTy
2654 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2655                                                bool AllowImm) {
2656   bool Sext = trySkipId("sext");
2657   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2658     return MatchOperand_ParseFail;
2659 
2660   OperandMatchResultTy Res;
2661   if (AllowImm) {
2662     Res = parseRegOrImm(Operands);
2663   } else {
2664     Res = parseReg(Operands);
2665   }
2666   if (Res != MatchOperand_Success) {
2667     return Sext? MatchOperand_ParseFail : Res;
2668   }
2669 
2670   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2671     return MatchOperand_ParseFail;
2672 
2673   AMDGPUOperand::Modifiers Mods;
2674   Mods.Sext = Sext;
2675 
2676   if (Mods.hasIntModifiers()) {
2677     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2678     if (Op.isExpr()) {
2679       Error(Op.getStartLoc(), "expected an absolute expression");
2680       return MatchOperand_ParseFail;
2681     }
2682     Op.setModifiers(Mods);
2683   }
2684 
2685   return MatchOperand_Success;
2686 }
2687 
2688 OperandMatchResultTy
2689 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2690   return parseRegOrImmWithFPInputMods(Operands, false);
2691 }
2692 
2693 OperandMatchResultTy
2694 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2695   return parseRegOrImmWithIntInputMods(Operands, false);
2696 }
2697 
2698 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2699   auto Loc = getLoc();
2700   if (trySkipId("off")) {
2701     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2702                                                 AMDGPUOperand::ImmTyOff, false));
2703     return MatchOperand_Success;
2704   }
2705 
2706   if (!isRegister())
2707     return MatchOperand_NoMatch;
2708 
2709   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2710   if (Reg) {
2711     Operands.push_back(std::move(Reg));
2712     return MatchOperand_Success;
2713   }
2714 
2715   return MatchOperand_ParseFail;
2716 
2717 }
2718 
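// Reject a matched instruction if it conflicts with the encoding forced by
// the mnemonic (e.g. a forced 32-bit encoding with a VOP3-only instruction),
// and enforce encoding-specific operand restrictions such as dst_sel for
// v_mac_*_sdwa.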
2719 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2720   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2721 
2722   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2723       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2724       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2725       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2726     return Match_InvalidOperand;
2727 
2728   if ((TSFlags & SIInstrFlags::VOP3) &&
2729       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2730       getForcedEncodingSize() != 64)
2731     return Match_PreferE32;
2732 
2733   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2734       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2735     // v_mac_f32/16 allow only dst_sel == DWORD;
2736     auto OpNum =
2737         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2738     const auto &Op = Inst.getOperand(OpNum);
2739     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2740       return Match_InvalidOperand;
2741     }
2742   }
2743 
2744   return Match_Success;
2745 }
2746 
2747 // What asm variants we should check
2748 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2749   if (getForcedEncodingSize() == 32) {
2750     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2751     return makeArrayRef(Variants);
2752   }
2753 
2754   if (isForcedVOP3()) {
2755     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2756     return makeArrayRef(Variants);
2757   }
2758 
2759   if (isForcedSDWA()) {
2760     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2761                                         AMDGPUAsmVariants::SDWA9};
2762     return makeArrayRef(Variants);
2763   }
2764 
2765   if (isForcedDPP()) {
2766     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2767     return makeArrayRef(Variants);
2768   }
2769 
2770   static const unsigned Variants[] = {
2771     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2772     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2773   };
2774 
2775   return makeArrayRef(Variants);
2776 }
2777 
2778 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2779   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2780   const unsigned Num = Desc.getNumImplicitUses();
2781   for (unsigned i = 0; i < Num; ++i) {
2782     unsigned Reg = Desc.ImplicitUses[i];
2783     switch (Reg) {
2784     case AMDGPU::FLAT_SCR:
2785     case AMDGPU::VCC:
2786     case AMDGPU::VCC_LO:
2787     case AMDGPU::VCC_HI:
2788     case AMDGPU::M0:
2789       return Reg;
2790     default:
2791       break;
2792     }
2793   }
2794   return AMDGPU::NoRegister;
2795 }
2796 
2797 // NB: This code is correct only when used to check constant
2798 // bus limitations because GFX7 supports no f16 inline constants.
2799 // Note that there are no cases when a GFX7 opcode violates
2800 // constant bus limitations due to the use of an f16 constant.
2801 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2802                                        unsigned OpIdx) const {
2803   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2804 
2805   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2806     return false;
2807   }
2808 
2809   const MCOperand &MO = Inst.getOperand(OpIdx);
2810 
2811   int64_t Val = MO.getImm();
2812   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2813 
2814   switch (OpSize) { // expected operand size
2815   case 8:
2816     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2817   case 4:
2818     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2819   case 2: {
2820     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2821     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2822         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2823         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2824         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2825         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2826         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2827       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2828     } else {
2829       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2830     }
2831   }
2832   default:
2833     llvm_unreachable("invalid operand size");
2834   }
2835 }
2836 
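// Return how many scalar values (SGPRs and literals) an instruction may read
// over the constant bus: 2 on GFX10 (except for 64-bit shifts), 1 otherwise.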
2837 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2838   if (!isGFX10())
2839     return 1;
2840 
2841   switch (Opcode) {
2842   // 64-bit shift instructions can use only one scalar value input
2843   case AMDGPU::V_LSHLREV_B64:
2844   case AMDGPU::V_LSHLREV_B64_gfx10:
2845   case AMDGPU::V_LSHL_B64:
2846   case AMDGPU::V_LSHRREV_B64:
2847   case AMDGPU::V_LSHRREV_B64_gfx10:
2848   case AMDGPU::V_LSHR_B64:
2849   case AMDGPU::V_ASHRREV_I64:
2850   case AMDGPU::V_ASHRREV_I64_gfx10:
2851   case AMDGPU::V_ASHR_I64:
2852     return 1;
2853   default:
2854     return 2;
2855   }
2856 }
2857 
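// An operand occupies the constant bus if it is a non-inline immediate, an
// expression, or an SGPR other than null.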
2858 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2859   const MCOperand &MO = Inst.getOperand(OpIdx);
2860   if (MO.isImm()) {
2861     return !isInlineConstant(Inst, OpIdx);
2862   } else if (MO.isReg()) {
2863     auto Reg = MO.getReg();
2864     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2865     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2866   } else {
2867     return true;
2868   }
2869 }
2870 
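// Verify that the instruction does not read more scalar values over the
// constant bus than the target allows.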
2871 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2872   const unsigned Opcode = Inst.getOpcode();
2873   const MCInstrDesc &Desc = MII.get(Opcode);
2874   unsigned ConstantBusUseCount = 0;
2875   unsigned NumLiterals = 0;
2876   unsigned LiteralSize;
2877 
2878   if (Desc.TSFlags &
2879       (SIInstrFlags::VOPC |
2880        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2881        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2882        SIInstrFlags::SDWA)) {
2883     // Check special imm operands (used by madmk, etc)
2884     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2885       ++ConstantBusUseCount;
2886     }
2887 
2888     SmallDenseSet<unsigned> SGPRsUsed;
2889     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2890     if (SGPRUsed != AMDGPU::NoRegister) {
2891       SGPRsUsed.insert(SGPRUsed);
2892       ++ConstantBusUseCount;
2893     }
2894 
2895     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2896     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2897     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2898 
2899     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2900 
2901     for (int OpIdx : OpIndices) {
2902       if (OpIdx == -1) break;
2903 
2904       const MCOperand &MO = Inst.getOperand(OpIdx);
2905       if (usesConstantBus(Inst, OpIdx)) {
2906         if (MO.isReg()) {
2907           const unsigned Reg = mc2PseudoReg(MO.getReg());
2908           // Pairs of registers with a partial intersection like these
2909           //   s0, s[0:1]
2910           //   flat_scratch_lo, flat_scratch
2911           //   flat_scratch_lo, flat_scratch_hi
2912           // are theoretically valid but they are disabled anyway.
2913           // Note that this code mimics SIInstrInfo::verifyInstruction
2914           if (!SGPRsUsed.count(Reg)) {
2915             SGPRsUsed.insert(Reg);
2916             ++ConstantBusUseCount;
2917           }
2918         } else { // Expression or a literal
2919 
2920           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2921             continue; // special operand like VINTERP attr_chan
2922 
2923           // An instruction may use only one literal.
2924           // This has been validated on the previous step.
2925           // See validateVOP3Literal.
2926           // This literal may be used as more than one operand.
2927           // If all these operands are of the same size,
2928           // this literal counts as one scalar value.
2929           // Otherwise it counts as 2 scalar values.
2930           // See "GFX10 Shader Programming", section 3.6.2.3.
2931 
2932           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2933           if (Size < 4) Size = 4;
2934 
2935           if (NumLiterals == 0) {
2936             NumLiterals = 1;
2937             LiteralSize = Size;
2938           } else if (LiteralSize != Size) {
2939             NumLiterals = 2;
2940           }
2941         }
2942       }
2943     }
2944   }
2945   ConstantBusUseCount += NumLiterals;
2946 
2947   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2948 }
2949 
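// For instructions with an earlyclobber vdst, check that the destination
// register does not overlap any source register.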
2950 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2951   const unsigned Opcode = Inst.getOpcode();
2952   const MCInstrDesc &Desc = MII.get(Opcode);
2953 
2954   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2955   if (DstIdx == -1 ||
2956       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2957     return true;
2958   }
2959 
2960   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2961 
2962   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2963   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2964   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2965 
2966   assert(DstIdx != -1);
2967   const MCOperand &Dst = Inst.getOperand(DstIdx);
2968   assert(Dst.isReg());
2969   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2970 
2971   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2972 
2973   for (int SrcIdx : SrcIndices) {
2974     if (SrcIdx == -1) break;
2975     const MCOperand &Src = Inst.getOperand(SrcIdx);
2976     if (Src.isReg()) {
2977       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2978       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2979         return false;
2980       }
2981     }
2982   }
2983 
2984   return true;
2985 }
2986 
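// If the target has no integer clamp support, the clamp modifier must not be
// set on instructions that would otherwise accept it.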
2987 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2988 
2989   const unsigned Opc = Inst.getOpcode();
2990   const MCInstrDesc &Desc = MII.get(Opc);
2991 
2992   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2993     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2994     assert(ClampIdx != -1);
2995     return Inst.getOperand(ClampIdx).getImm() == 0;
2996   }
2997 
2998   return true;
2999 }
3000 
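// Check that the width of the MIMG vdata operand matches the number of
// components enabled by dmask (4 for gather4), plus one dword for tfe and
// halved when packed d16 is in use.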
3001 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3002 
3003   const unsigned Opc = Inst.getOpcode();
3004   const MCInstrDesc &Desc = MII.get(Opc);
3005 
3006   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3007     return true;
3008 
3009   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3010   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3011   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3012 
3013   assert(VDataIdx != -1);
3014   assert(DMaskIdx != -1);
3015   assert(TFEIdx != -1);
3016 
3017   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3018   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3019   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3020   if (DMask == 0)
3021     DMask = 1;
3022 
3023   unsigned DataSize =
3024     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3025   if (hasPackedD16()) {
3026     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3027     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3028       DataSize = (DataSize + 1) / 2;
3029   }
3030 
3031   return (VDataSize / 4) == DataSize + TFESize;
3032 }
3033 
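// On GFX10, check that the number of vaddr registers matches the address size
// implied by the dim and the base opcode, for both packed and NSA encodings.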
3034 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3035   const unsigned Opc = Inst.getOpcode();
3036   const MCInstrDesc &Desc = MII.get(Opc);
3037 
3038   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3039     return true;
3040 
3041   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3042   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3043       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3044   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3045   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3046   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3047 
3048   assert(VAddr0Idx != -1);
3049   assert(SrsrcIdx != -1);
3050   assert(DimIdx != -1);
3051   assert(SrsrcIdx > VAddr0Idx);
3052 
3053   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3054   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3055   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3056   unsigned VAddrSize =
3057       IsNSA ? SrsrcIdx - VAddr0Idx
3058             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3059 
3060   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3061                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3062                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3063                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3064   if (!IsNSA) {
3065     if (AddrSize > 8)
3066       AddrSize = 16;
3067     else if (AddrSize > 4)
3068       AddrSize = 8;
3069   }
3070 
3071   return VAddrSize == AddrSize;
3072 }
3073 
3074 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3075 
3076   const unsigned Opc = Inst.getOpcode();
3077   const MCInstrDesc &Desc = MII.get(Opc);
3078 
3079   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3080     return true;
3081   if (!Desc.mayLoad() || !Desc.mayStore())
3082     return true; // Not atomic
3083 
3084   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3085   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3086 
3087   // This is an incomplete check because image_atomic_cmpswap
3088   // may only use 0x3 and 0xf while other atomic operations
3089   // may use 0x1 and 0x3. However, these limitations are
3090   // verified when we check that dmask matches dst size.
3091   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3092 }
3093 
3094 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3095 
3096   const unsigned Opc = Inst.getOpcode();
3097   const MCInstrDesc &Desc = MII.get(Opc);
3098 
3099   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3100     return true;
3101 
3102   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3103   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3104 
3105   // GATHER4 instructions use dmask in a different fashion compared to
3106   // other MIMG instructions. The only useful DMASK values are
3107   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3108   // (red,red,red,red) etc.) The ISA document doesn't mention
3109   // this.
3110   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3111 }
3112 
3113 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3114 {
3115   switch (Opcode) {
3116   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3117   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3118   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3119     return true;
3120   default:
3121     return false;
3122   }
3123 }
3124 
3125 // movrels* opcodes should only allow VGPRs as src0.
3126 // This is specified in .td description for vop1/vop3,
3127 // but sdwa is handled differently. See isSDWAOperand.
3128 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3129 
3130   const unsigned Opc = Inst.getOpcode();
3131   const MCInstrDesc &Desc = MII.get(Opc);
3132 
3133   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3134     return true;
3135 
3136   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3137   assert(Src0Idx != -1);
3138 
3139   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3140   if (!Src0.isReg())
3141     return false;
3142 
3143   auto Reg = Src0.getReg();
3144   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3145   return !isSGPR(mc2PseudoReg(Reg), TRI);
3146 }
3147 
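// The d16 image modifier is not available on SI/CI; reject it there.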
3148 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3149 
3150   const unsigned Opc = Inst.getOpcode();
3151   const MCInstrDesc &Desc = MII.get(Opc);
3152 
3153   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3154     return true;
3155 
3156   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3157   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3158     if (isCI() || isSI())
3159       return false;
3160   }
3161 
3162   return true;
3163 }
3164 
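// The dim operand, when present, must hold a valid dimension encoding (0..7).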
3165 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3166   const unsigned Opc = Inst.getOpcode();
3167   const MCInstrDesc &Desc = MII.get(Opc);
3168 
3169   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3170     return true;
3171 
3172   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3173   if (DimIdx < 0)
3174     return true;
3175 
3176   int64_t Imm = Inst.getOperand(DimIdx).getImm();
3177   if (Imm < 0 || Imm >= 8)
3178     return false;
3179 
3180   return true;
3181 }
3182 
3183 static bool IsRevOpcode(const unsigned Opcode)
3184 {
3185   switch (Opcode) {
3186   case AMDGPU::V_SUBREV_F32_e32:
3187   case AMDGPU::V_SUBREV_F32_e64:
3188   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3189   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3190   case AMDGPU::V_SUBREV_F32_e32_vi:
3191   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3192   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3193   case AMDGPU::V_SUBREV_F32_e64_vi:
3194 
3195   case AMDGPU::V_SUBREV_I32_e32:
3196   case AMDGPU::V_SUBREV_I32_e64:
3197   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3198   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3199 
3200   case AMDGPU::V_SUBBREV_U32_e32:
3201   case AMDGPU::V_SUBBREV_U32_e64:
3202   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3203   case AMDGPU::V_SUBBREV_U32_e32_vi:
3204   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3205   case AMDGPU::V_SUBBREV_U32_e64_vi:
3206 
3207   case AMDGPU::V_SUBREV_U32_e32:
3208   case AMDGPU::V_SUBREV_U32_e64:
3209   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3210   case AMDGPU::V_SUBREV_U32_e32_vi:
3211   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3212   case AMDGPU::V_SUBREV_U32_e64_vi:
3213 
3214   case AMDGPU::V_SUBREV_F16_e32:
3215   case AMDGPU::V_SUBREV_F16_e64:
3216   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3217   case AMDGPU::V_SUBREV_F16_e32_vi:
3218   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3219   case AMDGPU::V_SUBREV_F16_e64_vi:
3220 
3221   case AMDGPU::V_SUBREV_U16_e32:
3222   case AMDGPU::V_SUBREV_U16_e64:
3223   case AMDGPU::V_SUBREV_U16_e32_vi:
3224   case AMDGPU::V_SUBREV_U16_e64_vi:
3225 
3226   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3227   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3228   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3229 
3230   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3231   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3232 
3233   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3234   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3235 
3236   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3237   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3238 
3239   case AMDGPU::V_LSHRREV_B32_e32:
3240   case AMDGPU::V_LSHRREV_B32_e64:
3241   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3242   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3243   case AMDGPU::V_LSHRREV_B32_e32_vi:
3244   case AMDGPU::V_LSHRREV_B32_e64_vi:
3245   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3246   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3247 
3248   case AMDGPU::V_ASHRREV_I32_e32:
3249   case AMDGPU::V_ASHRREV_I32_e64:
3250   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3251   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3252   case AMDGPU::V_ASHRREV_I32_e32_vi:
3253   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3254   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3255   case AMDGPU::V_ASHRREV_I32_e64_vi:
3256 
3257   case AMDGPU::V_LSHLREV_B32_e32:
3258   case AMDGPU::V_LSHLREV_B32_e64:
3259   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3260   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3261   case AMDGPU::V_LSHLREV_B32_e32_vi:
3262   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3263   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3264   case AMDGPU::V_LSHLREV_B32_e64_vi:
3265 
3266   case AMDGPU::V_LSHLREV_B16_e32:
3267   case AMDGPU::V_LSHLREV_B16_e64:
3268   case AMDGPU::V_LSHLREV_B16_e32_vi:
3269   case AMDGPU::V_LSHLREV_B16_e64_vi:
3270   case AMDGPU::V_LSHLREV_B16_gfx10:
3271 
3272   case AMDGPU::V_LSHRREV_B16_e32:
3273   case AMDGPU::V_LSHRREV_B16_e64:
3274   case AMDGPU::V_LSHRREV_B16_e32_vi:
3275   case AMDGPU::V_LSHRREV_B16_e64_vi:
3276   case AMDGPU::V_LSHRREV_B16_gfx10:
3277 
3278   case AMDGPU::V_ASHRREV_I16_e32:
3279   case AMDGPU::V_ASHRREV_I16_e64:
3280   case AMDGPU::V_ASHRREV_I16_e32_vi:
3281   case AMDGPU::V_ASHRREV_I16_e64_vi:
3282   case AMDGPU::V_ASHRREV_I16_gfx10:
3283 
3284   case AMDGPU::V_LSHLREV_B64:
3285   case AMDGPU::V_LSHLREV_B64_gfx10:
3286   case AMDGPU::V_LSHLREV_B64_vi:
3287 
3288   case AMDGPU::V_LSHRREV_B64:
3289   case AMDGPU::V_LSHRREV_B64_gfx10:
3290   case AMDGPU::V_LSHRREV_B64_vi:
3291 
3292   case AMDGPU::V_ASHRREV_I64:
3293   case AMDGPU::V_ASHRREV_I64_gfx10:
3294   case AMDGPU::V_ASHRREV_I64_vi:
3295 
3296   case AMDGPU::V_PK_LSHLREV_B16:
3297   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3298   case AMDGPU::V_PK_LSHLREV_B16_vi:
3299 
3300   case AMDGPU::V_PK_LSHRREV_B16:
3301   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3302   case AMDGPU::V_PK_LSHRREV_B16_vi:
3303   case AMDGPU::V_PK_ASHRREV_I16:
3304   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3305   case AMDGPU::V_PK_ASHRREV_I16_vi:
3306     return true;
3307   default:
3308     return false;
3309   }
3310 }
3311 
3312 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3313 
3314   using namespace SIInstrFlags;
3315   const unsigned Opcode = Inst.getOpcode();
3316   const MCInstrDesc &Desc = MII.get(Opcode);
3317 
3318   // The lds_direct register is defined so that it can be used
3319   // with 9-bit operands only. Ignore encodings which do not accept these.
3320   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3321     return true;
3322 
3323   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3324   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3325   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3326 
3327   const int SrcIndices[] = { Src1Idx, Src2Idx };
3328 
3329   // lds_direct cannot be specified as either src1 or src2.
3330   for (int SrcIdx : SrcIndices) {
3331     if (SrcIdx == -1) break;
3332     const MCOperand &Src = Inst.getOperand(SrcIdx);
3333     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3334       return false;
3335     }
3336   }
3337 
3338   if (Src0Idx == -1)
3339     return true;
3340 
3341   const MCOperand &Src = Inst.getOperand(Src0Idx);
3342   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3343     return true;
3344 
3345   // lds_direct is specified as src0. Check additional limitations.
3346   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3347 }
3348 
3349 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3350   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3351     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3352     if (Op.isFlatOffset())
3353       return Op.getStartLoc();
3354   }
3355   return getLoc();
3356 }
3357 
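// Validate the immediate offset of a FLAT instruction. Targets without flat
// offsets require the immediate to be zero. Otherwise the offset field is
// 13 bits wide on GFX9 and 12 bits on GFX10; global/scratch segments accept
// a signed value, while the flat segment accepts only an unsigned value one
// bit narrower.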
3358 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3359                                          const OperandVector &Operands) {
3360   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3361   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3362     return true;
3363 
3364   auto Opcode = Inst.getOpcode();
3365   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3366   assert(OpNum != -1);
3367 
3368   const auto &Op = Inst.getOperand(OpNum);
3369   if (!hasFlatOffsets() && Op.getImm() != 0) {
3370     Error(getFlatOffsetLoc(Operands),
3371           "flat offset modifier is not supported on this GPU");
3372     return false;
3373   }
3374 
3375   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3376   // For FLAT segment the offset must be positive;
3377   // MSB is ignored and forced to zero.
3378   unsigned OffsetSize = isGFX9() ? 13 : 12;
3379   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3380     if (!isIntN(OffsetSize, Op.getImm())) {
3381       Error(getFlatOffsetLoc(Operands),
3382             isGFX9() ? "expected a 13-bit signed offset" :
3383                        "expected a 12-bit signed offset");
3384       return false;
3385     }
3386   } else {
3387     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3388       Error(getFlatOffsetLoc(Operands),
3389             isGFX9() ? "expected a 12-bit unsigned offset" :
3390                        "expected an 11-bit unsigned offset");
3391       return false;
3392     }
3393   }
3394 
3395   return true;
3396 }
3397 
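// SOP2/SOPC instructions may use at most one 32-bit literal constant across
// src0 and src1. Two sources holding the same literal value share the single
// literal slot and are accepted; two distinct literal values are rejected.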
3398 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3399   unsigned Opcode = Inst.getOpcode();
3400   const MCInstrDesc &Desc = MII.get(Opcode);
3401   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3402     return true;
3403 
3404   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3405   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3406 
3407   const int OpIndices[] = { Src0Idx, Src1Idx };
3408 
3409   unsigned NumExprs = 0;
3410   unsigned NumLiterals = 0;
3411   uint32_t LiteralValue;
3412 
3413   for (int OpIdx : OpIndices) {
3414     if (OpIdx == -1) break;
3415 
3416     const MCOperand &MO = Inst.getOperand(OpIdx);
3417     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3418     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3419       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3420         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3421         if (NumLiterals == 0 || LiteralValue != Value) {
3422           LiteralValue = Value;
3423           ++NumLiterals;
3424         }
3425       } else if (MO.isExpr()) {
3426         ++NumExprs;
3427       }
3428     }
3429   }
3430 
3431   return NumLiterals + NumExprs <= 1;
3432 }
3433 
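// For v_permlane16_b32 / v_permlanex16_b32 only the two low op_sel bits are
// valid; any use of the high bits is rejected.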
3434 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3435   const unsigned Opc = Inst.getOpcode();
3436   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3437       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3438     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3439     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3440 
3441     if (OpSel & ~3)
3442       return false;
3443   }
3444   return true;
3445 }
3446 
3447 // Check if VCC register matches wavefront size
3448 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3449   auto FB = getFeatureBits();
3450   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3451     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3452 }
3453 
3454 // VOP3 literal is only allowed in GFX10+ and only one can be used
3455 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3456   unsigned Opcode = Inst.getOpcode();
3457   const MCInstrDesc &Desc = MII.get(Opcode);
3458   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3459     return true;
3460 
3461   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3462   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3463   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3464 
3465   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3466 
3467   unsigned NumExprs = 0;
3468   unsigned NumLiterals = 0;
3469   uint32_t LiteralValue;
3470 
3471   for (int OpIdx : OpIndices) {
3472     if (OpIdx == -1) break;
3473 
3474     const MCOperand &MO = Inst.getOperand(OpIdx);
3475     if (!MO.isImm() && !MO.isExpr())
3476       continue;
3477     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3478       continue;
3479 
3480     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3481         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3482       return false;
3483 
3484     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3485       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3486       if (NumLiterals == 0 || LiteralValue != Value) {
3487         LiteralValue = Value;
3488         ++NumLiterals;
3489       }
3490     } else if (MO.isExpr()) {
3491       ++NumExprs;
3492     }
3493   }
3494   NumLiterals += NumExprs;
3495 
3496   return !NumLiterals ||
3497          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3498 }
3499 
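// Run target-specific semantic checks on a successfully matched instruction.
// On failure a diagnostic is reported (at IDLoc unless an individual check
// reports a more precise location) and false is returned so the instruction
// is not emitted.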
3500 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3501                                           const SMLoc &IDLoc,
3502                                           const OperandVector &Operands) {
3503   if (!validateLdsDirect(Inst)) {
3504     Error(IDLoc,
3505       "invalid use of lds_direct");
3506     return false;
3507   }
3508   if (!validateSOPLiteral(Inst)) {
3509     Error(IDLoc,
3510       "only one literal operand is allowed");
3511     return false;
3512   }
3513   if (!validateVOP3Literal(Inst)) {
3514     Error(IDLoc,
3515       "invalid literal operand");
3516     return false;
3517   }
3518   if (!validateConstantBusLimitations(Inst)) {
3519     Error(IDLoc,
3520       "invalid operand (violates constant bus restrictions)");
3521     return false;
3522   }
3523   if (!validateEarlyClobberLimitations(Inst)) {
3524     Error(IDLoc,
3525       "destination must be different than all sources");
3526     return false;
3527   }
3528   if (!validateIntClampSupported(Inst)) {
3529     Error(IDLoc,
3530       "integer clamping is not supported on this GPU");
3531     return false;
3532   }
3533   if (!validateOpSel(Inst)) {
3534     Error(IDLoc,
3535       "invalid op_sel operand");
3536     return false;
3537   }
3538   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3539   if (!validateMIMGD16(Inst)) {
3540     Error(IDLoc,
3541       "d16 modifier is not supported on this GPU");
3542     return false;
3543   }
3544   if (!validateMIMGDim(Inst)) {
3545     Error(IDLoc, "dim modifier is required on this GPU");
3546     return false;
3547   }
3548   if (!validateMIMGDataSize(Inst)) {
3549     Error(IDLoc,
3550       "image data size does not match dmask and tfe");
3551     return false;
3552   }
3553   if (!validateMIMGAddrSize(Inst)) {
3554     Error(IDLoc,
3555       "image address size does not match dim and a16");
3556     return false;
3557   }
3558   if (!validateMIMGAtomicDMask(Inst)) {
3559     Error(IDLoc,
3560       "invalid atomic image dmask");
3561     return false;
3562   }
3563   if (!validateMIMGGatherDMask(Inst)) {
3564     Error(IDLoc,
3565       "invalid image_gather dmask: only one bit must be set");
3566     return false;
3567   }
3568   if (!validateMovrels(Inst)) {
3569     Error(IDLoc, "source operand must be a VGPR");
3570     return false;
3571   }
3572   if (!validateFlatOffset(Inst, Operands)) {
3573     return false;
3574   }
3575 
3576   return true;
3577 }
3578 
3579 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3580                                             const FeatureBitset &FBS,
3581                                             unsigned VariantID = 0);
3582 
3583 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3584                                               OperandVector &Operands,
3585                                               MCStreamer &Out,
3586                                               uint64_t &ErrorInfo,
3587                                               bool MatchingInlineAsm) {
3588   MCInst Inst;
3589   unsigned Result = Match_Success;
3590   for (auto Variant : getMatchedVariants()) {
3591     uint64_t EI;
3592     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3593                                   Variant);
3594     // We order match statuses from least to most specific and use the most
3595     // specific status as the result:
3596     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3597     if ((R == Match_Success) ||
3598         (R == Match_PreferE32) ||
3599         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3600         (R == Match_InvalidOperand && Result != Match_MissingFeature
3601                                    && Result != Match_PreferE32) ||
3602         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3603                                    && Result != Match_MissingFeature
3604                                    && Result != Match_PreferE32)) {
3605       Result = R;
3606       ErrorInfo = EI;
3607     }
3608     if (R == Match_Success)
3609       break;
3610   }
3611 
3612   switch (Result) {
3613   default: break;
3614   case Match_Success:
3615     if (!validateInstruction(Inst, IDLoc, Operands)) {
3616       return true;
3617     }
3618     Inst.setLoc(IDLoc);
3619     Out.emitInstruction(Inst, getSTI());
3620     return false;
3621 
3622   case Match_MissingFeature:
3623     return Error(IDLoc, "instruction not supported on this GPU");
3624 
3625   case Match_MnemonicFail: {
3626     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3627     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3628         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3629     return Error(IDLoc, "invalid instruction" + Suggestion,
3630                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3631   }
3632 
3633   case Match_InvalidOperand: {
3634     SMLoc ErrorLoc = IDLoc;
3635     if (ErrorInfo != ~0ULL) {
3636       if (ErrorInfo >= Operands.size()) {
3637         return Error(IDLoc, "too few operands for instruction");
3638       }
3639       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3640       if (ErrorLoc == SMLoc())
3641         ErrorLoc = IDLoc;
3642     }
3643     return Error(ErrorLoc, "invalid operand for instruction");
3644   }
3645 
3646   case Match_PreferE32:
3647     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3648                         "should be encoded as e32");
3649   }
3650   llvm_unreachable("Implement any new match types added!");
3651 }
3652 
3653 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3654   int64_t Tmp = -1;
3655   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3656     return true;
3657   }
3658   if (getParser().parseAbsoluteExpression(Tmp)) {
3659     return true;
3660   }
3661   Ret = static_cast<uint32_t>(Tmp);
3662   return false;
3663 }
3664 
3665 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3666                                                uint32_t &Minor) {
3667   if (ParseAsAbsoluteExpression(Major))
3668     return TokError("invalid major version");
3669 
3670   if (getLexer().isNot(AsmToken::Comma))
3671     return TokError("minor version number required, comma expected");
3672   Lex();
3673 
3674   if (ParseAsAbsoluteExpression(Minor))
3675     return TokError("invalid minor version");
3676 
3677   return false;
3678 }
3679 
3680 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3681   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3682     return TokError("directive only supported for amdgcn architecture");
3683 
3684   std::string Target;
3685 
3686   SMLoc TargetStart = getTok().getLoc();
3687   if (getParser().parseEscapedString(Target))
3688     return true;
3689   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3690 
3691   std::string ExpectedTarget;
3692   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3693   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3694 
3695   if (Target != ExpectedTargetOS.str())
3696     return getParser().Error(TargetRange.Start, "target must match options",
3697                              TargetRange);
3698 
3699   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3700   return false;
3701 }
3702 
3703 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3704   return getParser().Error(Range.Start, "value out of range", Range);
3705 }
3706 
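// Convert the raw VGPR/SGPR counts from the .amdhsa_ directives into the
// granulated block counts stored in the kernel descriptor, accounting for
// extra SGPRs (VCC, flat scratch, XNACK) and the SGPR init bug. Returns true
// and reports an error if a count is out of range.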
3707 bool AMDGPUAsmParser::calculateGPRBlocks(
3708     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3709     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3710     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3711     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3712   // TODO(scott.linder): These calculations are duplicated from
3713   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3714   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3715 
3716   unsigned NumVGPRs = NextFreeVGPR;
3717   unsigned NumSGPRs = NextFreeSGPR;
3718 
3719   if (Version.Major >= 10)
3720     NumSGPRs = 0;
3721   else {
3722     unsigned MaxAddressableNumSGPRs =
3723         IsaInfo::getAddressableNumSGPRs(&getSTI());
3724 
3725     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3726         NumSGPRs > MaxAddressableNumSGPRs)
3727       return OutOfRangeError(SGPRRange);
3728 
3729     NumSGPRs +=
3730         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3731 
3732     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3733         NumSGPRs > MaxAddressableNumSGPRs)
3734       return OutOfRangeError(SGPRRange);
3735 
3736     if (Features.test(FeatureSGPRInitBug))
3737       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3738   }
3739 
3740   VGPRBlocks =
3741       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3742   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3743 
3744   return false;
3745 }
3746 
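/// Parse a .amdhsa_kernel block. A minimal block accepted by this parser
/// (the kernel name and register counts below are illustrative) looks like:
///
///   .amdhsa_kernel my_kernel
///     .amdhsa_next_free_vgpr 8
///     .amdhsa_next_free_sgpr 16
///   .end_amdhsa_kernel
///
/// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are required; all other
/// .amdhsa_ directives are optional and may appear at most once.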
3747 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3748   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3749     return TokError("directive only supported for amdgcn architecture");
3750 
3751   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3752     return TokError("directive only supported for amdhsa OS");
3753 
3754   StringRef KernelName;
3755   if (getParser().parseIdentifier(KernelName))
3756     return true;
3757 
3758   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3759 
3760   StringSet<> Seen;
3761 
3762   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3763 
3764   SMRange VGPRRange;
3765   uint64_t NextFreeVGPR = 0;
3766   SMRange SGPRRange;
3767   uint64_t NextFreeSGPR = 0;
3768   unsigned UserSGPRCount = 0;
3769   bool ReserveVCC = true;
3770   bool ReserveFlatScr = true;
3771   bool ReserveXNACK = hasXNACK();
3772   Optional<bool> EnableWavefrontSize32;
3773 
3774   while (true) {
3775     while (getLexer().is(AsmToken::EndOfStatement))
3776       Lex();
3777 
3778     if (getLexer().isNot(AsmToken::Identifier))
3779       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3780 
3781     StringRef ID = getTok().getIdentifier();
3782     SMRange IDRange = getTok().getLocRange();
3783     Lex();
3784 
3785     if (ID == ".end_amdhsa_kernel")
3786       break;
3787 
3788     if (Seen.find(ID) != Seen.end())
3789       return TokError(".amdhsa_ directives cannot be repeated");
3790     Seen.insert(ID);
3791 
3792     SMLoc ValStart = getTok().getLoc();
3793     int64_t IVal;
3794     if (getParser().parseAbsoluteExpression(IVal))
3795       return true;
3796     SMLoc ValEnd = getTok().getLoc();
3797     SMRange ValRange = SMRange(ValStart, ValEnd);
3798 
3799     if (IVal < 0)
3800       return OutOfRangeError(ValRange);
3801 
3802     uint64_t Val = IVal;
3803 
3804 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3805   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3806     return OutOfRangeError(RANGE);                                             \
3807   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3808 
3809     if (ID == ".amdhsa_group_segment_fixed_size") {
3810       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3811         return OutOfRangeError(ValRange);
3812       KD.group_segment_fixed_size = Val;
3813     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3814       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3815         return OutOfRangeError(ValRange);
3816       KD.private_segment_fixed_size = Val;
3817     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3818       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3819                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3820                        Val, ValRange);
3821       if (Val)
3822         UserSGPRCount += 4;
3823     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3824       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3825                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3826                        ValRange);
3827       if (Val)
3828         UserSGPRCount += 2;
3829     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3830       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3831                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3832                        ValRange);
3833       if (Val)
3834         UserSGPRCount += 2;
3835     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3836       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3837                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3838                        Val, ValRange);
3839       if (Val)
3840         UserSGPRCount += 2;
3841     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3842       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3843                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3844                        ValRange);
3845       if (Val)
3846         UserSGPRCount += 2;
3847     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3848       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3849                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3850                        ValRange);
3851       if (Val)
3852         UserSGPRCount += 2;
3853     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3854       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3855                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3856                        Val, ValRange);
3857       if (Val)
3858         UserSGPRCount += 1;
3859     } else if (ID == ".amdhsa_wavefront_size32") {
3860       if (IVersion.Major < 10)
3861         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3862                                  IDRange);
3863       EnableWavefrontSize32 = Val;
3864       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3865                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3866                        Val, ValRange);
3867     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3868       PARSE_BITS_ENTRY(
3869           KD.compute_pgm_rsrc2,
3870           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3871           ValRange);
3872     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3873       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3874                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3875                        ValRange);
3876     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3877       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3878                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3879                        ValRange);
3880     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3881       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3882                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3883                        ValRange);
3884     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3885       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3886                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3887                        ValRange);
3888     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3889       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3890                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3891                        ValRange);
3892     } else if (ID == ".amdhsa_next_free_vgpr") {
3893       VGPRRange = ValRange;
3894       NextFreeVGPR = Val;
3895     } else if (ID == ".amdhsa_next_free_sgpr") {
3896       SGPRRange = ValRange;
3897       NextFreeSGPR = Val;
3898     } else if (ID == ".amdhsa_reserve_vcc") {
3899       if (!isUInt<1>(Val))
3900         return OutOfRangeError(ValRange);
3901       ReserveVCC = Val;
3902     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3903       if (IVersion.Major < 7)
3904         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3905                                  IDRange);
3906       if (!isUInt<1>(Val))
3907         return OutOfRangeError(ValRange);
3908       ReserveFlatScr = Val;
3909     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3910       if (IVersion.Major < 8)
3911         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3912                                  IDRange);
3913       if (!isUInt<1>(Val))
3914         return OutOfRangeError(ValRange);
3915       ReserveXNACK = Val;
3916     } else if (ID == ".amdhsa_float_round_mode_32") {
3917       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3918                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3919     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3920       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3921                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3922     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3923       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3924                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3925     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3926       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3927                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3928                        ValRange);
3929     } else if (ID == ".amdhsa_dx10_clamp") {
3930       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3931                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3932     } else if (ID == ".amdhsa_ieee_mode") {
3933       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3934                        Val, ValRange);
3935     } else if (ID == ".amdhsa_fp16_overflow") {
3936       if (IVersion.Major < 9)
3937         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3938                                  IDRange);
3939       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3940                        ValRange);
3941     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3942       if (IVersion.Major < 10)
3943         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3944                                  IDRange);
3945       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3946                        ValRange);
3947     } else if (ID == ".amdhsa_memory_ordered") {
3948       if (IVersion.Major < 10)
3949         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3950                                  IDRange);
3951       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3952                        ValRange);
3953     } else if (ID == ".amdhsa_forward_progress") {
3954       if (IVersion.Major < 10)
3955         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3956                                  IDRange);
3957       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3958                        ValRange);
3959     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3960       PARSE_BITS_ENTRY(
3961           KD.compute_pgm_rsrc2,
3962           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3963           ValRange);
3964     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3965       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3966                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3967                        Val, ValRange);
3968     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3969       PARSE_BITS_ENTRY(
3970           KD.compute_pgm_rsrc2,
3971           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3972           ValRange);
3973     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3974       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3975                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3976                        Val, ValRange);
3977     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3978       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3979                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3980                        Val, ValRange);
3981     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3982       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3983                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3984                        Val, ValRange);
3985     } else if (ID == ".amdhsa_exception_int_div_zero") {
3986       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3987                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3988                        Val, ValRange);
3989     } else {
3990       return getParser().Error(IDRange.Start,
3991                                "unknown .amdhsa_kernel directive", IDRange);
3992     }
3993 
3994 #undef PARSE_BITS_ENTRY
3995   }
3996 
3997   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3998     return TokError(".amdhsa_next_free_vgpr directive is required");
3999 
4000   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4001     return TokError(".amdhsa_next_free_sgpr directive is required");
4002 
4003   unsigned VGPRBlocks;
4004   unsigned SGPRBlocks;
4005   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4006                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4007                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4008                          SGPRBlocks))
4009     return true;
4010 
4011   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4012           VGPRBlocks))
4013     return OutOfRangeError(VGPRRange);
4014   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4015                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4016 
4017   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4018           SGPRBlocks))
4019     return OutOfRangeError(SGPRRange);
4020   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4021                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4022                   SGPRBlocks);
4023 
4024   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4025     return TokError("too many user SGPRs enabled");
4026   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4027                   UserSGPRCount);
4028 
4029   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4030       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4031       ReserveFlatScr, ReserveXNACK);
4032   return false;
4033 }
4034 
4035 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4036   uint32_t Major;
4037   uint32_t Minor;
4038 
4039   if (ParseDirectiveMajorMinor(Major, Minor))
4040     return true;
4041 
4042   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4043   return false;
4044 }
4045 
4046 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4047   uint32_t Major;
4048   uint32_t Minor;
4049   uint32_t Stepping;
4050   StringRef VendorName;
4051   StringRef ArchName;
4052 
4053   // If this directive has no arguments, then use the ISA version for the
4054   // targeted GPU.
4055   if (getLexer().is(AsmToken::EndOfStatement)) {
4056     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4057     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4058                                                       ISA.Stepping,
4059                                                       "AMD", "AMDGPU");
4060     return false;
4061   }
4062 
4063   if (ParseDirectiveMajorMinor(Major, Minor))
4064     return true;
4065 
4066   if (getLexer().isNot(AsmToken::Comma))
4067     return TokError("stepping version number required, comma expected");
4068   Lex();
4069 
4070   if (ParseAsAbsoluteExpression(Stepping))
4071     return TokError("invalid stepping version");
4072 
4073   if (getLexer().isNot(AsmToken::Comma))
4074     return TokError("vendor name required, comma expected");
4075   Lex();
4076 
4077   if (getLexer().isNot(AsmToken::String))
4078     return TokError("invalid vendor name");
4079 
4080   VendorName = getLexer().getTok().getStringContents();
4081   Lex();
4082 
4083   if (getLexer().isNot(AsmToken::Comma))
4084     return TokError("arch name required, comma expected");
4085   Lex();
4086 
4087   if (getLexer().isNot(AsmToken::String))
4088     return TokError("invalid arch name");
4089 
4090   ArchName = getLexer().getTok().getStringContents();
4091   Lex();
4092 
4093   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4094                                                     VendorName, ArchName);
4095   return false;
4096 }
4097 
4098 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4099                                                amd_kernel_code_t &Header) {
4100   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4101   // assembly for backwards compatibility.
4102   if (ID == "max_scratch_backing_memory_byte_size") {
4103     Parser.eatToEndOfStatement();
4104     return false;
4105   }
4106 
4107   SmallString<40> ErrStr;
4108   raw_svector_ostream Err(ErrStr);
4109   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4110     return TokError(Err.str());
4111   }
4112   Lex();
4113 
4114   if (ID == "enable_wavefront_size32") {
4115     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4116       if (!isGFX10())
4117         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4118       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4119         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4120     } else {
4121       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4122         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4123     }
4124   }
4125 
4126   if (ID == "wavefront_size") {
4127     if (Header.wavefront_size == 5) {
4128       if (!isGFX10())
4129         return TokError("wavefront_size=5 is only allowed on GFX10+");
4130       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4131         return TokError("wavefront_size=5 requires +WavefrontSize32");
4132     } else if (Header.wavefront_size == 6) {
4133       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4134         return TokError("wavefront_size=6 requires +WavefrontSize64");
4135     }
4136   }
4137 
4138   if (ID == "enable_wgp_mode") {
4139     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4140       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4141   }
4142 
4143   if (ID == "enable_mem_ordered") {
4144     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4145       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4146   }
4147 
4148   if (ID == "enable_fwd_progress") {
4149     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4150       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4151   }
4152 
4153   return false;
4154 }
4155 
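/// Parse a .amd_kernel_code_t block of "key = value" records, e.g.
/// (illustrative):
///
///   .amd_kernel_code_t
///     wavefront_size = 6
///   .end_amd_kernel_code_t
///
/// Errors in individual fields are reported by ParseAMDKernelCodeTValue.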
4156 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4157   amd_kernel_code_t Header;
4158   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4159 
4160   while (true) {
4161     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4162     // will set the current token to EndOfStatement.
4163     while(getLexer().is(AsmToken::EndOfStatement))
4164       Lex();
4165 
4166     if (getLexer().isNot(AsmToken::Identifier))
4167       return TokError("expected value identifier or .end_amd_kernel_code_t");
4168 
4169     StringRef ID = getLexer().getTok().getIdentifier();
4170     Lex();
4171 
4172     if (ID == ".end_amd_kernel_code_t")
4173       break;
4174 
4175     if (ParseAMDKernelCodeTValue(ID, Header))
4176       return true;
4177   }
4178 
4179   getTargetStreamer().EmitAMDKernelCodeT(Header);
4180 
4181   return false;
4182 }
4183 
4184 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4185   if (getLexer().isNot(AsmToken::Identifier))
4186     return TokError("expected symbol name");
4187 
4188   StringRef KernelName = Parser.getTok().getString();
4189 
4190   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4191                                            ELF::STT_AMDGPU_HSA_KERNEL);
4192   Lex();
4193   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4194     KernelScope.initialize(getContext());
4195   return false;
4196 }
4197 
4198 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4199   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4200     return Error(getParser().getTok().getLoc(),
4201                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4202                  "architectures");
4203   }
4204 
4205   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4206 
4207   std::string ISAVersionStringFromSTI;
4208   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4209   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4210 
4211   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4212     return Error(getParser().getTok().getLoc(),
4213                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4214                  "arguments specified through the command line");
4215   }
4216 
4217   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4218   Lex();
4219 
4220   return false;
4221 }
4222 
4223 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4224   const char *AssemblerDirectiveBegin;
4225   const char *AssemblerDirectiveEnd;
4226   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4227       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4228           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4229                             HSAMD::V3::AssemblerDirectiveEnd)
4230           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4231                             HSAMD::AssemblerDirectiveEnd);
4232 
4233   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4234     return Error(getParser().getTok().getLoc(),
4235                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4236                  "not available on non-amdhsa OSes")).str());
4237   }
4238 
4239   std::string HSAMetadataString;
4240   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4241                           HSAMetadataString))
4242     return true;
4243 
4244   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4245     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4246       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4247   } else {
4248     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4249       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4250   }
4251 
4252   return false;
4253 }
4254 
4255 /// Common code to parse out a block of text (typically YAML) between start and
4256 /// end directives.
4257 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4258                                           const char *AssemblerDirectiveEnd,
4259                                           std::string &CollectString) {
4260 
4261   raw_string_ostream CollectStream(CollectString);
4262 
4263   getLexer().setSkipSpace(false);
4264 
4265   bool FoundEnd = false;
4266   while (!getLexer().is(AsmToken::Eof)) {
4267     while (getLexer().is(AsmToken::Space)) {
4268       CollectStream << getLexer().getTok().getString();
4269       Lex();
4270     }
4271 
4272     if (getLexer().is(AsmToken::Identifier)) {
4273       StringRef ID = getLexer().getTok().getIdentifier();
4274       if (ID == AssemblerDirectiveEnd) {
4275         Lex();
4276         FoundEnd = true;
4277         break;
4278       }
4279     }
4280 
4281     CollectStream << Parser.parseStringToEndOfStatement()
4282                   << getContext().getAsmInfo()->getSeparatorString();
4283 
4284     Parser.eatToEndOfStatement();
4285   }
4286 
4287   getLexer().setSkipSpace(true);
4288 
4289   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4290     return TokError(Twine("expected directive ") +
4291                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4292   }
4293 
4294   CollectStream.flush();
4295   return false;
4296 }
4297 
4298 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4299 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4300   std::string String;
4301   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4302                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4303     return true;
4304 
4305   auto PALMetadata = getTargetStreamer().getPALMetadata();
4306   if (!PALMetadata->setFromString(String))
4307     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4308   return false;
4309 }
4310 
4311 /// Parse the assembler directive for old linear-format PAL metadata.
4312 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4313   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4314     return Error(getParser().getTok().getLoc(),
4315                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4316                  "not available on non-amdpal OSes")).str());
4317   }
4318 
4319   auto PALMetadata = getTargetStreamer().getPALMetadata();
4320   PALMetadata->setLegacy();
4321   for (;;) {
4322     uint32_t Key, Value;
4323     if (ParseAsAbsoluteExpression(Key)) {
4324       return TokError(Twine("invalid value in ") +
4325                       Twine(PALMD::AssemblerDirective));
4326     }
4327     if (getLexer().isNot(AsmToken::Comma)) {
4328       return TokError(Twine("expected an even number of values in ") +
4329                       Twine(PALMD::AssemblerDirective));
4330     }
4331     Lex();
4332     if (ParseAsAbsoluteExpression(Value)) {
4333       return TokError(Twine("invalid value in ") +
4334                       Twine(PALMD::AssemblerDirective));
4335     }
4336     PALMetadata->setRegister(Key, Value);
4337     if (getLexer().isNot(AsmToken::Comma))
4338       break;
4339     Lex();
4340   }
4341   return false;
4342 }
4343 
4344 /// ParseDirectiveAMDGPULDS
4345 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4346 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4347   if (getParser().checkForValidSection())
4348     return true;
4349 
4350   StringRef Name;
4351   SMLoc NameLoc = getLexer().getLoc();
4352   if (getParser().parseIdentifier(Name))
4353     return TokError("expected identifier in directive");
4354 
4355   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4356   if (parseToken(AsmToken::Comma, "expected ','"))
4357     return true;
4358 
4359   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4360 
4361   int64_t Size;
4362   SMLoc SizeLoc = getLexer().getLoc();
4363   if (getParser().parseAbsoluteExpression(Size))
4364     return true;
4365   if (Size < 0)
4366     return Error(SizeLoc, "size must be non-negative");
4367   if (Size > LocalMemorySize)
4368     return Error(SizeLoc, "size is too large");
4369 
4370   int64_t Align = 4;
4371   if (getLexer().is(AsmToken::Comma)) {
4372     Lex();
4373     SMLoc AlignLoc = getLexer().getLoc();
4374     if (getParser().parseAbsoluteExpression(Align))
4375       return true;
4376     if (Align < 0 || !isPowerOf2_64(Align))
4377       return Error(AlignLoc, "alignment must be a power of two");
4378 
4379     // Alignment larger than the size of LDS is possible in theory, as long
4380     // as the linker manages to place the symbol at address 0, but we do want
4381     // to make sure the alignment fits nicely into a 32-bit integer.
4382     if (Align >= 1u << 31)
4383       return Error(AlignLoc, "alignment is too large");
4384   }
4385 
4386   if (parseToken(AsmToken::EndOfStatement,
4387                  "unexpected token in '.amdgpu_lds' directive"))
4388     return true;
4389 
4390   Symbol->redefineIfPossible();
4391   if (!Symbol->isUndefined())
4392     return Error(NameLoc, "invalid symbol redefinition");
4393 
4394   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4395   return false;
4396 }
4397 
4398 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4399   StringRef IDVal = DirectiveID.getString();
4400 
4401   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4402     if (IDVal == ".amdgcn_target")
4403       return ParseDirectiveAMDGCNTarget();
4404 
4405     if (IDVal == ".amdhsa_kernel")
4406       return ParseDirectiveAMDHSAKernel();
4407 
4408     // TODO: Restructure/combine with PAL metadata directive.
4409     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4410       return ParseDirectiveHSAMetadata();
4411   } else {
4412     if (IDVal == ".hsa_code_object_version")
4413       return ParseDirectiveHSACodeObjectVersion();
4414 
4415     if (IDVal == ".hsa_code_object_isa")
4416       return ParseDirectiveHSACodeObjectISA();
4417 
4418     if (IDVal == ".amd_kernel_code_t")
4419       return ParseDirectiveAMDKernelCodeT();
4420 
4421     if (IDVal == ".amdgpu_hsa_kernel")
4422       return ParseDirectiveAMDGPUHsaKernel();
4423 
4424     if (IDVal == ".amd_amdgpu_isa")
4425       return ParseDirectiveISAVersion();
4426 
4427     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4428       return ParseDirectiveHSAMetadata();
4429   }
4430 
4431   if (IDVal == ".amdgpu_lds")
4432     return ParseDirectiveAMDGPULDS();
4433 
4434   if (IDVal == PALMD::AssemblerDirectiveBegin)
4435     return ParseDirectivePALMetadataBegin();
4436 
4437   if (IDVal == PALMD::AssemblerDirective)
4438     return ParseDirectivePALMetadata();
4439 
4440   return true;
4441 }
4442 
4443 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4444                                            unsigned RegNo) const {
4445 
4446   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4447        R.isValid(); ++R) {
4448     if (*R == RegNo)
4449       return isGFX9() || isGFX10();
4450   }
4451 
4452   // GFX10 has 2 more SGPRs 104 and 105.
4453   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4454        R.isValid(); ++R) {
4455     if (*R == RegNo)
4456       return hasSGPR104_SGPR105();
4457   }
4458 
4459   switch (RegNo) {
4460   case AMDGPU::SRC_SHARED_BASE:
4461   case AMDGPU::SRC_SHARED_LIMIT:
4462   case AMDGPU::SRC_PRIVATE_BASE:
4463   case AMDGPU::SRC_PRIVATE_LIMIT:
4464   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4465     return !isCI() && !isSI() && !isVI();
4466   case AMDGPU::TBA:
4467   case AMDGPU::TBA_LO:
4468   case AMDGPU::TBA_HI:
4469   case AMDGPU::TMA:
4470   case AMDGPU::TMA_LO:
4471   case AMDGPU::TMA_HI:
4472     return !isGFX9() && !isGFX10();
4473   case AMDGPU::XNACK_MASK:
4474   case AMDGPU::XNACK_MASK_LO:
4475   case AMDGPU::XNACK_MASK_HI:
4476     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4477   case AMDGPU::SGPR_NULL:
4478     return isGFX10();
4479   default:
4480     break;
4481   }
4482 
4483   if (isCI())
4484     return true;
4485 
4486   if (isSI() || isGFX10()) {
4487     // No flat_scr on SI.
4488     // On GFX10 flat scratch is not a valid register operand and can only be
4489     // accessed with s_setreg/s_getreg.
4490     switch (RegNo) {
4491     case AMDGPU::FLAT_SCR:
4492     case AMDGPU::FLAT_SCR_LO:
4493     case AMDGPU::FLAT_SCR_HI:
4494       return false;
4495     default:
4496       return true;
4497     }
4498   }
4499 
4500   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4501   // SI/CI have.
4502   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4503        R.isValid(); ++R) {
4504     if (*R == RegNo)
4505       return hasSGPR102_SGPR103();
4506   }
4507 
4508   return true;
4509 }
4510 
4511 OperandMatchResultTy
4512 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4513                               OperandMode Mode) {
4514   // Try to parse with a custom parser
4515   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4516 
4517   // If we successfully parsed the operand or if there was an error parsing,
4518   // we are done.
4519   //
4520   // If we are parsing after we reach EndOfStatement then this means we
4521   // are appending default values to the Operands list.  This is only done
4522   // by a custom parser, so we shouldn't continue on to the generic parsing.
4523   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4524       getLexer().is(AsmToken::EndOfStatement))
4525     return ResTy;
4526 
4527   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4528     unsigned Prefix = Operands.size();
4529     SMLoc LBraceLoc = getTok().getLoc();
4530     Parser.Lex(); // eat the '['
4531 
4532     for (;;) {
4533       ResTy = parseReg(Operands);
4534       if (ResTy != MatchOperand_Success)
4535         return ResTy;
4536 
4537       if (getLexer().is(AsmToken::RBrac))
4538         break;
4539 
4540       if (getLexer().isNot(AsmToken::Comma))
4541         return MatchOperand_ParseFail;
4542       Parser.Lex();
4543     }
4544 
4545     if (Operands.size() - Prefix > 1) {
4546       Operands.insert(Operands.begin() + Prefix,
4547                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4548       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4549                                                     getTok().getLoc()));
4550     }
4551 
4552     Parser.Lex(); // eat the ']'
4553     return MatchOperand_Success;
4554   }
4555 
4556   return parseRegOrImm(Operands);
4557 }
4558 
4559 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4560   // Clear any forced encodings from the previous instruction.
4561   setForcedEncodingSize(0);
4562   setForcedDPP(false);
4563   setForcedSDWA(false);
4564 
4565   if (Name.endswith("_e64")) {
4566     setForcedEncodingSize(64);
4567     return Name.substr(0, Name.size() - 4);
4568   } else if (Name.endswith("_e32")) {
4569     setForcedEncodingSize(32);
4570     return Name.substr(0, Name.size() - 4);
4571   } else if (Name.endswith("_dpp")) {
4572     setForcedDPP(true);
4573     return Name.substr(0, Name.size() - 4);
4574   } else if (Name.endswith("_sdwa")) {
4575     setForcedSDWA(true);
4576     return Name.substr(0, Name.size() - 5);
4577   }
4578   return Name;
4579 }
4580 
4581 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4582                                        StringRef Name,
4583                                        SMLoc NameLoc, OperandVector &Operands) {
4584   // Add the instruction mnemonic
4585   Name = parseMnemonicSuffix(Name);
4586   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4587 
4588   bool IsMIMG = Name.startswith("image_");
4589 
4590   while (!getLexer().is(AsmToken::EndOfStatement)) {
4591     OperandMode Mode = OperandMode_Default;
4592     if (IsMIMG && isGFX10() && Operands.size() == 2)
4593       Mode = OperandMode_NSA;
4594     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4595 
4596     // Eat the comma or space if there is one.
4597     if (getLexer().is(AsmToken::Comma))
4598       Parser.Lex();
4599 
4600     switch (Res) {
4601       case MatchOperand_Success: break;
4602       case MatchOperand_ParseFail:
4603         // FIXME: use real operand location rather than the current location.
4604         Error(getLexer().getLoc(), "failed parsing operand.");
4605         while (!getLexer().is(AsmToken::EndOfStatement)) {
4606           Parser.Lex();
4607         }
4608         return true;
4609       case MatchOperand_NoMatch:
4610         // FIXME: use real operand location rather than the current location.
4611         Error(getLexer().getLoc(), "not a valid operand.");
4612         while (!getLexer().is(AsmToken::EndOfStatement)) {
4613           Parser.Lex();
4614         }
4615         return true;
4616     }
4617   }
4618 
4619   return false;
4620 }
4621 
4622 //===----------------------------------------------------------------------===//
4623 // Utility functions
4624 //===----------------------------------------------------------------------===//
4625 
4626 OperandMatchResultTy
4627 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4628 
4629   if (!trySkipId(Prefix, AsmToken::Colon))
4630     return MatchOperand_NoMatch;
4631 
4632   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4633 }
4634 
4635 OperandMatchResultTy
4636 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4637                                     AMDGPUOperand::ImmTy ImmTy,
4638                                     bool (*ConvertResult)(int64_t&)) {
4639   SMLoc S = getLoc();
4640   int64_t Value = 0;
4641 
4642   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4643   if (Res != MatchOperand_Success)
4644     return Res;
4645 
4646   if (ConvertResult && !ConvertResult(Value)) {
4647     Error(S, "invalid " + StringRef(Prefix) + " value.");
4648   }
4649 
4650   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4651   return MatchOperand_Success;
4652 }
4653 
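     // Parse an operand of the form <Prefix>:[v0,...,vN] where each element is
     // either 0 or 1 (e.g. op_sel:[0,1,0]); the bits are packed into a single
     // immediate operand.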
4654 OperandMatchResultTy
4655 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4656                                              OperandVector &Operands,
4657                                              AMDGPUOperand::ImmTy ImmTy,
4658                                              bool (*ConvertResult)(int64_t&)) {
4659   SMLoc S = getLoc();
4660   if (!trySkipId(Prefix, AsmToken::Colon))
4661     return MatchOperand_NoMatch;
4662 
4663   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4664     return MatchOperand_ParseFail;
4665 
4666   unsigned Val = 0;
4667   const unsigned MaxSize = 4;
4668 
4669   // FIXME: How to verify the number of elements matches the number of src
4670   // operands?
4671   for (int I = 0; ; ++I) {
4672     int64_t Op;
4673     SMLoc Loc = getLoc();
4674     if (!parseExpr(Op))
4675       return MatchOperand_ParseFail;
4676 
4677     if (Op != 0 && Op != 1) {
4678       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4679       return MatchOperand_ParseFail;
4680     }
4681 
4682     Val |= (Op << I);
4683 
4684     if (trySkipToken(AsmToken::RBrac))
4685       break;
4686 
4687     if (I + 1 == MaxSize) {
4688       Error(getLoc(), "expected a closing square bracket");
4689       return MatchOperand_ParseFail;
4690     }
4691 
4692     if (!skipToken(AsmToken::Comma, "expected a comma"))
4693       return MatchOperand_ParseFail;
4694   }
4695 
4696   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4697   return MatchOperand_Success;
4698 }
4699 
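     // Parse a named single-bit modifier such as 'glc': the bare name sets the
     // bit, the negated form 'no<Name>' clears it, and at the end of the
     // statement the default value (0) is used.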
4700 OperandMatchResultTy
4701 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4702                                AMDGPUOperand::ImmTy ImmTy) {
4703   int64_t Bit = 0;
4704   SMLoc S = Parser.getTok().getLoc();
4705 
4706   // If we are at the end of the statement, this is a default argument, so
4707   // use the default value.
4708   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4709     switch(getLexer().getKind()) {
4710       case AsmToken::Identifier: {
4711         StringRef Tok = Parser.getTok().getString();
4712         if (Tok == Name) {
4713           if (Tok == "r128" && !hasMIMG_R128())
4714             Error(S, "r128 modifier is not supported on this GPU");
4715           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4716             Error(S, "a16 modifier is not supported on this GPU");
4717           Bit = 1;
4718           Parser.Lex();
4719         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4720           Bit = 0;
4721           Parser.Lex();
4722         } else {
4723           return MatchOperand_NoMatch;
4724         }
4725         break;
4726       }
4727       default:
4728         return MatchOperand_NoMatch;
4729     }
4730   }
4731 
4732   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4733     return MatchOperand_ParseFail;
4734 
4735   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4736     ImmTy = AMDGPUOperand::ImmTyR128A16;
4737 
4738   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4739   return MatchOperand_Success;
4740 }
4741 
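     // Append an optional immediate operand to Inst: if it was present in the
     // source, its parsed value (recorded in OptionalIdx) is used; otherwise the
     // provided default is encoded.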
4742 static void addOptionalImmOperand(
4743   MCInst& Inst, const OperandVector& Operands,
4744   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4745   AMDGPUOperand::ImmTy ImmT,
4746   int64_t Default = 0) {
4747   auto i = OptionalIdx.find(ImmT);
4748   if (i != OptionalIdx.end()) {
4749     unsigned Idx = i->second;
4750     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4751   } else {
4752     Inst.addOperand(MCOperand::createImm(Default));
4753   }
4754 }
4755 
4756 OperandMatchResultTy
4757 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4758   if (getLexer().isNot(AsmToken::Identifier)) {
4759     return MatchOperand_NoMatch;
4760   }
4761   StringRef Tok = Parser.getTok().getString();
4762   if (Tok != Prefix) {
4763     return MatchOperand_NoMatch;
4764   }
4765 
4766   Parser.Lex();
4767   if (getLexer().isNot(AsmToken::Colon)) {
4768     return MatchOperand_ParseFail;
4769   }
4770 
4771   Parser.Lex();
4772   if (getLexer().isNot(AsmToken::Identifier)) {
4773     return MatchOperand_ParseFail;
4774   }
4775 
4776   Value = Parser.getTok().getString();
4777   return MatchOperand_Success;
4778 }
4779 
4780 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4781 // values to live in a joint format operand in the MCInst encoding.
4782 OperandMatchResultTy
4783 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4784   SMLoc S = Parser.getTok().getLoc();
4785   int64_t Dfmt = 0, Nfmt = 0;
4786   // dfmt and nfmt can appear in either order, and each is optional.
4787   bool GotDfmt = false, GotNfmt = false;
4788   while (!GotDfmt || !GotNfmt) {
4789     if (!GotDfmt) {
4790       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4791       if (Res != MatchOperand_NoMatch) {
4792         if (Res != MatchOperand_Success)
4793           return Res;
4794         if (Dfmt >= 16) {
4795           Error(Parser.getTok().getLoc(), "out of range dfmt");
4796           return MatchOperand_ParseFail;
4797         }
4798         GotDfmt = true;
4799         Parser.Lex();
4800         continue;
4801       }
4802     }
4803     if (!GotNfmt) {
4804       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4805       if (Res != MatchOperand_NoMatch) {
4806         if (Res != MatchOperand_Success)
4807           return Res;
4808         if (Nfmt >= 8) {
4809           Error(Parser.getTok().getLoc(), "out of range nfmt");
4810           return MatchOperand_ParseFail;
4811         }
4812         GotNfmt = true;
4813         Parser.Lex();
4814         continue;
4815       }
4816     }
4817     break;
4818   }
4819   if (!GotDfmt && !GotNfmt)
4820     return MatchOperand_NoMatch;
4821   auto Format = Dfmt | Nfmt << 4;
4822   Operands.push_back(
4823       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4824   return MatchOperand_Success;
4825 }
4826 
4827 //===----------------------------------------------------------------------===//
4828 // ds
4829 //===----------------------------------------------------------------------===//
4830 
4831 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4832                                     const OperandVector &Operands) {
4833   OptionalImmIndexMap OptionalIdx;
4834 
4835   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4836     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4837 
4838     // Add the register arguments
4839     if (Op.isReg()) {
4840       Op.addRegOperands(Inst, 1);
4841       continue;
4842     }
4843 
4844     // Handle optional arguments
4845     OptionalIdx[Op.getImmTy()] = i;
4846   }
4847 
4848   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4849   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4850   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4851 
4852   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4853 }
4854 
4855 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4856                                 bool IsGdsHardcoded) {
4857   OptionalImmIndexMap OptionalIdx;
4858 
4859   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4860     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4861 
4862     // Add the register arguments
4863     if (Op.isReg()) {
4864       Op.addRegOperands(Inst, 1);
4865       continue;
4866     }
4867 
4868     if (Op.isToken() && Op.getToken() == "gds") {
4869       IsGdsHardcoded = true;
4870       continue;
4871     }
4872 
4873     // Handle optional arguments
4874     OptionalIdx[Op.getImmTy()] = i;
4875   }
4876 
4877   AMDGPUOperand::ImmTy OffsetType =
4878     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4879      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4880      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4881                                                       AMDGPUOperand::ImmTyOffset;
4882 
4883   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4884 
4885   if (!IsGdsHardcoded) {
4886     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4887   }
4888   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4889 }
4890 
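     // Convert parsed 'exp' operands to an MCInst. The four source slots are
     // filled with registers ('off' becomes NoRegister), and the trailing 'en'
     // mask is computed from the slots actually in use (bit pairs in compr mode).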
4891 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4892   OptionalImmIndexMap OptionalIdx;
4893 
4894   unsigned OperandIdx[4];
4895   unsigned EnMask = 0;
4896   int SrcIdx = 0;
4897 
4898   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4899     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4900 
4901     // Add the register arguments
4902     if (Op.isReg()) {
4903       assert(SrcIdx < 4);
4904       OperandIdx[SrcIdx] = Inst.size();
4905       Op.addRegOperands(Inst, 1);
4906       ++SrcIdx;
4907       continue;
4908     }
4909 
4910     if (Op.isOff()) {
4911       assert(SrcIdx < 4);
4912       OperandIdx[SrcIdx] = Inst.size();
4913       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4914       ++SrcIdx;
4915       continue;
4916     }
4917 
4918     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4919       Op.addImmOperands(Inst, 1);
4920       continue;
4921     }
4922 
4923     if (Op.isToken() && Op.getToken() == "done")
4924       continue;
4925 
4926     // Handle optional arguments
4927     OptionalIdx[Op.getImmTy()] = i;
4928   }
4929 
4930   assert(SrcIdx == 4);
4931 
4932   bool Compr = false;
4933   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4934     Compr = true;
4935     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4936     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4937     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4938   }
4939 
4940   for (auto i = 0; i < SrcIdx; ++i) {
4941     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4942       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4943     }
4944   }
4945 
4946   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4947   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4948 
4949   Inst.addOperand(MCOperand::createImm(EnMask));
4950 }
4951 
4952 //===----------------------------------------------------------------------===//
4953 // s_waitcnt
4954 //===----------------------------------------------------------------------===//
4955 
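     // Pack a counter value into the combined s_waitcnt bit mask via the
     // target-specific encode/decode helpers. A value that does not round-trip
     // is out of range: saturate it if requested, otherwise report failure.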
4956 static bool
4957 encodeCnt(
4958   const AMDGPU::IsaVersion ISA,
4959   int64_t &IntVal,
4960   int64_t CntVal,
4961   bool Saturate,
4962   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4963   unsigned (*decode)(const IsaVersion &Version, unsigned))
4964 {
4965   bool Failed = false;
4966 
4967   IntVal = encode(ISA, IntVal, CntVal);
4968   if (CntVal != decode(ISA, IntVal)) {
4969     if (Saturate) {
4970       IntVal = encode(ISA, IntVal, -1);
4971     } else {
4972       Failed = true;
4973     }
4974   }
4975   return Failed;
4976 }
4977 
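     // Parse a single counter specification of the form <name>(<value>), e.g.
     // vmcnt(0) or lgkmcnt_sat(1), and merge it into the s_waitcnt mask in IntVal.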
4978 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4979 
4980   SMLoc CntLoc = getLoc();
4981   StringRef CntName = getTokenStr();
4982 
4983   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4984       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4985     return false;
4986 
4987   int64_t CntVal;
4988   SMLoc ValLoc = getLoc();
4989   if (!parseExpr(CntVal))
4990     return false;
4991 
4992   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4993 
4994   bool Failed = true;
4995   bool Sat = CntName.endswith("_sat");
4996 
4997   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4998     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4999   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5000     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5001   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5002     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5003   } else {
5004     Error(CntLoc, "invalid counter name " + CntName);
5005     return false;
5006   }
5007 
5008   if (Failed) {
5009     Error(ValLoc, "too large value for " + CntName);
5010     return false;
5011   }
5012 
5013   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5014     return false;
5015 
5016   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5017     if (isToken(AsmToken::EndOfStatement)) {
5018       Error(getLoc(), "expected a counter name");
5019       return false;
5020     }
5021   }
5022 
5023   return true;
5024 }
5025 
5026 OperandMatchResultTy
5027 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5028   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5029   int64_t Waitcnt = getWaitcntBitMask(ISA);
5030   SMLoc S = getLoc();
5031 
5032   // If parsing failed, do not return an error code
5033   // to avoid excessive error messages.
5034   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5035     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5036   } else {
5037     parseExpr(Waitcnt);
5038   }
5039 
5040   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5041   return MatchOperand_Success;
5042 }
5043 
5044 bool
5045 AMDGPUOperand::isSWaitCnt() const {
5046   return isImm();
5047 }
5048 
5049 //===----------------------------------------------------------------------===//
5050 // hwreg
5051 //===----------------------------------------------------------------------===//
5052 
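     // Parse the body of a hwreg(...) operand: a register given by name or by a
     // numeric code, optionally followed by a bit offset and a field width.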
5053 bool
5054 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5055                                 int64_t &Offset,
5056                                 int64_t &Width) {
5057   using namespace llvm::AMDGPU::Hwreg;
5058 
5059   // The register may be specified by name or using a numeric code
5060   if (isToken(AsmToken::Identifier) &&
5061       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5062     HwReg.IsSymbolic = true;
5063     lex(); // skip hardware register name
5064   } else if (!parseExpr(HwReg.Id)) {
5065     return false;
5066   }
5067 
5068   if (trySkipToken(AsmToken::RParen))
5069     return true;
5070 
5071   // parse optional params
5072   return
5073     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5074     parseExpr(Offset) &&
5075     skipToken(AsmToken::Comma, "expected a comma") &&
5076     parseExpr(Width) &&
5077     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5078 }
5079 
5080 bool
5081 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5082                                const int64_t Offset,
5083                                const int64_t Width,
5084                                const SMLoc Loc) {
5085 
5086   using namespace llvm::AMDGPU::Hwreg;
5087 
5088   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5089     Error(Loc, "specified hardware register is not supported on this GPU");
5090     return false;
5091   } else if (!isValidHwreg(HwReg.Id)) {
5092     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5093     return false;
5094   } else if (!isValidHwregOffset(Offset)) {
5095     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5096     return false;
5097   } else if (!isValidHwregWidth(Width)) {
5098     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5099     return false;
5100   }
5101   return true;
5102 }
5103 
5104 OperandMatchResultTy
5105 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5106   using namespace llvm::AMDGPU::Hwreg;
5107 
5108   int64_t ImmVal = 0;
5109   SMLoc Loc = getLoc();
5110 
5111   // If parsing failed, do not return an error code
5112   // to avoid excessive error messages.
5113   if (trySkipId("hwreg", AsmToken::LParen)) {
5114     OperandInfoTy HwReg(ID_UNKNOWN_);
5115     int64_t Offset = OFFSET_DEFAULT_;
5116     int64_t Width = WIDTH_DEFAULT_;
5117     if (parseHwregBody(HwReg, Offset, Width) &&
5118         validateHwreg(HwReg, Offset, Width, Loc)) {
5119       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5120     }
5121   } else if (parseExpr(ImmVal)) {
5122     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5123       Error(Loc, "invalid immediate: only 16-bit values are legal");
5124   }
5125 
5126   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5127   return MatchOperand_Success;
5128 }
5129 
5130 bool AMDGPUOperand::isHwreg() const {
5131   return isImmTy(ImmTyHwreg);
5132 }
5133 
5134 //===----------------------------------------------------------------------===//
5135 // sendmsg
5136 //===----------------------------------------------------------------------===//
5137 
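     // Parse the body of a sendmsg(...) operand: a message id given by name or
     // by a numeric code, an optional operation id and an optional stream id.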
5138 bool
5139 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5140                                   OperandInfoTy &Op,
5141                                   OperandInfoTy &Stream) {
5142   using namespace llvm::AMDGPU::SendMsg;
5143 
5144   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5145     Msg.IsSymbolic = true;
5146     lex(); // skip message name
5147   } else if (!parseExpr(Msg.Id)) {
5148     return false;
5149   }
5150 
5151   if (trySkipToken(AsmToken::Comma)) {
5152     Op.IsDefined = true;
5153     if (isToken(AsmToken::Identifier) &&
5154         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5155       lex(); // skip operation name
5156     } else if (!parseExpr(Op.Id)) {
5157       return false;
5158     }
5159 
5160     if (trySkipToken(AsmToken::Comma)) {
5161       Stream.IsDefined = true;
5162       if (!parseExpr(Stream.Id))
5163         return false;
5164     }
5165   }
5166 
5167   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5168 }
5169 
5170 bool
5171 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5172                                  const OperandInfoTy &Op,
5173                                  const OperandInfoTy &Stream,
5174                                  const SMLoc S) {
5175   using namespace llvm::AMDGPU::SendMsg;
5176 
5177   // Validation strictness depends on whether the message is specified
5178   // in a symbolic or in a numeric form. In the latter case
5179   // only the possibility of encoding is checked.
5180   bool Strict = Msg.IsSymbolic;
5181 
5182   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5183     Error(S, "invalid message id");
5184     return false;
5185   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5186     Error(S, Op.IsDefined ?
5187              "message does not support operations" :
5188              "missing message operation");
5189     return false;
5190   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5191     Error(S, "invalid operation id");
5192     return false;
5193   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5194     Error(S, "message operation does not support streams");
5195     return false;
5196   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5197     Error(S, "invalid message stream id");
5198     return false;
5199   }
5200   return true;
5201 }
5202 
5203 OperandMatchResultTy
5204 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5205   using namespace llvm::AMDGPU::SendMsg;
5206 
5207   int64_t ImmVal = 0;
5208   SMLoc Loc = getLoc();
5209 
5210   // If parsing failed, do not return an error code
5211   // to avoid excessive error messages.
5212   if (trySkipId("sendmsg", AsmToken::LParen)) {
5213     OperandInfoTy Msg(ID_UNKNOWN_);
5214     OperandInfoTy Op(OP_NONE_);
5215     OperandInfoTy Stream(STREAM_ID_NONE_);
5216     if (parseSendMsgBody(Msg, Op, Stream) &&
5217         validateSendMsg(Msg, Op, Stream, Loc)) {
5218       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5219     }
5220   } else if (parseExpr(ImmVal)) {
5221     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5222       Error(Loc, "invalid immediate: only 16-bit values are legal");
5223   }
5224 
5225   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5226   return MatchOperand_Success;
5227 }
5228 
5229 bool AMDGPUOperand::isSendMsg() const {
5230   return isImmTy(ImmTySendMsg);
5231 }
5232 
5233 //===----------------------------------------------------------------------===//
5234 // v_interp
5235 //===----------------------------------------------------------------------===//
5236 
5237 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5238   if (getLexer().getKind() != AsmToken::Identifier)
5239     return MatchOperand_NoMatch;
5240 
5241   StringRef Str = Parser.getTok().getString();
5242   int Slot = StringSwitch<int>(Str)
5243     .Case("p10", 0)
5244     .Case("p20", 1)
5245     .Case("p0", 2)
5246     .Default(-1);
5247 
5248   SMLoc S = Parser.getTok().getLoc();
5249   if (Slot == -1)
5250     return MatchOperand_ParseFail;
5251 
5252   Parser.Lex();
5253   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5254                                               AMDGPUOperand::ImmTyInterpSlot));
5255   return MatchOperand_Success;
5256 }
5257 
5258 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5259   if (getLexer().getKind() != AsmToken::Identifier)
5260     return MatchOperand_NoMatch;
5261 
5262   StringRef Str = Parser.getTok().getString();
5263   if (!Str.startswith("attr"))
5264     return MatchOperand_NoMatch;
5265 
5266   StringRef Chan = Str.take_back(2);
5267   int AttrChan = StringSwitch<int>(Chan)
5268     .Case(".x", 0)
5269     .Case(".y", 1)
5270     .Case(".z", 2)
5271     .Case(".w", 3)
5272     .Default(-1);
5273   if (AttrChan == -1)
5274     return MatchOperand_ParseFail;
5275 
5276   Str = Str.drop_back(2).drop_front(4);
5277 
5278   uint8_t Attr;
5279   if (Str.getAsInteger(10, Attr))
5280     return MatchOperand_ParseFail;
5281 
5282   SMLoc S = Parser.getTok().getLoc();
5283   Parser.Lex();
5284   if (Attr > 63) {
5285     Error(S, "out of bounds attr");
5286     return MatchOperand_Success;
5287   }
5288 
5289   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5290 
5291   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5292                                               AMDGPUOperand::ImmTyInterpAttr));
5293   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5294                                               AMDGPUOperand::ImmTyAttrChan));
5295   return MatchOperand_Success;
5296 }
5297 
5298 //===----------------------------------------------------------------------===//
5299 // exp
5300 //===----------------------------------------------------------------------===//
5301 
5302 void AMDGPUAsmParser::errorExpTgt() {
5303   Error(Parser.getTok().getLoc(), "invalid exp target");
5304 }
5305 
5306 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5307                                                       uint8_t &Val) {
5308   if (Str == "null") {
5309     Val = 9;
5310     return MatchOperand_Success;
5311   }
5312 
5313   if (Str.startswith("mrt")) {
5314     Str = Str.drop_front(3);
5315     if (Str == "z") { // == mrtz
5316       Val = 8;
5317       return MatchOperand_Success;
5318     }
5319 
5320     if (Str.getAsInteger(10, Val))
5321       return MatchOperand_ParseFail;
5322 
5323     if (Val > 7)
5324       errorExpTgt();
5325 
5326     return MatchOperand_Success;
5327   }
5328 
5329   if (Str.startswith("pos")) {
5330     Str = Str.drop_front(3);
5331     if (Str.getAsInteger(10, Val))
5332       return MatchOperand_ParseFail;
5333 
5334     if (Val > 4 || (Val == 4 && !isGFX10()))
5335       errorExpTgt();
5336 
5337     Val += 12;
5338     return MatchOperand_Success;
5339   }
5340 
5341   if (isGFX10() && Str == "prim") {
5342     Val = 20;
5343     return MatchOperand_Success;
5344   }
5345 
5346   if (Str.startswith("param")) {
5347     Str = Str.drop_front(5);
5348     if (Str.getAsInteger(10, Val))
5349       return MatchOperand_ParseFail;
5350 
5351     if (Val >= 32)
5352       errorExpTgt();
5353 
5354     Val += 32;
5355     return MatchOperand_Success;
5356   }
5357 
5358   if (Str.startswith("invalid_target_")) {
5359     Str = Str.drop_front(15);
5360     if (Str.getAsInteger(10, Val))
5361       return MatchOperand_ParseFail;
5362 
5363     errorExpTgt();
5364     return MatchOperand_Success;
5365   }
5366 
5367   return MatchOperand_NoMatch;
5368 }
5369 
5370 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5371   uint8_t Val;
5372   StringRef Str = Parser.getTok().getString();
5373 
5374   auto Res = parseExpTgtImpl(Str, Val);
5375   if (Res != MatchOperand_Success)
5376     return Res;
5377 
5378   SMLoc S = Parser.getTok().getLoc();
5379   Parser.Lex();
5380 
5381   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5382                                               AMDGPUOperand::ImmTyExpTgt));
5383   return MatchOperand_Success;
5384 }
5385 
5386 //===----------------------------------------------------------------------===//
5387 // parser helpers
5388 //===----------------------------------------------------------------------===//
5389 
5390 bool
5391 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5392   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5393 }
5394 
5395 bool
5396 AMDGPUAsmParser::isId(const StringRef Id) const {
5397   return isId(getToken(), Id);
5398 }
5399 
5400 bool
5401 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5402   return getTokenKind() == Kind;
5403 }
5404 
5405 bool
5406 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5407   if (isId(Id)) {
5408     lex();
5409     return true;
5410   }
5411   return false;
5412 }
5413 
5414 bool
5415 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5416   if (isId(Id) && peekToken().is(Kind)) {
5417     lex();
5418     lex();
5419     return true;
5420   }
5421   return false;
5422 }
5423 
5424 bool
5425 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5426   if (isToken(Kind)) {
5427     lex();
5428     return true;
5429   }
5430   return false;
5431 }
5432 
5433 bool
5434 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5435                            const StringRef ErrMsg) {
5436   if (!trySkipToken(Kind)) {
5437     Error(getLoc(), ErrMsg);
5438     return false;
5439   }
5440   return true;
5441 }
5442 
5443 bool
5444 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5445   return !getParser().parseAbsoluteExpression(Imm);
5446 }
5447 
5448 bool
5449 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5450   SMLoc S = getLoc();
5451 
5452   const MCExpr *Expr;
5453   if (Parser.parseExpression(Expr))
5454     return false;
5455 
5456   int64_t IntVal;
5457   if (Expr->evaluateAsAbsolute(IntVal)) {
5458     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5459   } else {
5460     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5461   }
5462   return true;
5463 }
5464 
5465 bool
5466 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5467   if (isToken(AsmToken::String)) {
5468     Val = getToken().getStringContents();
5469     lex();
5470     return true;
5471   } else {
5472     Error(getLoc(), ErrMsg);
5473     return false;
5474   }
5475 }
5476 
5477 AsmToken
5478 AMDGPUAsmParser::getToken() const {
5479   return Parser.getTok();
5480 }
5481 
5482 AsmToken
5483 AMDGPUAsmParser::peekToken() {
5484   return getLexer().peekTok();
5485 }
5486 
5487 void
5488 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5489   auto TokCount = getLexer().peekTokens(Tokens);
5490 
5491   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5492     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5493 }
5494 
5495 AsmToken::TokenKind
5496 AMDGPUAsmParser::getTokenKind() const {
5497   return getLexer().getKind();
5498 }
5499 
5500 SMLoc
5501 AMDGPUAsmParser::getLoc() const {
5502   return getToken().getLoc();
5503 }
5504 
5505 StringRef
5506 AMDGPUAsmParser::getTokenStr() const {
5507   return getToken().getString();
5508 }
5509 
5510 void
5511 AMDGPUAsmParser::lex() {
5512   Parser.Lex();
5513 }
5514 
5515 //===----------------------------------------------------------------------===//
5516 // swizzle
5517 //===----------------------------------------------------------------------===//
5518 
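     // Pack the AND/OR/XOR lane masks into the ds_swizzle BITMASK_PERM encoding.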
5519 LLVM_READNONE
5520 static unsigned
5521 encodeBitmaskPerm(const unsigned AndMask,
5522                   const unsigned OrMask,
5523                   const unsigned XorMask) {
5524   using namespace llvm::AMDGPU::Swizzle;
5525 
5526   return BITMASK_PERM_ENC |
5527          (AndMask << BITMASK_AND_SHIFT) |
5528          (OrMask  << BITMASK_OR_SHIFT)  |
5529          (XorMask << BITMASK_XOR_SHIFT);
5530 }
5531 
5532 bool
5533 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5534                                       const unsigned MinVal,
5535                                       const unsigned MaxVal,
5536                                       const StringRef ErrMsg) {
5537   for (unsigned i = 0; i < OpNum; ++i) {
5538     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5539       return false;
5540     }
5541     SMLoc ExprLoc = Parser.getTok().getLoc();
5542     if (!parseExpr(Op[i])) {
5543       return false;
5544     }
5545     if (Op[i] < MinVal || Op[i] > MaxVal) {
5546       Error(ExprLoc, ErrMsg);
5547       return false;
5548     }
5549   }
5550 
5551   return true;
5552 }
5553 
5554 bool
5555 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5556   using namespace llvm::AMDGPU::Swizzle;
5557 
5558   int64_t Lane[LANE_NUM];
5559   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5560                            "expected a 2-bit lane id")) {
5561     Imm = QUAD_PERM_ENC;
5562     for (unsigned I = 0; I < LANE_NUM; ++I) {
5563       Imm |= Lane[I] << (LANE_SHIFT * I);
5564     }
5565     return true;
5566   }
5567   return false;
5568 }
5569 
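     // swizzle(BROADCAST, <group size>, <lane id>) is encoded as a bitmask perm:
     // the AND mask keeps the bits selecting the group and the OR mask supplies
     // the index of the lane to broadcast within each group.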
5570 bool
5571 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5572   using namespace llvm::AMDGPU::Swizzle;
5573 
5574   SMLoc S = Parser.getTok().getLoc();
5575   int64_t GroupSize;
5576   int64_t LaneIdx;
5577 
5578   if (!parseSwizzleOperands(1, &GroupSize,
5579                             2, 32,
5580                             "group size must be in the interval [2,32]")) {
5581     return false;
5582   }
5583   if (!isPowerOf2_64(GroupSize)) {
5584     Error(S, "group size must be a power of two");
5585     return false;
5586   }
5587   if (parseSwizzleOperands(1, &LaneIdx,
5588                            0, GroupSize - 1,
5589                            "lane id must be in the interval [0,group size - 1]")) {
5590     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5591     return true;
5592   }
5593   return false;
5594 }
5595 
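     // swizzle(REVERSE, <group size>) reverses the lanes within each group by
     // XOR-ing the lane id with GroupSize - 1.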
5596 bool
5597 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5598   using namespace llvm::AMDGPU::Swizzle;
5599 
5600   SMLoc S = Parser.getTok().getLoc();
5601   int64_t GroupSize;
5602 
5603   if (!parseSwizzleOperands(1, &GroupSize,
5604       2, 32, "group size must be in the interval [2,32]")) {
5605     return false;
5606   }
5607   if (!isPowerOf2_64(GroupSize)) {
5608     Error(S, "group size must be a power of two");
5609     return false;
5610   }
5611 
5612   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5613   return true;
5614 }
5615 
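     // swizzle(SWAP, <group size>) swaps adjacent groups by XOR-ing the lane id
     // with GroupSize.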
5616 bool
5617 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5618   using namespace llvm::AMDGPU::Swizzle;
5619 
5620   SMLoc S = Parser.getTok().getLoc();
5621   int64_t GroupSize;
5622 
5623   if (!parseSwizzleOperands(1, &GroupSize,
5624       1, 16, "group size must be in the interval [1,16]")) {
5625     return false;
5626   }
5627   if (!isPowerOf2_64(GroupSize)) {
5628     Error(S, "group size must be a power of two");
5629     return false;
5630   }
5631 
5632   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5633   return true;
5634 }
5635 
5636 bool
5637 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5638   using namespace llvm::AMDGPU::Swizzle;
5639 
5640   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5641     return false;
5642   }
5643 
5644   StringRef Ctl;
5645   SMLoc StrLoc = Parser.getTok().getLoc();
5646   if (!parseString(Ctl)) {
5647     return false;
5648   }
5649   if (Ctl.size() != BITMASK_WIDTH) {
5650     Error(StrLoc, "expected a 5-character mask");
5651     return false;
5652   }
5653 
5654   unsigned AndMask = 0;
5655   unsigned OrMask = 0;
5656   unsigned XorMask = 0;
5657 
5658   for (size_t i = 0; i < Ctl.size(); ++i) {
5659     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5660     switch(Ctl[i]) {
5661     default:
5662       Error(StrLoc, "invalid mask");
5663       return false;
5664     case '0':
5665       break;
5666     case '1':
5667       OrMask |= Mask;
5668       break;
5669     case 'p':
5670       AndMask |= Mask;
5671       break;
5672     case 'i':
5673       AndMask |= Mask;
5674       XorMask |= Mask;
5675       break;
5676     }
5677   }
5678 
5679   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5680   return true;
5681 }
5682 
5683 bool
5684 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5685 
5686   SMLoc OffsetLoc = Parser.getTok().getLoc();
5687 
5688   if (!parseExpr(Imm)) {
5689     return false;
5690   }
5691   if (!isUInt<16>(Imm)) {
5692     Error(OffsetLoc, "expected a 16-bit offset");
5693     return false;
5694   }
5695   return true;
5696 }
5697 
5698 bool
5699 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5700   using namespace llvm::AMDGPU::Swizzle;
5701 
5702   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5703 
5704     SMLoc ModeLoc = Parser.getTok().getLoc();
5705     bool Ok = false;
5706 
5707     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5708       Ok = parseSwizzleQuadPerm(Imm);
5709     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5710       Ok = parseSwizzleBitmaskPerm(Imm);
5711     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5712       Ok = parseSwizzleBroadcast(Imm);
5713     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5714       Ok = parseSwizzleSwap(Imm);
5715     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5716       Ok = parseSwizzleReverse(Imm);
5717     } else {
5718       Error(ModeLoc, "expected a swizzle mode");
5719     }
5720 
5721     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5722   }
5723 
5724   return false;
5725 }
5726 
5727 OperandMatchResultTy
5728 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5729   SMLoc S = Parser.getTok().getLoc();
5730   int64_t Imm = 0;
5731 
5732   if (trySkipId("offset")) {
5733 
5734     bool Ok = false;
5735     if (skipToken(AsmToken::Colon, "expected a colon")) {
5736       if (trySkipId("swizzle")) {
5737         Ok = parseSwizzleMacro(Imm);
5738       } else {
5739         Ok = parseSwizzleOffset(Imm);
5740       }
5741     }
5742 
5743     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5744 
5745     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5746   } else {
5747     // Swizzle "offset" operand is optional.
5748     // If it is omitted, try parsing other optional operands.
5749     return parseOptionalOpr(Operands);
5750   }
5751 }
5752 
5753 bool
5754 AMDGPUOperand::isSwizzle() const {
5755   return isImmTy(ImmTySwizzle);
5756 }
5757 
5758 //===----------------------------------------------------------------------===//
5759 // VGPR Index Mode
5760 //===----------------------------------------------------------------------===//
5761 
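     // Parse the body of a gpr_idx(...) macro: a comma-separated list of VGPR
     // index modes combined into a bit mask. An immediately closing parenthesis
     // yields OFF.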
5762 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5763 
5764   using namespace llvm::AMDGPU::VGPRIndexMode;
5765 
5766   if (trySkipToken(AsmToken::RParen)) {
5767     return OFF;
5768   }
5769 
5770   int64_t Imm = 0;
5771 
5772   while (true) {
5773     unsigned Mode = 0;
5774     SMLoc S = Parser.getTok().getLoc();
5775 
5776     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5777       if (trySkipId(IdSymbolic[ModeId])) {
5778         Mode = 1 << ModeId;
5779         break;
5780       }
5781     }
5782 
5783     if (Mode == 0) {
5784       Error(S, (Imm == 0)?
5785                "expected a VGPR index mode or a closing parenthesis" :
5786                "expected a VGPR index mode");
5787       break;
5788     }
5789 
5790     if (Imm & Mode) {
5791       Error(S, "duplicate VGPR index mode");
5792       break;
5793     }
5794     Imm |= Mode;
5795 
5796     if (trySkipToken(AsmToken::RParen))
5797       break;
5798     if (!skipToken(AsmToken::Comma,
5799                    "expected a comma or a closing parenthesis"))
5800       break;
5801   }
5802 
5803   return Imm;
5804 }
5805 
5806 OperandMatchResultTy
5807 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5808 
5809   int64_t Imm = 0;
5810   SMLoc S = Parser.getTok().getLoc();
5811 
5812   if (getLexer().getKind() == AsmToken::Identifier &&
5813       Parser.getTok().getString() == "gpr_idx" &&
5814       getLexer().peekTok().is(AsmToken::LParen)) {
5815 
5816     Parser.Lex();
5817     Parser.Lex();
5818 
5819     // If parsing failed, trigger an error but do not return an error code
5820     // to avoid excessive error messages.
5821     Imm = parseGPRIdxMacro();
5822 
5823   } else {
5824     if (getParser().parseAbsoluteExpression(Imm))
5825       return MatchOperand_NoMatch;
5826     if (Imm < 0 || !isUInt<4>(Imm)) {
5827       Error(S, "invalid immediate: only 4-bit values are legal");
5828     }
5829   }
5830 
5831   Operands.push_back(
5832       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5833   return MatchOperand_Success;
5834 }
5835 
5836 bool AMDGPUOperand::isGPRIdxMode() const {
5837   return isImmTy(ImmTyGprIdxMode);
5838 }
5839 
5840 //===----------------------------------------------------------------------===//
5841 // sopp branch targets
5842 //===----------------------------------------------------------------------===//
5843 
5844 OperandMatchResultTy
5845 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5846 
5847   // Make sure we are not parsing something
5848   // that looks like a label or an expression but is not.
5849   // This will improve error messages.
5850   if (isRegister() || isModifier())
5851     return MatchOperand_NoMatch;
5852 
5853   if (parseExpr(Operands)) {
5854 
5855     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5856     assert(Opr.isImm() || Opr.isExpr());
5857     SMLoc Loc = Opr.getStartLoc();
5858 
5859     // Currently we do not support arbitrary expressions as branch targets.
5860     // Only labels and absolute expressions are accepted.
5861     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5862       Error(Loc, "expected an absolute expression or a label");
5863     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5864       Error(Loc, "expected a 16-bit signed jump offset");
5865     }
5866   }
5867 
5868   return MatchOperand_Success; // avoid excessive error messages
5869 }
5870 
5871 //===----------------------------------------------------------------------===//
5872 // Boolean holding registers
5873 //===----------------------------------------------------------------------===//
5874 
5875 OperandMatchResultTy
5876 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5877   return parseReg(Operands);
5878 }
5879 
5880 //===----------------------------------------------------------------------===//
5881 // mubuf
5882 //===----------------------------------------------------------------------===//
5883 
5884 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5885   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5886 }
5887 
5888 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5889   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5890 }
5891 
5892 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5893   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5894 }
5895 
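     // Convert parsed MUBUF operands to an MCInst. For atomics with a return
     // value the destination register is also added as a tied source; optional
     // immediates are appended in encoding order after the registers.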
5896 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5897                                const OperandVector &Operands,
5898                                bool IsAtomic,
5899                                bool IsAtomicReturn,
5900                                bool IsLds) {
5901   bool IsLdsOpcode = IsLds;
5902   bool HasLdsModifier = false;
5903   OptionalImmIndexMap OptionalIdx;
5904   assert(IsAtomicReturn ? IsAtomic : true);
5905   unsigned FirstOperandIdx = 1;
5906 
5907   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5908     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5909 
5910     // Add the register arguments
5911     if (Op.isReg()) {
5912       Op.addRegOperands(Inst, 1);
5913       // Insert a tied src for atomic return dst.
5914       // This cannot be postponed as subsequent calls to
5915       // addImmOperands rely on correct number of MC operands.
5916       if (IsAtomicReturn && i == FirstOperandIdx)
5917         Op.addRegOperands(Inst, 1);
5918       continue;
5919     }
5920 
5921     // Handle the case where soffset is an immediate
5922     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5923       Op.addImmOperands(Inst, 1);
5924       continue;
5925     }
5926 
5927     HasLdsModifier |= Op.isLDS();
5928 
5929     // Handle tokens like 'offen' which are sometimes hard-coded into the
5930     // asm string.  There are no MCInst operands for these.
5931     if (Op.isToken()) {
5932       continue;
5933     }
5934     assert(Op.isImm());
5935 
5936     // Handle optional arguments
5937     OptionalIdx[Op.getImmTy()] = i;
5938   }
5939 
5940   // This is a workaround for an llvm quirk which may result in an
5941   // incorrect instruction selection. Lds and non-lds versions of
5942   // MUBUF instructions are identical except that lds versions
5943   // have a mandatory 'lds' modifier. However, this modifier follows
5944   // the optional modifiers, and the llvm asm matcher regards this 'lds'
5945   // modifier as an optional one. As a result, an lds version
5946   // of an opcode may be selected even if it has no 'lds' modifier.
5947   if (IsLdsOpcode && !HasLdsModifier) {
5948     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5949     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5950       Inst.setOpcode(NoLdsOpcode);
5951       IsLdsOpcode = false;
5952     }
5953   }
5954 
5955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5956   if (!IsAtomic) { // glc is hard-coded.
5957     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5958   }
5959   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5960 
5961   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5962     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5963   }
5964 
5965   if (isGFX10())
5966     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5967 }
5968 
5969 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5970   OptionalImmIndexMap OptionalIdx;
5971 
5972   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5973     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5974 
5975     // Add the register arguments
5976     if (Op.isReg()) {
5977       Op.addRegOperands(Inst, 1);
5978       continue;
5979     }
5980 
5981     // Handle the case where soffset is an immediate
5982     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5983       Op.addImmOperands(Inst, 1);
5984       continue;
5985     }
5986 
5987     // Handle tokens like 'offen' which are sometimes hard-coded into the
5988     // asm string.  There are no MCInst operands for these.
5989     if (Op.isToken()) {
5990       continue;
5991     }
5992     assert(Op.isImm());
5993 
5994     // Handle optional arguments
5995     OptionalIdx[Op.getImmTy()] = i;
5996   }
5997 
5998   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5999                         AMDGPUOperand::ImmTyOffset);
6000   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6001   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6002   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6003   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6004 
6005   if (isGFX10())
6006     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6007 }
6008 
6009 //===----------------------------------------------------------------------===//
6010 // mimg
6011 //===----------------------------------------------------------------------===//
6012 
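     // Convert parsed MIMG operands to an MCInst: defs first, a tied copy of the
     // destination as source for atomics, then the optional modifiers in
     // encoding order.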
6013 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6014                               bool IsAtomic) {
6015   unsigned I = 1;
6016   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6017   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6018     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6019   }
6020 
6021   if (IsAtomic) {
6022     // Add src, same as dst
6023     assert(Desc.getNumDefs() == 1);
6024     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6025   }
6026 
6027   OptionalImmIndexMap OptionalIdx;
6028 
6029   for (unsigned E = Operands.size(); I != E; ++I) {
6030     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6031 
6032     // Add the register arguments
6033     if (Op.isReg()) {
6034       Op.addRegOperands(Inst, 1);
6035     } else if (Op.isImmModifier()) {
6036       OptionalIdx[Op.getImmTy()] = I;
6037     } else if (!Op.isToken()) {
6038       llvm_unreachable("unexpected operand type");
6039     }
6040   }
6041 
6042   bool IsGFX10 = isGFX10();
6043 
6044   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6045   if (IsGFX10)
6046     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6047   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6048   if (IsGFX10)
6049     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6050   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6051   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6052   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6053   if (IsGFX10)
6054     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6055   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6056   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6057   if (!IsGFX10)
6058     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6059   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6060 }
6061 
6062 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6063   cvtMIMG(Inst, Operands, true);
6064 }
6065 
6066 //===----------------------------------------------------------------------===//
6067 // smrd
6068 //===----------------------------------------------------------------------===//
6069 
6070 bool AMDGPUOperand::isSMRDOffset8() const {
6071   return isImm() && isUInt<8>(getImm());
6072 }
6073 
6074 bool AMDGPUOperand::isSMRDOffset20() const {
6075   return isImm() && isUInt<20>(getImm());
6076 }
6077 
6078 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6079   // 32-bit literals are only supported on CI, and we only want to use them
6080   // when the offset does not fit into 8 bits.
6081   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6082 }
6083 
6084 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6085   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6086 }
6087 
6088 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6089   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6090 }
6091 
6092 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6093   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6094 }
6095 
6096 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6097   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6098 }
6099 
6100 //===----------------------------------------------------------------------===//
6101 // vop3
6102 //===----------------------------------------------------------------------===//
6103 
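     // The omod modifier is a 2-bit field: mul:1/div:1 encode as 0 (no omod),
     // mul:2 as 1, mul:4 as 2 and div:2 as 3; the converters below perform this
     // mapping.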
6104 static bool ConvertOmodMul(int64_t &Mul) {
6105   if (Mul != 1 && Mul != 2 && Mul != 4)
6106     return false;
6107 
6108   Mul >>= 1;
6109   return true;
6110 }
6111 
6112 static bool ConvertOmodDiv(int64_t &Div) {
6113   if (Div == 1) {
6114     Div = 0;
6115     return true;
6116   }
6117 
6118   if (Div == 2) {
6119     Div = 3;
6120     return true;
6121   }
6122 
6123   return false;
6124 }
6125 
6126 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6127   if (BoundCtrl == 0) {
6128     BoundCtrl = 1;
6129     return true;
6130   }
6131 
6132   if (BoundCtrl == -1) {
6133     BoundCtrl = 0;
6134     return true;
6135   }
6136 
6137   return false;
6138 }
6139 
6140 // Note: the order in this table matches the order of operands in AsmString.
6141 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6142   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6143   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6144   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6145   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6146   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6147   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6148   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6149   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6150   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6151   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6152   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6153   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6154   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6155   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6156   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6157   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6158   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6159   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6160   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6161   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6162   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6163   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6164   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6165   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6166   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6167   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6168   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6169   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6170   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6171   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6172   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6173   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6174   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6175   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6176   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6177   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6178   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6179   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6180   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6181   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6182   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6183   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6184   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6185   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6186 };
6187 
6188 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6189 
6190   OperandMatchResultTy res = parseOptionalOpr(Operands);
6191 
6192   // This is a hack to enable hardcoded mandatory operands which follow
6193   // optional operands.
6194   //
6195   // Current design assumes that all operands after the first optional operand
6196   // are also optional. However, the implementation of some instructions violates
6197   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6198   //
6199   // To alleviate this problem, we have to (implicitly) parse extra operands
6200   // to make sure autogenerated parser of custom operands never hit hardcoded
6201   // mandatory operands.
6202 
6203   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6204     if (res != MatchOperand_Success ||
6205         isToken(AsmToken::EndOfStatement))
6206       break;
6207 
6208     trySkipToken(AsmToken::Comma);
6209     res = parseOptionalOpr(Operands);
6210   }
6211 
6212   return res;
6213 }
6214 
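// Try each entry of AMDGPUOptionalOperandTable in turn, dispatching to the
// dedicated parser for named bits, omod, SDWA selects, op_sel-style arrays,
// dim and dfmt/nfmt; the first entry that matches (or fails) decides the
// result.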
6215 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6216   OperandMatchResultTy res;
6217   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6218     // try to parse any optional operand here
6219     if (Op.IsBit) {
6220       res = parseNamedBit(Op.Name, Operands, Op.Type);
6221     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6222       res = parseOModOperand(Operands);
6223     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6224                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6225                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6226       res = parseSDWASel(Operands, Op.Name, Op.Type);
6227     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6228       res = parseSDWADstUnused(Operands);
6229     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6230                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6231                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6232                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6233       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6234                                         Op.ConvertResult);
6235     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6236       res = parseDim(Operands);
6237     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6238       res = parseDfmtNfmt(Operands);
6239     } else {
6240       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6241     }
6242     if (res != MatchOperand_NoMatch) {
6243       return res;
6244     }
6245   }
6246   return MatchOperand_NoMatch;
6247 }
6248 
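// Parse an output modifier written as "mul:N" or "div:N"; the ConvertOmodMul
// and ConvertOmodDiv callbacks translate the parsed value into the omod
// encoding.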
6249 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6250   StringRef Name = Parser.getTok().getString();
6251   if (Name == "mul") {
6252     return parseIntWithPrefix("mul", Operands,
6253                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6254   }
6255 
6256   if (Name == "div") {
6257     return parseIntWithPrefix("div", Operands,
6258                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6259   }
6260 
6261   return MatchOperand_NoMatch;
6262 }
6263 
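// Convert a VOP3 opcode that uses op_sel. After the common VOP3P conversion,
// the op_sel bit that follows the last source (i.e. the dst bit) is folded
// into src0_modifiers as DST_OP_SEL, which is where the encoding carries it.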
6264 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6265   cvtVOP3P(Inst, Operands);
6266 
6267   int Opc = Inst.getOpcode();
6268 
6269   int SrcNum;
6270   const int Ops[] = { AMDGPU::OpName::src0,
6271                       AMDGPU::OpName::src1,
6272                       AMDGPU::OpName::src2 };
6273   for (SrcNum = 0;
6274        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6275        ++SrcNum);
6276   assert(SrcNum > 0);
6277 
6278   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6279   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6280 
6281   if ((OpSel & (1 << SrcNum)) != 0) {
6282     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6283     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6284     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6285   }
6286 }
6287 
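// Return true if the operand at OpNum is an input-modifiers operand that is
// immediately followed by the (untied) register-class source it applies to.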
6288 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is the input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6297 }
6298 
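// Convert the parsed operands of a VOP3 interpolation instruction: copy defs,
// add sources (with FP input modifiers where present), pass the interp
// slot/attr/chan immediates through directly, and append the optional high,
// clamp and omod operands recorded along the way.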
6299 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6300 {
6301   OptionalImmIndexMap OptionalIdx;
6302   unsigned Opc = Inst.getOpcode();
6303 
6304   unsigned I = 1;
6305   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6306   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6307     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6308   }
6309 
6310   for (unsigned E = Operands.size(); I != E; ++I) {
6311     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6312     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6313       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6314     } else if (Op.isInterpSlot() ||
6315                Op.isInterpAttr() ||
6316                Op.isAttrChan()) {
6317       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6318     } else if (Op.isImmModifier()) {
6319       OptionalIdx[Op.getImmTy()] = I;
6320     } else {
6321       llvm_unreachable("unhandled operand type");
6322     }
6323   }
6324 
6325   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6326     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6327   }
6328 
6329   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6330     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6331   }
6332 
6333   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6334     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6335   }
6336 }
6337 
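// Generic VOP3 conversion: copy defs, then add the sources either with or
// without src modifiers depending on whether the opcode has src0_modifiers,
// append the optional clamp/omod operands, and finally materialize the tied
// src2 of v_mac/v_fmac (see the special case below).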
6338 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6339                               OptionalImmIndexMap &OptionalIdx) {
6340   unsigned Opc = Inst.getOpcode();
6341 
6342   unsigned I = 1;
6343   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6344   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6345     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6346   }
6347 
6348   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6349     // This instruction has src modifiers
6350     for (unsigned E = Operands.size(); I != E; ++I) {
6351       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6352       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6353         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6354       } else if (Op.isImmModifier()) {
6355         OptionalIdx[Op.getImmTy()] = I;
6356       } else if (Op.isRegOrImm()) {
6357         Op.addRegOrImmOperands(Inst, 1);
6358       } else {
6359         llvm_unreachable("unhandled operand type");
6360       }
6361     }
6362   } else {
6363     // No src modifiers
6364     for (unsigned E = Operands.size(); I != E; ++I) {
6365       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6366       if (Op.isMod()) {
6367         OptionalIdx[Op.getImmTy()] = I;
6368       } else {
6369         Op.addRegOrImmOperands(Inst, 1);
6370       }
6371     }
6372   }
6373 
6374   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6375     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6376   }
6377 
6378   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6379     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6380   }
6381 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
6386   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6387       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6388       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6389       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6390       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6391       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6392       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6393     auto it = Inst.begin();
6394     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6395     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6396     ++it;
6397     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6398   }
6399 }
6400 
6401 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6402   OptionalImmIndexMap OptionalIdx;
6403   cvtVOP3(Inst, Operands, OptionalIdx);
6404 }
6405 
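// VOP3P conversion: run the generic VOP3 conversion first, then append the
// op_sel, op_sel_hi, neg_lo and neg_hi immediates and fold their per-source
// bits into the corresponding srcN_modifiers operands
// (OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI).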
6406 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6407                                const OperandVector &Operands) {
6408   OptionalImmIndexMap OptIdx;
6409   const int Opc = Inst.getOpcode();
6410   const MCInstrDesc &Desc = MII.get(Opc);
6411 
6412   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6413 
6414   cvtVOP3(Inst, Operands, OptIdx);
6415 
6416   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6417     assert(!IsPacked);
6418     Inst.addOperand(Inst.getOperand(0));
6419   }
6420 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6423 
6424   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6425 
6426   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6427   if (OpSelHiIdx != -1) {
6428     int DefaultVal = IsPacked ? -1 : 0;
6429     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6430                           DefaultVal);
6431   }
6432 
6433   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6434   if (NegLoIdx != -1) {
6435     assert(IsPacked);
6436     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6437     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6438   }
6439 
6440   const int Ops[] = { AMDGPU::OpName::src0,
6441                       AMDGPU::OpName::src1,
6442                       AMDGPU::OpName::src2 };
6443   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6444                          AMDGPU::OpName::src1_modifiers,
6445                          AMDGPU::OpName::src2_modifiers };
6446 
6447   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6448 
6449   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6450   unsigned OpSelHi = 0;
6451   unsigned NegLo = 0;
6452   unsigned NegHi = 0;
6453 
6454   if (OpSelHiIdx != -1) {
6455     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6456   }
6457 
6458   if (NegLoIdx != -1) {
6459     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6460     NegLo = Inst.getOperand(NegLoIdx).getImm();
6461     NegHi = Inst.getOperand(NegHiIdx).getImm();
6462   }
6463 
6464   for (int J = 0; J < 3; ++J) {
6465     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6466     if (OpIdx == -1)
6467       break;
6468 
6469     uint32_t ModVal = 0;
6470 
6471     if ((OpSel & (1 << J)) != 0)
6472       ModVal |= SISrcMods::OP_SEL_0;
6473 
6474     if ((OpSelHi & (1 << J)) != 0)
6475       ModVal |= SISrcMods::OP_SEL_1;
6476 
6477     if ((NegLo & (1 << J)) != 0)
6478       ModVal |= SISrcMods::NEG;
6479 
6480     if ((NegHi & (1 << J)) != 0)
6481       ModVal |= SISrcMods::NEG_HI;
6482 
6483     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6484 
6485     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6486   }
6487 }
6488 
6489 //===----------------------------------------------------------------------===//
6490 // dpp
6491 //===----------------------------------------------------------------------===//
6492 
6493 bool AMDGPUOperand::isDPP8() const {
6494   return isImmTy(ImmTyDPP8);
6495 }
6496 
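// A dpp_ctrl immediate is valid only if it falls into one of the encodings
// checked below (quad_perm, row_shl/shr/ror, the wave_* shifts/rotates,
// row_mirror/half_mirror, broadcasts, and the GFX10 row_share/row_xmask).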
6497 bool AMDGPUOperand::isDPPCtrl() const {
6498   using namespace AMDGPU::DPP;
6499 
6500   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6501   if (result) {
6502     int64_t Imm = getImm();
6503     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6504            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6505            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6506            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6507            (Imm == DppCtrl::WAVE_SHL1) ||
6508            (Imm == DppCtrl::WAVE_ROL1) ||
6509            (Imm == DppCtrl::WAVE_SHR1) ||
6510            (Imm == DppCtrl::WAVE_ROR1) ||
6511            (Imm == DppCtrl::ROW_MIRROR) ||
6512            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6513            (Imm == DppCtrl::BCAST15) ||
6514            (Imm == DppCtrl::BCAST31) ||
6515            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6516            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6517   }
6518   return false;
6519 }
6520 
6521 //===----------------------------------------------------------------------===//
6522 // mAI
6523 //===----------------------------------------------------------------------===//
6524 
6525 bool AMDGPUOperand::isBLGP() const {
6526   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6527 }
6528 
6529 bool AMDGPUOperand::isCBSZ() const {
6530   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6531 }
6532 
6533 bool AMDGPUOperand::isABID() const {
6534   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6535 }
6536 
6537 bool AMDGPUOperand::isS16Imm() const {
6538   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6539 }
6540 
6541 bool AMDGPUOperand::isU16Imm() const {
6542   return isImm() && isUInt<16>(getImm());
6543 }
6544 
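// Parse the MIMG dimension operand, e.g. "dim:SQ_RSRC_IMG_2D" or the short
// form "dim:2D" (GFX10 only).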
6545 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6546   if (!isGFX10())
6547     return MatchOperand_NoMatch;
6548 
6549   SMLoc S = Parser.getTok().getLoc();
6550 
6551   if (getLexer().isNot(AsmToken::Identifier))
6552     return MatchOperand_NoMatch;
6553   if (getLexer().getTok().getString() != "dim")
6554     return MatchOperand_NoMatch;
6555 
6556   Parser.Lex();
6557   if (getLexer().isNot(AsmToken::Colon))
6558     return MatchOperand_ParseFail;
6559 
6560   Parser.Lex();
6561 
6562   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6563   // integer.
6564   std::string Token;
6565   if (getLexer().is(AsmToken::Integer)) {
6566     SMLoc Loc = getLexer().getTok().getEndLoc();
6567     Token = std::string(getLexer().getTok().getString());
6568     Parser.Lex();
6569     if (getLexer().getTok().getLoc() != Loc)
6570       return MatchOperand_ParseFail;
6571   }
6572   if (getLexer().isNot(AsmToken::Identifier))
6573     return MatchOperand_ParseFail;
6574   Token += getLexer().getTok().getString();
6575 
6576   StringRef DimId = Token;
6577   if (DimId.startswith("SQ_RSRC_IMG_"))
6578     DimId = DimId.substr(12);
6579 
6580   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6581   if (!DimInfo)
6582     return MatchOperand_ParseFail;
6583 
6584   Parser.Lex();
6585 
6586   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6587                                               AMDGPUOperand::ImmTyDim));
6588   return MatchOperand_Success;
6589 }
6590 
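// Parse a DPP8 lane-select list, e.g. "dpp8:[7,6,5,4,3,2,1,0]". Each of the
// eight selects is a value in [0, 7]; they are packed, 3 bits per lane, into
// a single immediate.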
6591 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6592   SMLoc S = Parser.getTok().getLoc();
6593   StringRef Prefix;
6594 
6595   if (getLexer().getKind() == AsmToken::Identifier) {
6596     Prefix = Parser.getTok().getString();
6597   } else {
6598     return MatchOperand_NoMatch;
6599   }
6600 
6601   if (Prefix != "dpp8")
6602     return parseDPPCtrl(Operands);
6603   if (!isGFX10())
6604     return MatchOperand_NoMatch;
6605 
6606   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6607 
6608   int64_t Sels[8];
6609 
6610   Parser.Lex();
6611   if (getLexer().isNot(AsmToken::Colon))
6612     return MatchOperand_ParseFail;
6613 
6614   Parser.Lex();
6615   if (getLexer().isNot(AsmToken::LBrac))
6616     return MatchOperand_ParseFail;
6617 
6618   Parser.Lex();
6619   if (getParser().parseAbsoluteExpression(Sels[0]))
6620     return MatchOperand_ParseFail;
6621   if (0 > Sels[0] || 7 < Sels[0])
6622     return MatchOperand_ParseFail;
6623 
6624   for (size_t i = 1; i < 8; ++i) {
6625     if (getLexer().isNot(AsmToken::Comma))
6626       return MatchOperand_ParseFail;
6627 
6628     Parser.Lex();
6629     if (getParser().parseAbsoluteExpression(Sels[i]))
6630       return MatchOperand_ParseFail;
6631     if (0 > Sels[i] || 7 < Sels[i])
6632       return MatchOperand_ParseFail;
6633   }
6634 
6635   if (getLexer().isNot(AsmToken::RBrac))
6636     return MatchOperand_ParseFail;
6637   Parser.Lex();
6638 
6639   unsigned DPP8 = 0;
6640   for (size_t i = 0; i < 8; ++i)
6641     DPP8 |= (Sels[i] << (i * 3));
6642 
6643   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6644   return MatchOperand_Success;
6645 }
6646 
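// Parse a classic DPP control operand, e.g. "quad_perm:[0,1,2,3]",
// "row_shl:1" or "row_mirror", and fold the textual form into the single
// dpp_ctrl immediate encoding.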
6647 OperandMatchResultTy
6648 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6649   using namespace AMDGPU::DPP;
6650 
6651   SMLoc S = Parser.getTok().getLoc();
6652   StringRef Prefix;
6653   int64_t Int;
6654 
6655   if (getLexer().getKind() == AsmToken::Identifier) {
6656     Prefix = Parser.getTok().getString();
6657   } else {
6658     return MatchOperand_NoMatch;
6659   }
6660 
6661   if (Prefix == "row_mirror") {
6662     Int = DppCtrl::ROW_MIRROR;
6663     Parser.Lex();
6664   } else if (Prefix == "row_half_mirror") {
6665     Int = DppCtrl::ROW_HALF_MIRROR;
6666     Parser.Lex();
6667   } else {
6668     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6669     if (Prefix != "quad_perm"
6670         && Prefix != "row_shl"
6671         && Prefix != "row_shr"
6672         && Prefix != "row_ror"
6673         && Prefix != "wave_shl"
6674         && Prefix != "wave_rol"
6675         && Prefix != "wave_shr"
6676         && Prefix != "wave_ror"
6677         && Prefix != "row_bcast"
6678         && Prefix != "row_share"
6679         && Prefix != "row_xmask") {
6680       return MatchOperand_NoMatch;
6681     }
6682 
6683     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6684       return MatchOperand_NoMatch;
6685 
6686     if (!isVI() && !isGFX9() &&
6687         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6688          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6689          Prefix == "row_bcast"))
6690       return MatchOperand_NoMatch;
6691 
6692     Parser.Lex();
6693     if (getLexer().isNot(AsmToken::Colon))
6694       return MatchOperand_ParseFail;
6695 
6696     if (Prefix == "quad_perm") {
6697       // quad_perm:[%d,%d,%d,%d]
6698       Parser.Lex();
6699       if (getLexer().isNot(AsmToken::LBrac))
6700         return MatchOperand_ParseFail;
6701       Parser.Lex();
6702 
6703       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6704         return MatchOperand_ParseFail;
6705 
6706       for (int i = 0; i < 3; ++i) {
6707         if (getLexer().isNot(AsmToken::Comma))
6708           return MatchOperand_ParseFail;
6709         Parser.Lex();
6710 
6711         int64_t Temp;
6712         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6713           return MatchOperand_ParseFail;
6714         const int shift = i*2 + 2;
6715         Int += (Temp << shift);
6716       }
6717 
6718       if (getLexer().isNot(AsmToken::RBrac))
6719         return MatchOperand_ParseFail;
6720       Parser.Lex();
6721     } else {
6722       // sel:%d
6723       Parser.Lex();
6724       if (getParser().parseAbsoluteExpression(Int))
6725         return MatchOperand_ParseFail;
6726 
6727       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6728         Int |= DppCtrl::ROW_SHL0;
6729       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6730         Int |= DppCtrl::ROW_SHR0;
6731       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6732         Int |= DppCtrl::ROW_ROR0;
6733       } else if (Prefix == "wave_shl" && 1 == Int) {
6734         Int = DppCtrl::WAVE_SHL1;
6735       } else if (Prefix == "wave_rol" && 1 == Int) {
6736         Int = DppCtrl::WAVE_ROL1;
6737       } else if (Prefix == "wave_shr" && 1 == Int) {
6738         Int = DppCtrl::WAVE_SHR1;
6739       } else if (Prefix == "wave_ror" && 1 == Int) {
6740         Int = DppCtrl::WAVE_ROR1;
6741       } else if (Prefix == "row_bcast") {
6742         if (Int == 15) {
6743           Int = DppCtrl::BCAST15;
6744         } else if (Int == 31) {
6745           Int = DppCtrl::BCAST31;
6746         } else {
6747           return MatchOperand_ParseFail;
6748         }
6749       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6750         Int |= DppCtrl::ROW_SHARE_FIRST;
6751       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6752         Int |= DppCtrl::ROW_XMASK_FIRST;
6753       } else {
6754         return MatchOperand_ParseFail;
6755       }
6756     }
6757   }
6758 
6759   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6760   return MatchOperand_Success;
6761 }
6762 
6763 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6764   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6765 }
6766 
6767 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6768   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6769 }
6770 
6771 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6772   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6773 }
6774 
6775 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6776   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6777 }
6778 
6779 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6780   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6781 }
6782 
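// Convert parsed DPP operands: copy defs, re-add tied operands required by
// the descriptor, skip the textual "vcc" of VOP2b opcodes, add sources (with
// FP input modifiers where present), then append either the fi immediate
// (dpp8 form) or the row_mask/bank_mask/bound_ctrl/fi defaults.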
6783 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6784   OptionalImmIndexMap OptionalIdx;
6785 
6786   unsigned I = 1;
6787   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6788   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6789     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6790   }
6791 
6792   int Fi = 0;
6793   for (unsigned E = Operands.size(); I != E; ++I) {
6794     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6795                                             MCOI::TIED_TO);
6796     if (TiedTo != -1) {
6797       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand of MAC instructions.
6799       Inst.addOperand(Inst.getOperand(TiedTo));
6800     }
6801     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6802     // Add the register arguments
6803     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
6806       continue;
6807     }
6808 
6809     if (IsDPP8) {
6810       if (Op.isDPP8()) {
6811         Op.addImmOperands(Inst, 1);
6812       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6813         Op.addRegWithFPInputModsOperands(Inst, 2);
6814       } else if (Op.isFI()) {
6815         Fi = Op.getImm();
6816       } else if (Op.isReg()) {
6817         Op.addRegOperands(Inst, 1);
6818       } else {
6819         llvm_unreachable("Invalid operand type");
6820       }
6821     } else {
6822       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6823         Op.addRegWithFPInputModsOperands(Inst, 2);
6824       } else if (Op.isDPPCtrl()) {
6825         Op.addImmOperands(Inst, 1);
6826       } else if (Op.isImm()) {
6827         // Handle optional arguments
6828         OptionalIdx[Op.getImmTy()] = I;
6829       } else {
6830         llvm_unreachable("Invalid operand type");
6831       }
6832     }
6833   }
6834 
6835   if (IsDPP8) {
6836     using namespace llvm::AMDGPU::DPP;
6837     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6838   } else {
6839     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6840     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6841     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6842     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6843       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6844     }
6845   }
6846 }
6847 
6848 //===----------------------------------------------------------------------===//
6849 // sdwa
6850 //===----------------------------------------------------------------------===//
6851 
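// Parse an SDWA select operand such as "dst_sel:BYTE_0" or "src0_sel:WORD_1"
// and map the keyword onto the SdwaSel encoding.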
6852 OperandMatchResultTy
6853 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6854                               AMDGPUOperand::ImmTy Type) {
6855   using namespace llvm::AMDGPU::SDWA;
6856 
6857   SMLoc S = Parser.getTok().getLoc();
6858   StringRef Value;
6859   OperandMatchResultTy res;
6860 
6861   res = parseStringWithPrefix(Prefix, Value);
6862   if (res != MatchOperand_Success) {
6863     return res;
6864   }
6865 
6866   int64_t Int;
6867   Int = StringSwitch<int64_t>(Value)
6868         .Case("BYTE_0", SdwaSel::BYTE_0)
6869         .Case("BYTE_1", SdwaSel::BYTE_1)
6870         .Case("BYTE_2", SdwaSel::BYTE_2)
6871         .Case("BYTE_3", SdwaSel::BYTE_3)
6872         .Case("WORD_0", SdwaSel::WORD_0)
6873         .Case("WORD_1", SdwaSel::WORD_1)
6874         .Case("DWORD", SdwaSel::DWORD)
6875         .Default(0xffffffff);
6876   Parser.Lex(); // eat last token
6877 
6878   if (Int == 0xffffffff) {
6879     return MatchOperand_ParseFail;
6880   }
6881 
6882   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6883   return MatchOperand_Success;
6884 }
6885 
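// Parse "dst_unused:UNUSED_PAD", "dst_unused:UNUSED_SEXT" or
// "dst_unused:UNUSED_PRESERVE" and map the keyword onto the DstUnused
// encoding.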
6886 OperandMatchResultTy
6887 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6888   using namespace llvm::AMDGPU::SDWA;
6889 
6890   SMLoc S = Parser.getTok().getLoc();
6891   StringRef Value;
6892   OperandMatchResultTy res;
6893 
6894   res = parseStringWithPrefix("dst_unused", Value);
6895   if (res != MatchOperand_Success) {
6896     return res;
6897   }
6898 
6899   int64_t Int;
6900   Int = StringSwitch<int64_t>(Value)
6901         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6902         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6903         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6904         .Default(0xffffffff);
6905   Parser.Lex(); // eat last token
6906 
6907   if (Int == 0xffffffff) {
6908     return MatchOperand_ParseFail;
6909   }
6910 
6911   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6912   return MatchOperand_Success;
6913 }
6914 
6915 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6916   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6917 }
6918 
6919 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6920   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6921 }
6922 
6923 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6924   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6925 }
6926 
6927 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6928   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6929 }
6930 
6931 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6932   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6933 }
6934 
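// Common SDWA conversion: copy defs, optionally skip the textual "vcc"
// dst/src of VOP2b/VOPC forms, add sources with input modifiers, then append
// the clamp/omod and dst_sel/dst_unused/srcN_sel defaults expected by the
// basic instruction type.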
6935 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6936                               uint64_t BasicInstType,
6937                               bool SkipDstVcc,
6938                               bool SkipSrcVcc) {
6939   using namespace llvm::AMDGPU::SDWA;
6940 
6941   OptionalImmIndexMap OptionalIdx;
6942   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6943   bool SkippedVcc = false;
6944 
6945   unsigned I = 1;
6946   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6947   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6948     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6949   }
6950 
6951   for (unsigned E = Operands.size(); I != E; ++I) {
6952     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6953     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6954         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
6960       if (BasicInstType == SIInstrFlags::VOP2 &&
6961           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6962            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6963         SkippedVcc = true;
6964         continue;
6965       } else if (BasicInstType == SIInstrFlags::VOPC &&
6966                  Inst.getNumOperands() == 0) {
6967         SkippedVcc = true;
6968         continue;
6969       }
6970     }
6971     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6972       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6973     } else if (Op.isImm()) {
6974       // Handle optional arguments
6975       OptionalIdx[Op.getImmTy()] = I;
6976     } else {
6977       llvm_unreachable("Invalid operand type");
6978     }
6979     SkippedVcc = false;
6980   }
6981 
6982   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6983       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6984       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
6986     switch (BasicInstType) {
6987     case SIInstrFlags::VOP1:
6988       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6989       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6990         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6991       }
6992       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6993       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6994       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6995       break;
6996 
6997     case SIInstrFlags::VOP2:
6998       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6999       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7000         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7001       }
7002       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7003       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7004       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7005       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7006       break;
7007 
7008     case SIInstrFlags::VOPC:
7009       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7010         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7011       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7012       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7013       break;
7014 
7015     default:
7016       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7017     }
7018   }
7019 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
7022   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7023       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7024     auto it = Inst.begin();
7025     std::advance(
7026       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7027     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7028   }
7029 }
7030 
7031 //===----------------------------------------------------------------------===//
7032 // mAI
7033 //===----------------------------------------------------------------------===//
7034 
7035 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7036   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7037 }
7038 
7039 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7040   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7041 }
7042 
7043 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7044   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7045 }
7046 
7047 /// Force static initialization.
7048 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7049   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7050   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7051 }
7052 
7053 #define GET_REGISTER_MATCHER
7054 #define GET_MATCHER_IMPLEMENTATION
7055 #define GET_MNEMONIC_SPELL_CHECKER
7056 #include "AMDGPUGenAsmMatcher.inc"
7057 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
7060 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7061                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method handles the case where we were given an immediate
  // operand but the matcher expects the corresponding token.
7066   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7067   switch (Kind) {
7068   case MCK_addr64:
7069     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7070   case MCK_gds:
7071     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7072   case MCK_lds:
7073     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7074   case MCK_glc:
7075     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7076   case MCK_idxen:
7077     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7078   case MCK_offen:
7079     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7080   case MCK_SSrcB32:
7081     // When operands have expression values, they will return true for isToken,
7082     // because it is not possible to distinguish between a token and an
7083     // expression at parse time. MatchInstructionImpl() will always try to
7084     // match an operand as a token, when isToken returns true, and when the
7085     // name of the expression is not a valid token, the match will fail,
7086     // so we need to handle it here.
7087     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7088   case MCK_SSrcF32:
7089     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7090   case MCK_SoppBrTarget:
7091     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7092   case MCK_VReg32OrOff:
7093     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7094   case MCK_InterpSlot:
7095     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7096   case MCK_Attr:
7097     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7098   case MCK_AttrChan:
7099     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7100   case MCK_SReg_64:
7101   case MCK_SReg_64_XEXEC:
7102     // Null is defined as a 32-bit register but
7103     // it should also be enabled with 64-bit operands.
7104     // The following code enables it for SReg_64 operands
7105     // used as source and destination. Remaining source
7106     // operands are handled in isInlinableImm.
7107     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7108   default:
7109     return Match_InvalidOperand;
7110   }
7111 }
7112 
7113 //===----------------------------------------------------------------------===//
7114 // endpgm
7115 //===----------------------------------------------------------------------===//
7116 
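// Parse the optional immediate operand of s_endpgm; it defaults to 0 and
// must fit in 16 bits.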
7117 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7118   SMLoc S = Parser.getTok().getLoc();
7119   int64_t Imm = 0;
7120 
7121   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
7123     Imm = 0;
7124   }
7125 
7126   if (!isUInt<16>(Imm)) {
7127     Error(S, "expected a 16-bit value");
7128     return MatchOperand_ParseFail;
7129   }
7130 
7131   Operands.push_back(
7132       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7133   return MatchOperand_Success;
7134 }
7135 
7136 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7137