1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
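// Kinds of register operands recognized by the parser: vector, scalar and
// accumulator GPRs, trap-temporary (TTMP) registers, and special registers
// such as VCC, EXEC or M0.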
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
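  // Source operand modifiers parsed from the assembly text: Abs/Neg are the
  // floating-point modifiers (e.g. |v0| and -v0), Sext is the integer
  // sign-extension modifier (e.g. sext(v0)). They are encoded into an
  // SISrcMods immediate operand.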
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
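  // Distinguishes the named immediate operands and instruction modifiers
  // (offsets, cache-policy bits, DPP/SDWA controls, etc.) so that a parsed
  // immediate can be matched to the correct operand slot.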
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
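  // Per-kind operand payloads; the active member of the union below is
  // selected by Kind.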
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret it as a token, we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcF16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   ImmTy getImmTy() const {
693     assert(isImm());
694     return Imm.Type;
695   }
696 
697   unsigned getReg() const override {
698     assert(isRegKind());
699     return Reg.RegNo;
700   }
701 
702   SMLoc getStartLoc() const override {
703     return StartLoc;
704   }
705 
706   SMLoc getEndLoc() const override {
707     return EndLoc;
708   }
709 
710   SMRange getLocRange() const {
711     return SMRange(StartLoc, EndLoc);
712   }
713 
714   Modifiers getModifiers() const {
715     assert(isRegKind() || isImmTy(ImmTyNone));
716     return isRegKind() ? Reg.Mods : Imm.Mods;
717   }
718 
719   void setModifiers(Modifiers Mods) {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     if (isRegKind())
722       Reg.Mods = Mods;
723     else
724       Imm.Mods = Mods;
725   }
726 
727   bool hasModifiers() const {
728     return getModifiers().hasModifiers();
729   }
730 
731   bool hasFPModifiers() const {
732     return getModifiers().hasFPModifiers();
733   }
734 
735   bool hasIntModifiers() const {
736     return getModifiers().hasIntModifiers();
737   }
738 
739   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
740 
741   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
742 
743   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
744 
745   template <unsigned Bitwidth>
746   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
747 
748   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
749     addKImmFPOperands<16>(Inst, N);
750   }
751 
752   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
753     addKImmFPOperands<32>(Inst, N);
754   }
755 
756   void addRegOperands(MCInst &Inst, unsigned N) const;
757 
758   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
759     addRegOperands(Inst, N);
760   }
761 
762   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
763     if (isRegKind())
764       addRegOperands(Inst, N);
765     else if (isExpr())
766       Inst.addOperand(MCOperand::createExpr(Expr));
767     else
768       addImmOperands(Inst, N);
769   }
770 
771   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
772     Modifiers Mods = getModifiers();
773     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
774     if (isRegKind()) {
775       addRegOperands(Inst, N);
776     } else {
777       addImmOperands(Inst, N, false);
778     }
779   }
780 
781   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
782     assert(!hasIntModifiers());
783     addRegOrImmWithInputModsOperands(Inst, N);
784   }
785 
786   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasFPModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
792     Modifiers Mods = getModifiers();
793     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
794     assert(isRegKind());
795     addRegOperands(Inst, N);
796   }
797 
798   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
799     assert(!hasIntModifiers());
800     addRegWithInputModsOperands(Inst, N);
801   }
802 
803   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasFPModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
809     if (isImm())
810       addImmOperands(Inst, N);
811     else {
812       assert(isExpr());
813       Inst.addOperand(MCOperand::createExpr(Expr));
814     }
815   }
816 
817   static void printImmTy(raw_ostream& OS, ImmTy Type) {
818     switch (Type) {
819     case ImmTyNone: OS << "None"; break;
820     case ImmTyGDS: OS << "GDS"; break;
821     case ImmTyLDS: OS << "LDS"; break;
822     case ImmTyOffen: OS << "Offen"; break;
823     case ImmTyIdxen: OS << "Idxen"; break;
824     case ImmTyAddr64: OS << "Addr64"; break;
825     case ImmTyOffset: OS << "Offset"; break;
826     case ImmTyInstOffset: OS << "InstOffset"; break;
827     case ImmTyOffset0: OS << "Offset0"; break;
828     case ImmTyOffset1: OS << "Offset1"; break;
829     case ImmTyDLC: OS << "DLC"; break;
830     case ImmTyGLC: OS << "GLC"; break;
831     case ImmTySLC: OS << "SLC"; break;
832     case ImmTySWZ: OS << "SWZ"; break;
833     case ImmTyTFE: OS << "TFE"; break;
834     case ImmTyD16: OS << "D16"; break;
835     case ImmTyFORMAT: OS << "FORMAT"; break;
836     case ImmTyClampSI: OS << "ClampSI"; break;
837     case ImmTyOModSI: OS << "OModSI"; break;
838     case ImmTyDPP8: OS << "DPP8"; break;
839     case ImmTyDppCtrl: OS << "DppCtrl"; break;
840     case ImmTyDppRowMask: OS << "DppRowMask"; break;
841     case ImmTyDppBankMask: OS << "DppBankMask"; break;
842     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
843     case ImmTyDppFi: OS << "FI"; break;
844     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
845     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
846     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
847     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
848     case ImmTyDMask: OS << "DMask"; break;
849     case ImmTyDim: OS << "Dim"; break;
850     case ImmTyUNorm: OS << "UNorm"; break;
851     case ImmTyDA: OS << "DA"; break;
852     case ImmTyR128A16: OS << "R128A16"; break;
853     case ImmTyA16: OS << "A16"; break;
854     case ImmTyLWE: OS << "LWE"; break;
855     case ImmTyOff: OS << "Off"; break;
856     case ImmTyExpTgt: OS << "ExpTgt"; break;
857     case ImmTyExpCompr: OS << "ExpCompr"; break;
858     case ImmTyExpVM: OS << "ExpVM"; break;
859     case ImmTyHwreg: OS << "Hwreg"; break;
860     case ImmTySendMsg: OS << "SendMsg"; break;
861     case ImmTyInterpSlot: OS << "InterpSlot"; break;
862     case ImmTyInterpAttr: OS << "InterpAttr"; break;
863     case ImmTyAttrChan: OS << "AttrChan"; break;
864     case ImmTyOpSel: OS << "OpSel"; break;
865     case ImmTyOpSelHi: OS << "OpSelHi"; break;
866     case ImmTyNegLo: OS << "NegLo"; break;
867     case ImmTyNegHi: OS << "NegHi"; break;
868     case ImmTySwizzle: OS << "Swizzle"; break;
869     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
870     case ImmTyHigh: OS << "High"; break;
871     case ImmTyBLGP: OS << "BLGP"; break;
872     case ImmTyCBSZ: OS << "CBSZ"; break;
873     case ImmTyABID: OS << "ABID"; break;
874     case ImmTyEndpgm: OS << "Endpgm"; break;
875     }
876   }
877 
878   void print(raw_ostream &OS) const override {
879     switch (Kind) {
880     case Register:
881       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
882       break;
883     case Immediate:
884       OS << '<' << getImm();
885       if (getImmTy() != ImmTyNone) {
886         OS << " type: "; printImmTy(OS, getImmTy());
887       }
888       OS << " mods: " << Imm.Mods << '>';
889       break;
890     case Token:
891       OS << '\'' << getToken() << '\'';
892       break;
893     case Expression:
894       OS << "<expr " << *Expr << '>';
895       break;
896     }
897   }
898 
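  // Factory helpers used by the parser to construct operands of each kind.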
899   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
900                                       int64_t Val, SMLoc Loc,
901                                       ImmTy Type = ImmTyNone,
902                                       bool IsFPImm = false) {
903     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
904     Op->Imm.Val = Val;
905     Op->Imm.IsFPImm = IsFPImm;
906     Op->Imm.Type = Type;
907     Op->Imm.Mods = Modifiers();
908     Op->StartLoc = Loc;
909     Op->EndLoc = Loc;
910     return Op;
911   }
912 
913   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
914                                         StringRef Str, SMLoc Loc,
915                                         bool HasExplicitEncodingSize = true) {
916     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
917     Res->Tok.Data = Str.data();
918     Res->Tok.Length = Str.size();
919     Res->StartLoc = Loc;
920     Res->EndLoc = Loc;
921     return Res;
922   }
923 
924   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
925                                       unsigned RegNo, SMLoc S,
926                                       SMLoc E) {
927     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
928     Op->Reg.RegNo = RegNo;
929     Op->Reg.Mods = Modifiers();
930     Op->StartLoc = S;
931     Op->EndLoc = E;
932     return Op;
933   }
934 
935   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
936                                        const class MCExpr *Expr, SMLoc S) {
937     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
938     Op->Expr = Expr;
939     Op->StartLoc = S;
940     Op->EndLoc = S;
941     return Op;
942   }
943 };
944 
945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
946   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
947   return OS;
948 }
949 
950 //===----------------------------------------------------------------------===//
951 // AsmParser
952 //===----------------------------------------------------------------------===//
953 
954 // Holds info related to the current kernel, e.g. the count of SGPRs used.
955 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
956 // next .amdgpu_hsa_kernel directive or at EOF.
957 class KernelScopeInfo {
958   int SgprIndexUnusedMin = -1;
959   int VgprIndexUnusedMin = -1;
960   MCContext *Ctx = nullptr;
961 
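  // usesSgprAt/usesVgprAt record that register index \p i is in use and
  // publish the running counts via the .kernel.sgpr_count and
  // .kernel.vgpr_count symbols.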
962   void usesSgprAt(int i) {
963     if (i >= SgprIndexUnusedMin) {
964       SgprIndexUnusedMin = ++i;
965       if (Ctx) {
966         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
967         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
968       }
969     }
970   }
971 
972   void usesVgprAt(int i) {
973     if (i >= VgprIndexUnusedMin) {
974       VgprIndexUnusedMin = ++i;
975       if (Ctx) {
976         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
977         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
978       }
979     }
980   }
981 
982 public:
983   KernelScopeInfo() = default;
984 
985   void initialize(MCContext &Context) {
986     Ctx = &Context;
987     usesSgprAt(SgprIndexUnusedMin = -1);
988     usesVgprAt(VgprIndexUnusedMin = -1);
989   }
990 
991   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
992     switch (RegKind) {
993       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
994       case IS_AGPR: // fall through
995       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
996       default: break;
997     }
998   }
999 };
1000 
1001 class AMDGPUAsmParser : public MCTargetAsmParser {
1002   MCAsmParser &Parser;
1003 
1004   // Number of extra operands parsed after the first optional operand.
1005   // This may be necessary to skip hardcoded mandatory operands.
1006   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1007 
1008   unsigned ForcedEncodingSize = 0;
1009   bool ForcedDPP = false;
1010   bool ForcedSDWA = false;
1011   KernelScopeInfo KernelScope;
1012 
1013   /// @name Auto-generated Match Functions
1014   /// {
1015 
1016 #define GET_ASSEMBLER_HEADER
1017 #include "AMDGPUGenAsmMatcher.inc"
1018 
1019   /// }
1020 
1021 private:
1022   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1023   bool OutOfRangeError(SMRange Range);
1024   /// Calculate VGPR/SGPR blocks required for given target, reserved
1025   /// registers, and user-specified NextFreeXGPR values.
1026   ///
1027   /// \param Features [in] Target features, used for bug corrections.
1028   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1029   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1030   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1031   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1032   /// descriptor field, if valid.
1033   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1034   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1035   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1036   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1037   /// \param VGPRBlocks [out] Result VGPR block count.
1038   /// \param SGPRBlocks [out] Result SGPR block count.
1039   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1040                           bool FlatScrUsed, bool XNACKUsed,
1041                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1042                           SMRange VGPRRange, unsigned NextFreeSGPR,
1043                           SMRange SGPRRange, unsigned &VGPRBlocks,
1044                           unsigned &SGPRBlocks);
1045   bool ParseDirectiveAMDGCNTarget();
1046   bool ParseDirectiveAMDHSAKernel();
1047   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1048   bool ParseDirectiveHSACodeObjectVersion();
1049   bool ParseDirectiveHSACodeObjectISA();
1050   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1051   bool ParseDirectiveAMDKernelCodeT();
1052   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1053   bool ParseDirectiveAMDGPUHsaKernel();
1054 
1055   bool ParseDirectiveISAVersion();
1056   bool ParseDirectiveHSAMetadata();
1057   bool ParseDirectivePALMetadataBegin();
1058   bool ParseDirectivePALMetadata();
1059   bool ParseDirectiveAMDGPULDS();
1060 
1061   /// Common code to parse out a block of text (typically YAML) between start and
1062   /// end directives.
1063   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1064                            const char *AssemblerDirectiveEnd,
1065                            std::string &CollectString);
1066 
1067   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1068                              RegisterKind RegKind, unsigned Reg1);
1069   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1070                            unsigned &RegNum, unsigned &RegWidth,
1071                            bool RestoreOnFailure = false);
1072   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1073                            unsigned &RegNum, unsigned &RegWidth,
1074                            SmallVectorImpl<AsmToken> &Tokens);
1075   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1076                            unsigned &RegWidth,
1077                            SmallVectorImpl<AsmToken> &Tokens);
1078   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1079                            unsigned &RegWidth,
1080                            SmallVectorImpl<AsmToken> &Tokens);
1081   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1082                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1083   bool ParseRegRange(unsigned& Num, unsigned& Width);
1084   unsigned getRegularReg(RegisterKind RegKind,
1085                          unsigned RegNum,
1086                          unsigned RegWidth);
1087 
1088   bool isRegister();
1089   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1090   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1091   void initializeGprCountSymbol(RegisterKind RegKind);
1092   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1093                              unsigned RegWidth);
1094   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1095                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1096   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1097                  bool IsGdsHardcoded);
1098 
1099 public:
1100   enum AMDGPUMatchResultTy {
1101     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1102   };
1103   enum OperandMode {
1104     OperandMode_Default,
1105     OperandMode_NSA,
1106   };
1107 
1108   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1109 
1110   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1111                const MCInstrInfo &MII,
1112                const MCTargetOptions &Options)
1113       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1114     MCAsmParserExtension::Initialize(Parser);
1115 
1116     if (getFeatureBits().none()) {
1117       // Set default features.
1118       copySTI().ToggleFeature("southern-islands");
1119     }
1120 
1121     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1122 
1123     {
1124       // TODO: make these pre-defined variables read-only.
1125       // Currently there is no suitable machinery in core llvm-mc for this:
1126       // MCSymbol::isRedefinable is intended for another purpose, and
1127       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1128       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1129       MCContext &Ctx = getContext();
1130       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1131         MCSymbol *Sym =
1132             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1134         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1135         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1136         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1138       } else {
1139         MCSymbol *Sym =
1140             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1144         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1145         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1146       }
1147       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1148         initializeGprCountSymbol(IS_VGPR);
1149         initializeGprCountSymbol(IS_SGPR);
1150       } else
1151         KernelScope.initialize(getContext());
1152     }
1153   }
1154 
1155   bool hasXNACK() const {
1156     return AMDGPU::hasXNACK(getSTI());
1157   }
1158 
1159   bool hasMIMG_R128() const {
1160     return AMDGPU::hasMIMG_R128(getSTI());
1161   }
1162 
1163   bool hasPackedD16() const {
1164     return AMDGPU::hasPackedD16(getSTI());
1165   }
1166 
1167   bool hasGFX10A16() const {
1168     return AMDGPU::hasGFX10A16(getSTI());
1169   }
1170 
1171   bool isSI() const {
1172     return AMDGPU::isSI(getSTI());
1173   }
1174 
1175   bool isCI() const {
1176     return AMDGPU::isCI(getSTI());
1177   }
1178 
1179   bool isVI() const {
1180     return AMDGPU::isVI(getSTI());
1181   }
1182 
1183   bool isGFX9() const {
1184     return AMDGPU::isGFX9(getSTI());
1185   }
1186 
1187   bool isGFX10() const {
1188     return AMDGPU::isGFX10(getSTI());
1189   }
1190 
1191   bool isGFX10_BEncoding() const {
1192     return AMDGPU::isGFX10_BEncoding(getSTI());
1193   }
1194 
1195   bool hasInv2PiInlineImm() const {
1196     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1197   }
1198 
1199   bool hasFlatOffsets() const {
1200     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1201   }
1202 
1203   bool hasSGPR102_SGPR103() const {
1204     return !isVI() && !isGFX9();
1205   }
1206 
1207   bool hasSGPR104_SGPR105() const {
1208     return isGFX10();
1209   }
1210 
1211   bool hasIntClamp() const {
1212     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1213   }
1214 
1215   AMDGPUTargetStreamer &getTargetStreamer() {
1216     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1217     return static_cast<AMDGPUTargetStreamer &>(TS);
1218   }
1219 
1220   const MCRegisterInfo *getMRI() const {
1221     // We need this const_cast because for some reason getContext() is not const
1222     // in MCAsmParser.
1223     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1224   }
1225 
1226   const MCInstrInfo *getMII() const {
1227     return &MII;
1228   }
1229 
1230   const FeatureBitset &getFeatureBits() const {
1231     return getSTI().getFeatureBits();
1232   }
1233 
1234   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1235   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1236   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1237 
1238   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1239   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1240   bool isForcedDPP() const { return ForcedDPP; }
1241   bool isForcedSDWA() const { return ForcedSDWA; }
1242   ArrayRef<unsigned> getMatchedVariants() const;
1243 
1244   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1245   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1246                      bool RestoreOnFailure);
1247   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1248   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1249                                         SMLoc &EndLoc) override;
1250   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1251   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1252                                       unsigned Kind) override;
1253   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1254                                OperandVector &Operands, MCStreamer &Out,
1255                                uint64_t &ErrorInfo,
1256                                bool MatchingInlineAsm) override;
1257   bool ParseDirective(AsmToken DirectiveID) override;
1258   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1259                                     OperandMode Mode = OperandMode_Default);
1260   StringRef parseMnemonicSuffix(StringRef Name);
1261   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1262                         SMLoc NameLoc, OperandVector &Operands) override;
1263   //bool ProcessInstruction(MCInst &Inst);
1264 
1265   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1266 
1267   OperandMatchResultTy
1268   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1269                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1270                      bool (*ConvertResult)(int64_t &) = nullptr);
1271 
1272   OperandMatchResultTy
1273   parseOperandArrayWithPrefix(const char *Prefix,
1274                               OperandVector &Operands,
1275                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1276                               bool (*ConvertResult)(int64_t&) = nullptr);
1277 
1278   OperandMatchResultTy
1279   parseNamedBit(const char *Name, OperandVector &Operands,
1280                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1281   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1282                                              StringRef &Value);
1283 
1284   bool isModifier();
1285   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1286   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1287   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1288   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1289   bool parseSP3NegModifier();
1290   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1291   OperandMatchResultTy parseReg(OperandVector &Operands);
1292   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1293   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1294   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1295   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1296   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1297   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1298   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1299 
1300   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1301   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1302   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1303   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1304 
1305   bool parseCnt(int64_t &IntVal);
1306   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1307   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1308 
1309 private:
1310   struct OperandInfoTy {
1311     int64_t Id;
1312     bool IsSymbolic = false;
1313     bool IsDefined = false;
1314 
1315     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1316   };
1317 
1318   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1319   bool validateSendMsg(const OperandInfoTy &Msg,
1320                        const OperandInfoTy &Op,
1321                        const OperandInfoTy &Stream,
1322                        const SMLoc Loc);
1323 
1324   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1325   bool validateHwreg(const OperandInfoTy &HwReg,
1326                      const int64_t Offset,
1327                      const int64_t Width,
1328                      const SMLoc Loc);
1329 
1330   void errorExpTgt();
1331   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1332   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1333   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1334 
1335   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1336   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1337   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1338   bool validateSOPLiteral(const MCInst &Inst) const;
1339   bool validateConstantBusLimitations(const MCInst &Inst);
1340   bool validateEarlyClobberLimitations(const MCInst &Inst);
1341   bool validateIntClampSupported(const MCInst &Inst);
1342   bool validateMIMGAtomicDMask(const MCInst &Inst);
1343   bool validateMIMGGatherDMask(const MCInst &Inst);
1344   bool validateMovrels(const MCInst &Inst);
1345   bool validateMIMGDataSize(const MCInst &Inst);
1346   bool validateMIMGAddrSize(const MCInst &Inst);
1347   bool validateMIMGD16(const MCInst &Inst);
1348   bool validateMIMGDim(const MCInst &Inst);
1349   bool validateLdsDirect(const MCInst &Inst);
1350   bool validateOpSel(const MCInst &Inst);
1351   bool validateVccOperand(unsigned Reg) const;
1352   bool validateVOP3Literal(const MCInst &Inst) const;
1353   bool validateMAIAccWrite(const MCInst &Inst);
1354   unsigned getConstantBusLimit(unsigned Opcode) const;
1355   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1356   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1357   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1358 
1359   bool isId(const StringRef Id) const;
1360   bool isId(const AsmToken &Token, const StringRef Id) const;
1361   bool isToken(const AsmToken::TokenKind Kind) const;
1362   bool trySkipId(const StringRef Id);
1363   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1364   bool trySkipToken(const AsmToken::TokenKind Kind);
1365   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1366   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1367   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1368   AsmToken::TokenKind getTokenKind() const;
1369   bool parseExpr(int64_t &Imm);
1370   bool parseExpr(OperandVector &Operands);
1371   StringRef getTokenStr() const;
1372   AsmToken peekToken();
1373   AsmToken getToken() const;
1374   SMLoc getLoc() const;
1375   void lex();
1376 
1377 public:
1378   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1379   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1380 
1381   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1382   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1383   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1384   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1385   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1386   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1387 
1388   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1389                             const unsigned MinVal,
1390                             const unsigned MaxVal,
1391                             const StringRef ErrMsg);
1392   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1393   bool parseSwizzleOffset(int64_t &Imm);
1394   bool parseSwizzleMacro(int64_t &Imm);
1395   bool parseSwizzleQuadPerm(int64_t &Imm);
1396   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1397   bool parseSwizzleBroadcast(int64_t &Imm);
1398   bool parseSwizzleSwap(int64_t &Imm);
1399   bool parseSwizzleReverse(int64_t &Imm);
1400 
1401   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1402   int64_t parseGPRIdxMacro();
1403 
1404   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1405   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1406   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1407   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1408   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1409 
1410   AMDGPUOperand::Ptr defaultDLC() const;
1411   AMDGPUOperand::Ptr defaultGLC() const;
1412   AMDGPUOperand::Ptr defaultSLC() const;
1413 
1414   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1415   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1416   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1417   AMDGPUOperand::Ptr defaultFlatOffset() const;
1418 
1419   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1420 
1421   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1422                OptionalImmIndexMap &OptionalIdx);
1423   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1424   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1425   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1426 
1427   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1428 
1429   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1430                bool IsAtomic = false);
1431   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1432 
1433   OperandMatchResultTy parseDim(OperandVector &Operands);
1434   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1435   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1436   AMDGPUOperand::Ptr defaultRowMask() const;
1437   AMDGPUOperand::Ptr defaultBankMask() const;
1438   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1439   AMDGPUOperand::Ptr defaultFI() const;
1440   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1441   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1442 
1443   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1444                                     AMDGPUOperand::ImmTy Type);
1445   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1446   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1447   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1448   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1449   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1450   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1451   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1452                uint64_t BasicInstType,
1453                bool SkipDstVcc = false,
1454                bool SkipSrcVcc = false);
1455 
1456   AMDGPUOperand::Ptr defaultBLGP() const;
1457   AMDGPUOperand::Ptr defaultCBSZ() const;
1458   AMDGPUOperand::Ptr defaultABID() const;
1459 
1460   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1461   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1462 };
1463 
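// Describes an optional instruction modifier: its assembly name, the ImmTy it
// produces, whether it is a single-bit flag, and an optional value-conversion
// callback.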
1464 struct OptionalOperand {
1465   const char *Name;
1466   AMDGPUOperand::ImmTy Type;
1467   bool IsBit;
1468   bool (*ConvertResult)(int64_t&);
1469 };
1470 
1471 } // end anonymous namespace
1472 
1473 // May be called with an integer type of equivalent bitwidth.
1474 static const fltSemantics *getFltSemantics(unsigned Size) {
1475   switch (Size) {
1476   case 4:
1477     return &APFloat::IEEEsingle();
1478   case 8:
1479     return &APFloat::IEEEdouble();
1480   case 2:
1481     return &APFloat::IEEEhalf();
1482   default:
1483     llvm_unreachable("unsupported fp type");
1484   }
1485 }
1486 
1487 static const fltSemantics *getFltSemantics(MVT VT) {
1488   return getFltSemantics(VT.getSizeInBits() / 8);
1489 }
1490 
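// Map an operand type (AMDGPU::OPERAND_*) to the floating-point semantics
// used when checking and encoding literal values of that operand.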
1491 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1492   switch (OperandType) {
1493   case AMDGPU::OPERAND_REG_IMM_INT32:
1494   case AMDGPU::OPERAND_REG_IMM_FP32:
1495   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1496   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1497   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1498   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1499     return &APFloat::IEEEsingle();
1500   case AMDGPU::OPERAND_REG_IMM_INT64:
1501   case AMDGPU::OPERAND_REG_IMM_FP64:
1502   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1503   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1504     return &APFloat::IEEEdouble();
1505   case AMDGPU::OPERAND_REG_IMM_INT16:
1506   case AMDGPU::OPERAND_REG_IMM_FP16:
1507   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1508   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1509   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1510   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1511   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1512   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1513   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1514   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1515   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1516   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1517     return &APFloat::IEEEhalf();
1518   default:
1519     llvm_unreachable("unsupported fp type");
1520   }
1521 }
1522 
1523 //===----------------------------------------------------------------------===//
1524 // Operand
1525 //===----------------------------------------------------------------------===//
1526 
1527 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1528   bool Lost;
1529 
1530   // Convert the literal to the semantics of the requested type.
1531   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1532                                                APFloat::rmNearestTiesToEven,
1533                                                &Lost);
1534   // We allow precision loss but not overflow or underflow.
1535   if (Status != APFloat::opOK &&
1536       Lost &&
1537       ((Status & APFloat::opOverflow)  != 0 ||
1538        (Status & APFloat::opUnderflow) != 0)) {
1539     return false;
1540   }
1541 
1542   return true;
1543 }
1544 
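// For illustration: isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16)
// both hold (the value fits as either an unsigned or a signed 16-bit integer),
// while isSafeTruncation(0x1FFFF, 16) does not.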
1545 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1546   return isUIntN(Size, Val) || isIntN(Size, Val);
1547 }
1548 
1549 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1550 
1551   // This is a hack to enable named inline values like
1552   // shared_base with both 32-bit and 64-bit operands.
1553   // Note that these values are defined as
1554   // 32-bit operands only.
1555   if (isInlineValue()) {
1556     return true;
1557   }
1558 
1559   if (!isImmTy(ImmTyNone)) {
1560     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1561     return false;
1562   }
1563   // TODO: We should avoid using host float here. It would be better to
1564   // check the float bit values, which is what a few other places do.
1565   // We've had bot failures before due to weird NaN support on MIPS hosts.
1566 
1567   APInt Literal(64, Imm.Val);
1568 
1569   if (Imm.IsFPImm) { // We got fp literal token
1570     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1571       return AMDGPU::isInlinableLiteral64(Imm.Val,
1572                                           AsmParser->hasInv2PiInlineImm());
1573     }
1574 
1575     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1576     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1577       return false;
1578 
1579     if (type.getScalarSizeInBits() == 16) {
1580       return AMDGPU::isInlinableLiteral16(
1581         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1582         AsmParser->hasInv2PiInlineImm());
1583     }
1584 
1585     // Check if single precision literal is inlinable
1586     return AMDGPU::isInlinableLiteral32(
1587       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1588       AsmParser->hasInv2PiInlineImm());
1589   }
1590 
1591   // We got int literal token.
1592   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1593     return AMDGPU::isInlinableLiteral64(Imm.Val,
1594                                         AsmParser->hasInv2PiInlineImm());
1595   }
1596 
1597   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1598     return false;
1599   }
1600 
1601   if (type.getScalarSizeInBits() == 16) {
1602     return AMDGPU::isInlinableLiteral16(
1603       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1604       AsmParser->hasInv2PiInlineImm());
1605   }
1606 
1607   return AMDGPU::isInlinableLiteral32(
1608     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1609     AsmParser->hasInv2PiInlineImm());
1610 }
1611 
1612 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1613   // Check that this immediate can be added as literal
1614   if (!isImmTy(ImmTyNone)) {
1615     return false;
1616   }
1617 
1618   if (!Imm.IsFPImm) {
1619     // We got int literal token.
1620 
1621     if (type == MVT::f64 && hasFPModifiers()) {
1622       // FP modifiers cannot be applied to int literals while preserving the
1623       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1624       // To avoid ambiguity, disable these cases.
1625       return false;
1626     }
1627 
1628     unsigned Size = type.getSizeInBits();
1629     if (Size == 64)
1630       Size = 32;
1631 
1632     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1633     // types.
1634     return isSafeTruncation(Imm.Val, Size);
1635   }
1636 
1637   // We got fp literal token
1638   if (type == MVT::f64) { // Expected 64-bit fp operand
1639     // The low 32 bits of the literal would be set to zero, but we accept such literals.
1640     return true;
1641   }
1642 
1643   if (type == MVT::i64) { // Expected 64-bit int operand
1644     // We don't allow fp literals in 64-bit integer instructions. It is
1645     // unclear how we should encode them.
1646     return false;
1647   }
1648 
1649   // We allow fp literals with f16x2 operands assuming that the specified
1650   // literal goes into the lower half and the upper half is zero. We also
1651   // require that the literal can be losslessly converted to f16.
1652   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1653                      (type == MVT::v2i16)? MVT::i16 : type;
1654 
1655   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1656   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1657 }
1658 
1659 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1660   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1661 }
1662 
1663 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1664   if (AsmParser->isVI())
1665     return isVReg32();
1666   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1667     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1668   else
1669     return false;
1670 }
1671 
1672 bool AMDGPUOperand::isSDWAFP16Operand() const {
1673   return isSDWAOperand(MVT::f16);
1674 }
1675 
1676 bool AMDGPUOperand::isSDWAFP32Operand() const {
1677   return isSDWAOperand(MVT::f32);
1678 }
1679 
1680 bool AMDGPUOperand::isSDWAInt16Operand() const {
1681   return isSDWAOperand(MVT::i16);
1682 }
1683 
1684 bool AMDGPUOperand::isSDWAInt32Operand() const {
1685   return isSDWAOperand(MVT::i32);
1686 }
1687 
1688 bool AMDGPUOperand::isBoolReg() const {
1689   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1690          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1691 }
1692 
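// For illustration: for a 32-bit operand (Size == 4) FpSignMask is 0x80000000;
// "abs" clears the sign bit and "neg" toggles it, so applying "neg" to
// 1.0f (0x3F800000) yields -1.0f (0xBF800000).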
1693 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1694 {
1695   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1696   assert(Size == 2 || Size == 4 || Size == 8);
1697 
1698   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1699 
1700   if (Imm.Mods.Abs) {
1701     Val &= ~FpSignMask;
1702   }
1703   if (Imm.Mods.Neg) {
1704     Val ^= FpSignMask;
1705   }
1706 
1707   return Val;
1708 }
1709 
1710 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1711   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1712                              Inst.getNumOperands())) {
1713     addLiteralImmOperand(Inst, Imm.Val,
1714                          ApplyModifiers &&
1715                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1716   } else {
1717     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1718     Inst.addOperand(MCOperand::createImm(Imm.Val));
1719   }
1720 }
1721 
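// For illustration: an fp literal such as 2.5 used with a 32-bit FP operand is
// converted to its IEEE single-precision bit pattern (0x40200000) before being
// added to the MCInst as an immediate operand.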
1722 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1723   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1724   auto OpNum = Inst.getNumOperands();
1725   // Check that this operand accepts literals
1726   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1727 
1728   if (ApplyModifiers) {
1729     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1730     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1731     Val = applyInputFPModifiers(Val, Size);
1732   }
1733 
1734   APInt Literal(64, Val);
1735   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1736 
1737   if (Imm.IsFPImm) { // We got fp literal token
1738     switch (OpTy) {
1739     case AMDGPU::OPERAND_REG_IMM_INT64:
1740     case AMDGPU::OPERAND_REG_IMM_FP64:
1741     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1742     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1743       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1744                                        AsmParser->hasInv2PiInlineImm())) {
1745         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1746         return;
1747       }
1748 
1749       // Non-inlineable
1750       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1751         // For fp operands, check whether the low 32 bits are zero.
1752         if (Literal.getLoBits(32) != 0) {
1753           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1754           "Can't encode literal as exact 64-bit floating-point operand. "
1755           "Low 32-bits will be set to zero");
1756         }
1757 
1758         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1759         return;
1760       }
1761 
1762       // We don't allow fp literals in 64-bit integer instructions. It is
1763       // unclear how we should encode them. This case should be checked earlier
1764       // in predicate methods (isLiteralImm())
1765       llvm_unreachable("fp literal in 64-bit integer instruction.");
1766 
1767     case AMDGPU::OPERAND_REG_IMM_INT32:
1768     case AMDGPU::OPERAND_REG_IMM_FP32:
1769     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1770     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1771     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1772     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1773     case AMDGPU::OPERAND_REG_IMM_INT16:
1774     case AMDGPU::OPERAND_REG_IMM_FP16:
1775     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1776     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1777     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1778     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1779     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1780     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1781     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1782     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1783     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1784     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1785       bool lost;
1786       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1787       // Convert literal to single precision
1788       FPLiteral.convert(*getOpFltSemantics(OpTy),
1789                         APFloat::rmNearestTiesToEven, &lost);
1790       // We allow precision loss but not overflow or underflow. This should
1791       // have been checked earlier in isLiteralImm().
1792 
1793       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1794       Inst.addOperand(MCOperand::createImm(ImmVal));
1795       return;
1796     }
1797     default:
1798       llvm_unreachable("invalid operand size");
1799     }
1800 
1801     return;
1802   }
1803 
1804   // We got int literal token.
1805   // Only sign extend inline immediates.
1806   switch (OpTy) {
1807   case AMDGPU::OPERAND_REG_IMM_INT32:
1808   case AMDGPU::OPERAND_REG_IMM_FP32:
1809   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1810   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1811   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1812   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1813   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1814   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1815     if (isSafeTruncation(Val, 32) &&
1816         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1817                                      AsmParser->hasInv2PiInlineImm())) {
1818       Inst.addOperand(MCOperand::createImm(Val));
1819       return;
1820     }
1821 
1822     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1823     return;
1824 
1825   case AMDGPU::OPERAND_REG_IMM_INT64:
1826   case AMDGPU::OPERAND_REG_IMM_FP64:
1827   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1828   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1829     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1830       Inst.addOperand(MCOperand::createImm(Val));
1831       return;
1832     }
1833 
1834     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1835     return;
1836 
1837   case AMDGPU::OPERAND_REG_IMM_INT16:
1838   case AMDGPU::OPERAND_REG_IMM_FP16:
1839   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1840   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1841   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1842   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1843     if (isSafeTruncation(Val, 16) &&
1844         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1845                                      AsmParser->hasInv2PiInlineImm())) {
1846       Inst.addOperand(MCOperand::createImm(Val));
1847       return;
1848     }
1849 
1850     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1851     return;
1852 
1853   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1854   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1855   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1856   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1857     assert(isSafeTruncation(Val, 16));
1858     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1859                                         AsmParser->hasInv2PiInlineImm()));
1860 
1861     Inst.addOperand(MCOperand::createImm(Val));
1862     return;
1863   }
1864   default:
1865     llvm_unreachable("invalid operand size");
1866   }
1867 }
1868 
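// For illustration: this path handles 32-bit K operands such as the constant
// in "v_madmk_f32 v0, v1, 0x11213141, v2"; an integer token is truncated to
// the low 32 bits, and an fp token such as 1.5 is encoded as 0x3FC00000.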
1869 template <unsigned Bitwidth>
1870 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1871   APInt Literal(64, Imm.Val);
1872 
1873   if (!Imm.IsFPImm) {
1874     // We got int literal token.
1875     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1876     return;
1877   }
1878 
1879   bool Lost;
1880   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1881   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1882                     APFloat::rmNearestTiesToEven, &Lost);
1883   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1884 }
1885 
1886 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1887   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1888 }
1889 
1890 static bool isInlineValue(unsigned Reg) {
1891   switch (Reg) {
1892   case AMDGPU::SRC_SHARED_BASE:
1893   case AMDGPU::SRC_SHARED_LIMIT:
1894   case AMDGPU::SRC_PRIVATE_BASE:
1895   case AMDGPU::SRC_PRIVATE_LIMIT:
1896   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1897     return true;
1898   case AMDGPU::SRC_VCCZ:
1899   case AMDGPU::SRC_EXECZ:
1900   case AMDGPU::SRC_SCC:
1901     return true;
1902   case AMDGPU::SGPR_NULL:
1903     return true;
1904   default:
1905     return false;
1906   }
1907 }
1908 
1909 bool AMDGPUOperand::isInlineValue() const {
1910   return isRegKind() && ::isInlineValue(getReg());
1911 }
1912 
1913 //===----------------------------------------------------------------------===//
1914 // AsmParser
1915 //===----------------------------------------------------------------------===//
1916 
1917 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1918   if (Is == IS_VGPR) {
1919     switch (RegWidth) {
1920       default: return -1;
1921       case 1: return AMDGPU::VGPR_32RegClassID;
1922       case 2: return AMDGPU::VReg_64RegClassID;
1923       case 3: return AMDGPU::VReg_96RegClassID;
1924       case 4: return AMDGPU::VReg_128RegClassID;
1925       case 5: return AMDGPU::VReg_160RegClassID;
1926       case 6: return AMDGPU::VReg_192RegClassID;
1927       case 8: return AMDGPU::VReg_256RegClassID;
1928       case 16: return AMDGPU::VReg_512RegClassID;
1929       case 32: return AMDGPU::VReg_1024RegClassID;
1930     }
1931   } else if (Is == IS_TTMP) {
1932     switch (RegWidth) {
1933       default: return -1;
1934       case 1: return AMDGPU::TTMP_32RegClassID;
1935       case 2: return AMDGPU::TTMP_64RegClassID;
1936       case 4: return AMDGPU::TTMP_128RegClassID;
1937       case 8: return AMDGPU::TTMP_256RegClassID;
1938       case 16: return AMDGPU::TTMP_512RegClassID;
1939     }
1940   } else if (Is == IS_SGPR) {
1941     switch (RegWidth) {
1942       default: return -1;
1943       case 1: return AMDGPU::SGPR_32RegClassID;
1944       case 2: return AMDGPU::SGPR_64RegClassID;
1945       case 3: return AMDGPU::SGPR_96RegClassID;
1946       case 4: return AMDGPU::SGPR_128RegClassID;
1947       case 5: return AMDGPU::SGPR_160RegClassID;
1948       case 6: return AMDGPU::SGPR_192RegClassID;
1949       case 8: return AMDGPU::SGPR_256RegClassID;
1950       case 16: return AMDGPU::SGPR_512RegClassID;
1951     }
1952   } else if (Is == IS_AGPR) {
1953     switch (RegWidth) {
1954       default: return -1;
1955       case 1: return AMDGPU::AGPR_32RegClassID;
1956       case 2: return AMDGPU::AReg_64RegClassID;
1957       case 3: return AMDGPU::AReg_96RegClassID;
1958       case 4: return AMDGPU::AReg_128RegClassID;
1959       case 5: return AMDGPU::AReg_160RegClassID;
1960       case 6: return AMDGPU::AReg_192RegClassID;
1961       case 8: return AMDGPU::AReg_256RegClassID;
1962       case 16: return AMDGPU::AReg_512RegClassID;
1963       case 32: return AMDGPU::AReg_1024RegClassID;
1964     }
1965   }
1966   return -1;
1967 }
1968 
1969 static unsigned getSpecialRegForName(StringRef RegName) {
1970   return StringSwitch<unsigned>(RegName)
1971     .Case("exec", AMDGPU::EXEC)
1972     .Case("vcc", AMDGPU::VCC)
1973     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1974     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1975     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1976     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1977     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1978     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1979     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1980     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1981     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1982     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1983     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1984     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1985     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1986     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1987     .Case("m0", AMDGPU::M0)
1988     .Case("vccz", AMDGPU::SRC_VCCZ)
1989     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1990     .Case("execz", AMDGPU::SRC_EXECZ)
1991     .Case("src_execz", AMDGPU::SRC_EXECZ)
1992     .Case("scc", AMDGPU::SRC_SCC)
1993     .Case("src_scc", AMDGPU::SRC_SCC)
1994     .Case("tba", AMDGPU::TBA)
1995     .Case("tma", AMDGPU::TMA)
1996     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1997     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1998     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1999     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2000     .Case("vcc_lo", AMDGPU::VCC_LO)
2001     .Case("vcc_hi", AMDGPU::VCC_HI)
2002     .Case("exec_lo", AMDGPU::EXEC_LO)
2003     .Case("exec_hi", AMDGPU::EXEC_HI)
2004     .Case("tma_lo", AMDGPU::TMA_LO)
2005     .Case("tma_hi", AMDGPU::TMA_HI)
2006     .Case("tba_lo", AMDGPU::TBA_LO)
2007     .Case("tba_hi", AMDGPU::TBA_HI)
2008     .Case("pc", AMDGPU::PC_REG)
2009     .Case("null", AMDGPU::SGPR_NULL)
2010     .Default(AMDGPU::NoRegister);
2011 }
2012 
2013 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2014                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2015   auto R = parseRegister();
2016   if (!R) return true;
2017   assert(R->isReg());
2018   RegNo = R->getReg();
2019   StartLoc = R->getStartLoc();
2020   EndLoc = R->getEndLoc();
2021   return false;
2022 }
2023 
2024 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2025                                     SMLoc &EndLoc) {
2026   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2027 }
2028 
2029 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2030                                                        SMLoc &StartLoc,
2031                                                        SMLoc &EndLoc) {
2032   bool Result =
2033       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2034   bool PendingErrors = getParser().hasPendingError();
2035   getParser().clearPendingErrors();
2036   if (PendingErrors)
2037     return MatchOperand_ParseFail;
2038   if (Result)
2039     return MatchOperand_NoMatch;
2040   return MatchOperand_Success;
2041 }
2042 
2043 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2044                                             RegisterKind RegKind, unsigned Reg1) {
2045   switch (RegKind) {
2046   case IS_SPECIAL:
2047     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2048       Reg = AMDGPU::EXEC;
2049       RegWidth = 2;
2050       return true;
2051     }
2052     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2053       Reg = AMDGPU::FLAT_SCR;
2054       RegWidth = 2;
2055       return true;
2056     }
2057     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2058       Reg = AMDGPU::XNACK_MASK;
2059       RegWidth = 2;
2060       return true;
2061     }
2062     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2063       Reg = AMDGPU::VCC;
2064       RegWidth = 2;
2065       return true;
2066     }
2067     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2068       Reg = AMDGPU::TBA;
2069       RegWidth = 2;
2070       return true;
2071     }
2072     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2073       Reg = AMDGPU::TMA;
2074       RegWidth = 2;
2075       return true;
2076     }
2077     return false;
2078   case IS_VGPR:
2079   case IS_SGPR:
2080   case IS_AGPR:
2081   case IS_TTMP:
2082     if (Reg1 != Reg + RegWidth) {
2083       return false;
2084     }
2085     RegWidth++;
2086     return true;
2087   default:
2088     llvm_unreachable("unexpected register kind");
2089   }
2090 }
2091 
2092 struct RegInfo {
2093   StringLiteral Name;
2094   RegisterKind Kind;
2095 };
2096 
2097 static constexpr RegInfo RegularRegisters[] = {
2098   {{"v"},    IS_VGPR},
2099   {{"s"},    IS_SGPR},
2100   {{"ttmp"}, IS_TTMP},
2101   {{"acc"},  IS_AGPR},
2102   {{"a"},    IS_AGPR},
2103 };
2104 
2105 static bool isRegularReg(RegisterKind Kind) {
2106   return Kind == IS_VGPR ||
2107          Kind == IS_SGPR ||
2108          Kind == IS_TTMP ||
2109          Kind == IS_AGPR;
2110 }
2111 
2112 static const RegInfo* getRegularRegInfo(StringRef Str) {
2113   for (const RegInfo &Reg : RegularRegisters)
2114     if (Str.startswith(Reg.Name))
2115       return &Reg;
2116   return nullptr;
2117 }
2118 
2119 static bool getRegNum(StringRef Str, unsigned& Num) {
2120   return !Str.getAsInteger(10, Num);
2121 }
2122 
2123 bool
2124 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2125                             const AsmToken &NextToken) const {
2126 
2127   // A list of consecutive registers: [s0,s1,s2,s3]
2128   if (Token.is(AsmToken::LBrac))
2129     return true;
2130 
2131   if (!Token.is(AsmToken::Identifier))
2132     return false;
2133 
2134   // A single register like s0 or a range of registers like s[0:1]
2135 
2136   StringRef Str = Token.getString();
2137   const RegInfo *Reg = getRegularRegInfo(Str);
2138   if (Reg) {
2139     StringRef RegName = Reg->Name;
2140     StringRef RegSuffix = Str.substr(RegName.size());
2141     if (!RegSuffix.empty()) {
2142       unsigned Num;
2143       // A single register with an index: rXX
2144       if (getRegNum(RegSuffix, Num))
2145         return true;
2146     } else {
2147       // A range of registers: r[XX:YY].
2148       if (NextToken.is(AsmToken::LBrac))
2149         return true;
2150     }
2151   }
2152 
2153   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2154 }
2155 
2156 bool
2157 AMDGPUAsmParser::isRegister()
2158 {
2159   return isRegister(getToken(), peekToken());
2160 }
2161 
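// For illustration (following the alignment rules below): "s[4:7]" yields
// RegNum = 4, RegWidth = 4 and maps to an SGPR_128 tuple, whereas "s[2:5]" is
// rejected because a 4-dword SGPR tuple must start at a 4-aligned index.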
2162 unsigned
2163 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2164                                unsigned RegNum,
2165                                unsigned RegWidth) {
2166 
2167   assert(isRegularReg(RegKind));
2168 
2169   unsigned AlignSize = 1;
2170   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2171     // SGPR and TTMP registers must be aligned.
2172     // Max required alignment is 4 dwords.
2173     AlignSize = std::min(RegWidth, 4u);
2174   }
2175 
2176   if (RegNum % AlignSize != 0)
2177     return AMDGPU::NoRegister;
2178 
2179   unsigned RegIdx = RegNum / AlignSize;
2180   int RCID = getRegClass(RegKind, RegWidth);
2181   if (RCID == -1)
2182     return AMDGPU::NoRegister;
2183 
2184   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2185   const MCRegisterClass RC = TRI->getRegClass(RCID);
2186   if (RegIdx >= RC.getNumRegs())
2187     return AMDGPU::NoRegister;
2188 
2189   return RC.getRegister(RegIdx);
2190 }
2191 
2192 bool
2193 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2194   int64_t RegLo, RegHi;
2195   if (!trySkipToken(AsmToken::LBrac))
2196     return false;
2197 
2198   if (!parseExpr(RegLo))
2199     return false;
2200 
2201   if (trySkipToken(AsmToken::Colon)) {
2202     if (!parseExpr(RegHi))
2203       return false;
2204   } else {
2205     RegHi = RegLo;
2206   }
2207 
2208   if (!trySkipToken(AsmToken::RBrac))
2209     return false;
2210 
2211   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2212     return false;
2213 
2214   Num = static_cast<unsigned>(RegLo);
2215   Width = (RegHi - RegLo) + 1;
2216   return true;
2217 }
2218 
2219 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2220                                           unsigned &RegNum, unsigned &RegWidth,
2221                                           SmallVectorImpl<AsmToken> &Tokens) {
2222   assert(isToken(AsmToken::Identifier));
2223   unsigned Reg = getSpecialRegForName(getTokenStr());
2224   if (Reg) {
2225     RegNum = 0;
2226     RegWidth = 1;
2227     RegKind = IS_SPECIAL;
2228     Tokens.push_back(getToken());
2229     lex(); // skip register name
2230   }
2231   return Reg;
2232 }
2233 
2234 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2235                                           unsigned &RegNum, unsigned &RegWidth,
2236                                           SmallVectorImpl<AsmToken> &Tokens) {
2237   assert(isToken(AsmToken::Identifier));
2238   StringRef RegName = getTokenStr();
2239 
2240   const RegInfo *RI = getRegularRegInfo(RegName);
2241   if (!RI)
2242     return AMDGPU::NoRegister;
2243   Tokens.push_back(getToken());
2244   lex(); // skip register name
2245 
2246   RegKind = RI->Kind;
2247   StringRef RegSuffix = RegName.substr(RI->Name.size());
2248   if (!RegSuffix.empty()) {
2249     // Single 32-bit register: vXX.
2250     if (!getRegNum(RegSuffix, RegNum))
2251       return AMDGPU::NoRegister;
2252     RegWidth = 1;
2253   } else {
2254     // Range of registers: v[XX:YY]. ":YY" is optional.
2255     if (!ParseRegRange(RegNum, RegWidth))
2256       return AMDGPU::NoRegister;
2257   }
2258 
2259   return getRegularReg(RegKind, RegNum, RegWidth);
2260 }
2261 
2262 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2263                                        unsigned &RegWidth,
2264                                        SmallVectorImpl<AsmToken> &Tokens) {
2265   unsigned Reg = AMDGPU::NoRegister;
2266 
2267   if (!trySkipToken(AsmToken::LBrac))
2268     return AMDGPU::NoRegister;
2269 
2270   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2271 
2272   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2273     return AMDGPU::NoRegister;
2274   if (RegWidth != 1)
2275     return AMDGPU::NoRegister;
2276 
2277   for (; trySkipToken(AsmToken::Comma); ) {
2278     RegisterKind NextRegKind;
2279     unsigned NextReg, NextRegNum, NextRegWidth;
2280 
2281     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2282                              Tokens))
2283       return AMDGPU::NoRegister;
2284     if (NextRegWidth != 1)
2285       return AMDGPU::NoRegister;
2286     if (NextRegKind != RegKind)
2287       return AMDGPU::NoRegister;
2288     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2289       return AMDGPU::NoRegister;
2290   }
2291 
2292   if (!trySkipToken(AsmToken::RBrac))
2293     return AMDGPU::NoRegister;
2294 
2295   if (isRegularReg(RegKind))
2296     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2297 
2298   return Reg;
2299 }
2300 
2301 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2302                                           unsigned &RegNum, unsigned &RegWidth,
2303                                           SmallVectorImpl<AsmToken> &Tokens) {
2304   Reg = AMDGPU::NoRegister;
2305 
2306   if (isToken(AsmToken::Identifier)) {
2307     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2308     if (Reg == AMDGPU::NoRegister)
2309       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2310   } else {
2311     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2312   }
2313 
2314   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2315   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2316 }
2317 
2318 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2319                                           unsigned &RegNum, unsigned &RegWidth,
2320                                           bool RestoreOnFailure) {
2321   Reg = AMDGPU::NoRegister;
2322 
2323   SmallVector<AsmToken, 1> Tokens;
2324   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2325     if (RestoreOnFailure) {
2326       while (!Tokens.empty()) {
2327         getLexer().UnLex(Tokens.pop_back_val());
2328       }
2329     }
2330     return true;
2331   }
2332   return false;
2333 }
2334 
2335 Optional<StringRef>
2336 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2337   switch (RegKind) {
2338   case IS_VGPR:
2339     return StringRef(".amdgcn.next_free_vgpr");
2340   case IS_SGPR:
2341     return StringRef(".amdgcn.next_free_sgpr");
2342   default:
2343     return None;
2344   }
2345 }
2346 
2347 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2348   auto SymbolName = getGprCountSymbolName(RegKind);
2349   assert(SymbolName && "initializing invalid register kind");
2350   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2351   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2352 }
2353 
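// For illustration: after parsing "v[6:7]" (DwordRegIndex = 6, RegWidth = 2),
// NewMax is 7 and .amdgcn.next_free_vgpr is raised to at least 8, so the
// symbol always holds one past the highest register index used so far.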
2354 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2355                                             unsigned DwordRegIndex,
2356                                             unsigned RegWidth) {
2357   // Symbols are only defined for GCN targets
2358   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2359     return true;
2360 
2361   auto SymbolName = getGprCountSymbolName(RegKind);
2362   if (!SymbolName)
2363     return true;
2364   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2365 
2366   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2367   int64_t OldCount;
2368 
2369   if (!Sym->isVariable())
2370     return !Error(getParser().getTok().getLoc(),
2371                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2372   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2373     return !Error(
2374         getParser().getTok().getLoc(),
2375         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2376 
2377   if (OldCount <= NewMax)
2378     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2379 
2380   return true;
2381 }
2382 
2383 std::unique_ptr<AMDGPUOperand>
2384 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2385   const auto &Tok = Parser.getTok();
2386   SMLoc StartLoc = Tok.getLoc();
2387   SMLoc EndLoc = Tok.getEndLoc();
2388   RegisterKind RegKind;
2389   unsigned Reg, RegNum, RegWidth;
2390 
2391   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2392     // FIXME: improve error messages (bug 41303).
2393     Error(StartLoc, "not a valid operand.");
2394     return nullptr;
2395   }
2396   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2397     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2398       return nullptr;
2399   } else
2400     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2401   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2402 }
2403 
2404 OperandMatchResultTy
2405 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2406   // TODO: add syntactic sugar for 1/(2*PI)
2407 
2408   assert(!isRegister());
2409   assert(!isModifier());
2410 
2411   const auto& Tok = getToken();
2412   const auto& NextTok = peekToken();
2413   bool IsReal = Tok.is(AsmToken::Real);
2414   SMLoc S = getLoc();
2415   bool Negate = false;
2416 
2417   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2418     lex();
2419     IsReal = true;
2420     Negate = true;
2421   }
2422 
2423   if (IsReal) {
2424     // Floating-point expressions are not supported.
2425     // Can only allow floating-point literals with an
2426     // optional sign.
2427 
2428     StringRef Num = getTokenStr();
2429     lex();
2430 
2431     APFloat RealVal(APFloat::IEEEdouble());
2432     auto roundMode = APFloat::rmNearestTiesToEven;
2433     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2434       return MatchOperand_ParseFail;
2435     }
2436     if (Negate)
2437       RealVal.changeSign();
2438 
2439     Operands.push_back(
2440       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2441                                AMDGPUOperand::ImmTyNone, true));
2442 
2443     return MatchOperand_Success;
2444 
2445   } else {
2446     int64_t IntVal;
2447     const MCExpr *Expr;
2448     SMLoc S = getLoc();
2449 
2450     if (HasSP3AbsModifier) {
2451       // This is a workaround for handling expressions
2452       // as arguments of SP3 'abs' modifier, for example:
2453       //     |1.0|
2454       //     |-1|
2455       //     |1+x|
2456       // This syntax is not compatible with syntax of standard
2457       // MC expressions (due to the trailing '|').
2458       SMLoc EndLoc;
2459       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2460         return MatchOperand_ParseFail;
2461     } else {
2462       if (Parser.parseExpression(Expr))
2463         return MatchOperand_ParseFail;
2464     }
2465 
2466     if (Expr->evaluateAsAbsolute(IntVal)) {
2467       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2468     } else {
2469       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2470     }
2471 
2472     return MatchOperand_Success;
2473   }
2474 
2475   return MatchOperand_NoMatch;
2476 }
2477 
2478 OperandMatchResultTy
2479 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2480   if (!isRegister())
2481     return MatchOperand_NoMatch;
2482 
2483   if (auto R = parseRegister()) {
2484     assert(R->isReg());
2485     Operands.push_back(std::move(R));
2486     return MatchOperand_Success;
2487   }
2488   return MatchOperand_ParseFail;
2489 }
2490 
2491 OperandMatchResultTy
2492 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2493   auto res = parseReg(Operands);
2494   if (res != MatchOperand_NoMatch) {
2495     return res;
2496   } else if (isModifier()) {
2497     return MatchOperand_NoMatch;
2498   } else {
2499     return parseImm(Operands, HasSP3AbsMod);
2500   }
2501 }
2502 
2503 bool
2504 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2505   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2506     const auto &str = Token.getString();
2507     return str == "abs" || str == "neg" || str == "sext";
2508   }
2509   return false;
2510 }
2511 
2512 bool
2513 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2514   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2515 }
2516 
2517 bool
2518 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2519   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2520 }
2521 
2522 bool
2523 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2524   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2525 }
2526 
2527 // Check if this is an operand modifier or an opcode modifier
2528 // which may look like an expression but is not. We should
2529 // avoid parsing these modifiers as expressions. Currently
2530 // recognized sequences are:
2531 //   |...|
2532 //   abs(...)
2533 //   neg(...)
2534 //   sext(...)
2535 //   -reg
2536 //   -|...|
2537 //   -abs(...)
2538 //   name:...
2539 // Note that simple opcode modifiers like 'gds' may be parsed as
2540 // expressions; this is a special case. See getExpressionAsToken.
2541 //
2542 bool
2543 AMDGPUAsmParser::isModifier() {
2544 
2545   AsmToken Tok = getToken();
2546   AsmToken NextToken[2];
2547   peekTokens(NextToken);
2548 
2549   return isOperandModifier(Tok, NextToken[0]) ||
2550          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2551          isOpcodeModifierWithVal(Tok, NextToken[0]);
2552 }
2553 
2554 // Check if the current token is an SP3 'neg' modifier.
2555 // Currently this modifier is allowed in the following contexts:
2556 //
2557 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2558 // 2. Before an 'abs' modifier: -abs(...)
2559 // 3. Before an SP3 'abs' modifier: -|...|
2560 //
2561 // In all other cases "-" is handled as a part
2562 // of an expression that follows the sign.
2563 //
2564 // Note: When "-" is followed by an integer literal,
2565 // it is interpreted as integer negation rather than
2566 // a floating-point NEG modifier applied to the literal.
2567 // Besides being counter-intuitive, using the floating-point
2568 // NEG modifier here would give integer literals different
2569 // meanings with VOP1/2/C and VOP3,
2570 // for example:
2571 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2572 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2573 // Negative fp literals with a preceding "-" are
2574 // handled likewise for uniformity.
2575 //
2576 bool
2577 AMDGPUAsmParser::parseSP3NegModifier() {
2578 
2579   AsmToken NextToken[2];
2580   peekTokens(NextToken);
2581 
2582   if (isToken(AsmToken::Minus) &&
2583       (isRegister(NextToken[0], NextToken[1]) ||
2584        NextToken[0].is(AsmToken::Pipe) ||
2585        isId(NextToken[0], "abs"))) {
2586     lex();
2587     return true;
2588   }
2589 
2590   return false;
2591 }
2592 
2593 OperandMatchResultTy
2594 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2595                                               bool AllowImm) {
2596   bool Neg, SP3Neg;
2597   bool Abs, SP3Abs;
2598   SMLoc Loc;
2599 
2600   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2601   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2602     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2603     return MatchOperand_ParseFail;
2604   }
2605 
2606   SP3Neg = parseSP3NegModifier();
2607 
2608   Loc = getLoc();
2609   Neg = trySkipId("neg");
2610   if (Neg && SP3Neg) {
2611     Error(Loc, "expected register or immediate");
2612     return MatchOperand_ParseFail;
2613   }
2614   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2615     return MatchOperand_ParseFail;
2616 
2617   Abs = trySkipId("abs");
2618   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2619     return MatchOperand_ParseFail;
2620 
2621   Loc = getLoc();
2622   SP3Abs = trySkipToken(AsmToken::Pipe);
2623   if (Abs && SP3Abs) {
2624     Error(Loc, "expected register or immediate");
2625     return MatchOperand_ParseFail;
2626   }
2627 
2628   OperandMatchResultTy Res;
2629   if (AllowImm) {
2630     Res = parseRegOrImm(Operands, SP3Abs);
2631   } else {
2632     Res = parseReg(Operands);
2633   }
2634   if (Res != MatchOperand_Success) {
2635     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2636   }
2637 
2638   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2639     return MatchOperand_ParseFail;
2640   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2641     return MatchOperand_ParseFail;
2642   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2643     return MatchOperand_ParseFail;
2644 
2645   AMDGPUOperand::Modifiers Mods;
2646   Mods.Abs = Abs || SP3Abs;
2647   Mods.Neg = Neg || SP3Neg;
2648 
2649   if (Mods.hasFPModifiers()) {
2650     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2651     if (Op.isExpr()) {
2652       Error(Op.getStartLoc(), "expected an absolute expression");
2653       return MatchOperand_ParseFail;
2654     }
2655     Op.setModifiers(Mods);
2656   }
2657   return MatchOperand_Success;
2658 }
2659 
2660 OperandMatchResultTy
2661 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2662                                                bool AllowImm) {
2663   bool Sext = trySkipId("sext");
2664   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2665     return MatchOperand_ParseFail;
2666 
2667   OperandMatchResultTy Res;
2668   if (AllowImm) {
2669     Res = parseRegOrImm(Operands);
2670   } else {
2671     Res = parseReg(Operands);
2672   }
2673   if (Res != MatchOperand_Success) {
2674     return Sext? MatchOperand_ParseFail : Res;
2675   }
2676 
2677   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2678     return MatchOperand_ParseFail;
2679 
2680   AMDGPUOperand::Modifiers Mods;
2681   Mods.Sext = Sext;
2682 
2683   if (Mods.hasIntModifiers()) {
2684     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2685     if (Op.isExpr()) {
2686       Error(Op.getStartLoc(), "expected an absolute expression");
2687       return MatchOperand_ParseFail;
2688     }
2689     Op.setModifiers(Mods);
2690   }
2691 
2692   return MatchOperand_Success;
2693 }
2694 
2695 OperandMatchResultTy
2696 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2697   return parseRegOrImmWithFPInputMods(Operands, false);
2698 }
2699 
2700 OperandMatchResultTy
2701 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2702   return parseRegOrImmWithIntInputMods(Operands, false);
2703 }
2704 
2705 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2706   auto Loc = getLoc();
2707   if (trySkipId("off")) {
2708     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2709                                                 AMDGPUOperand::ImmTyOff, false));
2710     return MatchOperand_Success;
2711   }
2712 
2713   if (!isRegister())
2714     return MatchOperand_NoMatch;
2715 
2716   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2717   if (Reg) {
2718     Operands.push_back(std::move(Reg));
2719     return MatchOperand_Success;
2720   }
2721 
2722   return MatchOperand_ParseFail;
2723 
2724 }
2725 
2726 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2727   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2728 
2729   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2730       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2731       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2732       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2733     return Match_InvalidOperand;
2734 
2735   if ((TSFlags & SIInstrFlags::VOP3) &&
2736       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2737       getForcedEncodingSize() != 64)
2738     return Match_PreferE32;
2739 
2740   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2741       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2742     // v_mac_f32/16 allow only dst_sel == DWORD.
2743     auto OpNum =
2744         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2745     const auto &Op = Inst.getOperand(OpNum);
2746     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2747       return Match_InvalidOperand;
2748     }
2749   }
2750 
2751   return Match_Success;
2752 }
2753 
2754 // What asm variants we should check
2755 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2756   if (getForcedEncodingSize() == 32) {
2757     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2758     return makeArrayRef(Variants);
2759   }
2760 
2761   if (isForcedVOP3()) {
2762     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2763     return makeArrayRef(Variants);
2764   }
2765 
2766   if (isForcedSDWA()) {
2767     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2768                                         AMDGPUAsmVariants::SDWA9};
2769     return makeArrayRef(Variants);
2770   }
2771 
2772   if (isForcedDPP()) {
2773     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2774     return makeArrayRef(Variants);
2775   }
2776 
2777   static const unsigned Variants[] = {
2778     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2779     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2780   };
2781 
2782   return makeArrayRef(Variants);
2783 }
2784 
2785 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2786   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2787   const unsigned Num = Desc.getNumImplicitUses();
2788   for (unsigned i = 0; i < Num; ++i) {
2789     unsigned Reg = Desc.ImplicitUses[i];
2790     switch (Reg) {
2791     case AMDGPU::FLAT_SCR:
2792     case AMDGPU::VCC:
2793     case AMDGPU::VCC_LO:
2794     case AMDGPU::VCC_HI:
2795     case AMDGPU::M0:
2796       return Reg;
2797     default:
2798       break;
2799     }
2800   }
2801   return AMDGPU::NoRegister;
2802 }
2803 
2804 // NB: This code is correct only when used to check constant
2805 // bus limitations because GFX7 supports no f16 inline constants.
2806 // Note that there are no cases where a GFX7 opcode violates
2807 // constant bus limitations due to the use of an f16 constant.
2808 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2809                                        unsigned OpIdx) const {
2810   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2811 
2812   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2813     return false;
2814   }
2815 
2816   const MCOperand &MO = Inst.getOperand(OpIdx);
2817 
2818   int64_t Val = MO.getImm();
2819   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2820 
2821   switch (OpSize) { // expected operand size
2822   case 8:
2823     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2824   case 4:
2825     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2826   case 2: {
2827     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2828     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2829         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2830         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2831         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2832         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2833         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2834       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2835     } else {
2836       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2837     }
2838   }
2839   default:
2840     llvm_unreachable("invalid operand size");
2841   }
2842 }
2843 
2844 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2845   if (!isGFX10())
2846     return 1;
2847 
2848   switch (Opcode) {
2849   // 64-bit shift instructions can use only one scalar value input
2850   case AMDGPU::V_LSHLREV_B64:
2851   case AMDGPU::V_LSHLREV_B64_gfx10:
2852   case AMDGPU::V_LSHL_B64:
2853   case AMDGPU::V_LSHRREV_B64:
2854   case AMDGPU::V_LSHRREV_B64_gfx10:
2855   case AMDGPU::V_LSHR_B64:
2856   case AMDGPU::V_ASHRREV_I64:
2857   case AMDGPU::V_ASHRREV_I64_gfx10:
2858   case AMDGPU::V_ASHR_I64:
2859     return 1;
2860   default:
2861     return 2;
2862   }
2863 }
2864 
2865 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2866   const MCOperand &MO = Inst.getOperand(OpIdx);
2867   if (MO.isImm()) {
2868     return !isInlineConstant(Inst, OpIdx);
2869   } else if (MO.isReg()) {
2870     auto Reg = MO.getReg();
2871     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2872     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2873   } else {
2874     return true;
2875   }
2876 }
2877 
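// For illustration (pre-GFX10, where the constant bus limit is 1):
//   v_add_f32_e64 v0, s0, s1   // rejected: reads two different SGPRs
//   v_add_f32_e64 v0, s0, s0   // accepted: the same SGPR counts only once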
2878 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2879   const unsigned Opcode = Inst.getOpcode();
2880   const MCInstrDesc &Desc = MII.get(Opcode);
2881   unsigned ConstantBusUseCount = 0;
2882   unsigned NumLiterals = 0;
2883   unsigned LiteralSize;
2884 
2885   if (Desc.TSFlags &
2886       (SIInstrFlags::VOPC |
2887        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2888        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2889        SIInstrFlags::SDWA)) {
2890     // Check special imm operands (used by madmk, etc.)
2891     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2892       ++ConstantBusUseCount;
2893     }
2894 
2895     SmallDenseSet<unsigned> SGPRsUsed;
2896     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2897     if (SGPRUsed != AMDGPU::NoRegister) {
2898       SGPRsUsed.insert(SGPRUsed);
2899       ++ConstantBusUseCount;
2900     }
2901 
2902     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2903     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2904     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2905 
2906     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2907 
2908     for (int OpIdx : OpIndices) {
2909       if (OpIdx == -1) break;
2910 
2911       const MCOperand &MO = Inst.getOperand(OpIdx);
2912       if (usesConstantBus(Inst, OpIdx)) {
2913         if (MO.isReg()) {
2914           const unsigned Reg = mc2PseudoReg(MO.getReg());
2915           // Pairs of registers with partial intersections like these
2916           //   s0, s[0:1]
2917           //   flat_scratch_lo, flat_scratch
2918           //   flat_scratch_lo, flat_scratch_hi
2919           // are theoretically valid but they are disabled anyway.
2920           // Note that this code mimics SIInstrInfo::verifyInstruction.
2921           if (!SGPRsUsed.count(Reg)) {
2922             SGPRsUsed.insert(Reg);
2923             ++ConstantBusUseCount;
2924           }
2925         } else { // Expression or a literal
2926 
2927           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2928             continue; // special operand like VINTERP attr_chan
2929 
2930           // An instruction may use only one literal.
2931           // This has been validated in a previous step.
2932           // See validateVOP3Literal.
2933           // This literal may be used by more than one operand.
2934           // If all these operands are of the same size,
2935           // the literal counts as one scalar value.
2936           // Otherwise it counts as 2 scalar values.
2937           // See "GFX10 Shader Programming", section 3.6.2.3.
2938 
2939           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2940           if (Size < 4) Size = 4;
2941 
2942           if (NumLiterals == 0) {
2943             NumLiterals = 1;
2944             LiteralSize = Size;
2945           } else if (LiteralSize != Size) {
2946             NumLiterals = 2;
2947           }
2948         }
2949       }
2950     }
2951   }
2952   ConstantBusUseCount += NumLiterals;
2953 
2954   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2955 }
2956 
2957 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2958   const unsigned Opcode = Inst.getOpcode();
2959   const MCInstrDesc &Desc = MII.get(Opcode);
2960 
2961   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2962   if (DstIdx == -1 ||
2963       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2964     return true;
2965   }
2966 
2967   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2968 
2969   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2970   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2971   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2972 
2973   assert(DstIdx != -1);
2974   const MCOperand &Dst = Inst.getOperand(DstIdx);
2975   assert(Dst.isReg());
2976   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2977 
2978   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2979 
2980   for (int SrcIdx : SrcIndices) {
2981     if (SrcIdx == -1) break;
2982     const MCOperand &Src = Inst.getOperand(SrcIdx);
2983     if (Src.isReg()) {
2984       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2985       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2986         return false;
2987       }
2988     }
2989   }
2990 
2991   return true;
2992 }
2993 
2994 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2995 
2996   const unsigned Opc = Inst.getOpcode();
2997   const MCInstrDesc &Desc = MII.get(Opc);
2998 
2999   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3000     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3001     assert(ClampIdx != -1);
3002     return Inst.getOperand(ClampIdx).getImm() == 0;
3003   }
3004 
3005   return true;
3006 }
3007 
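// For illustration: with dmask:0xf a non-gather image load returns 4 dwords,
// so vdata must be a 4-register tuple (5 with tfe); with dmask:0x3 only 2
// registers are needed (1 when packed d16 halves the data size).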
3008 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3009 
3010   const unsigned Opc = Inst.getOpcode();
3011   const MCInstrDesc &Desc = MII.get(Opc);
3012 
3013   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3014     return true;
3015 
3016   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3017   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3018   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3019 
3020   assert(VDataIdx != -1);
3021   assert(DMaskIdx != -1);
3022   assert(TFEIdx != -1);
3023 
3024   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3025   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3026   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3027   if (DMask == 0)
3028     DMask = 1;
3029 
3030   unsigned DataSize =
3031     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3032   if (hasPackedD16()) {
3033     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3034     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3035       DataSize = (DataSize + 1) / 2;
3036   }
3037 
3038   return (VDataSize / 4) == DataSize + TFESize;
3039 }
3040 
3041 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3042   const unsigned Opc = Inst.getOpcode();
3043   const MCInstrDesc &Desc = MII.get(Opc);
3044 
3045   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3046     return true;
3047 
3048   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3049   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3050       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3051   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3052   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3053   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3054 
3055   assert(VAddr0Idx != -1);
3056   assert(SrsrcIdx != -1);
3057   assert(DimIdx != -1);
3058   assert(SrsrcIdx > VAddr0Idx);
3059 
3060   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3061   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3062   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3063   unsigned VAddrSize =
3064       IsNSA ? SrsrcIdx - VAddr0Idx
3065             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3066 
3067   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3068                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3069                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3070                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3071   if (!IsNSA) {
3072     if (AddrSize > 8)
3073       AddrSize = 16;
3074     else if (AddrSize > 4)
3075       AddrSize = 8;
3076   }
3077 
3078   return VAddrSize == AddrSize;
3079 }
3080 
3081 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3082 
3083   const unsigned Opc = Inst.getOpcode();
3084   const MCInstrDesc &Desc = MII.get(Opc);
3085 
3086   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3087     return true;
3088   if (!Desc.mayLoad() || !Desc.mayStore())
3089     return true; // Not atomic
3090 
3091   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3092   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3093 
3094   // This is an incomplete check because image_atomic_cmpswap
3095   // may only use 0x3 and 0xf while other atomic operations
3096   // may only use 0x1 and 0x3. However, these limitations are
3097   // verified when we check that dmask matches the dst size.
3098   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3099 }
3100 
3101 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3102 
3103   const unsigned Opc = Inst.getOpcode();
3104   const MCInstrDesc &Desc = MII.get(Opc);
3105 
3106   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3107     return true;
3108 
3109   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3110   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3111 
3112   // GATHER4 instructions use dmask in a different fashion compared to
3113   // other MIMG instructions: exactly one channel must be selected.
3114   // The only valid DMASK values are 1=red, 2=green, 4=blue, 8=alpha
3115   // (e.g. dmask=1 returns (red,red,red,red)). The ISA document
3116   // doesn't mention this.
3117   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3118 }
3119 
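// SDWA forms of v_movrels*, which need an extra check on src0.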
3120 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3121 {
3122   switch (Opcode) {
3123   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3124   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3125   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3126     return true;
3127   default:
3128     return false;
3129   }
3130 }
3131 
3132 // movrels* opcodes should only allow VGPRs as src0.
3133 // This is specified in .td description for vop1/vop3,
3134 // but sdwa is handled differently. See isSDWAOperand.
3135 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3136 
3137   const unsigned Opc = Inst.getOpcode();
3138   const MCInstrDesc &Desc = MII.get(Opc);
3139 
3140   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3141     return true;
3142 
3143   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3144   assert(Src0Idx != -1);
3145 
3146   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3147   if (!Src0.isReg())
3148     return false;
3149 
3150   auto Reg = Src0.getReg();
3151   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3152   return !isSGPR(mc2PseudoReg(Reg), TRI);
3153 }
3154 
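// v_accvgpr_write_b32 does not accept an SGPR as src0; it must be a VGPR
// or an inline constant.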
3155 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3156 
3157   const unsigned Opc = Inst.getOpcode();
3158 
3159   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3160     return true;
3161 
3162   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3163   assert(Src0Idx != -1);
3164 
3165   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3166   if (!Src0.isReg())
3167     return true;
3168 
3169   auto Reg = Src0.getReg();
3170   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3171   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3172     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3173     return false;
3174   }
3175 
3176   return true;
3177 }
3178 
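// The MIMG d16 modifier is not supported on SI/CI.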
3179 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3180 
3181   const unsigned Opc = Inst.getOpcode();
3182   const MCInstrDesc &Desc = MII.get(Opc);
3183 
3184   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3185     return true;
3186 
3187   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3188   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3189     if (isCI() || isSI())
3190       return false;
3191   }
3192 
3193   return true;
3194 }
3195 
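// If a dim operand is present, it must encode one of the 8 valid dimensions.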
3196 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3197   const unsigned Opc = Inst.getOpcode();
3198   const MCInstrDesc &Desc = MII.get(Opc);
3199 
3200   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3201     return true;
3202 
3203   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3204   if (DimIdx < 0)
3205     return true;
3206 
3207   long Imm = Inst.getOperand(DimIdx).getImm();
3208   if (Imm < 0 || Imm >= 8)
3209     return false;
3210 
3211   return true;
3212 }
3213 
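// Opcodes with reversed source operands (v_subrev*, v_lshlrev*, etc.).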
3214 static bool IsRevOpcode(const unsigned Opcode)
3215 {
3216   switch (Opcode) {
3217   case AMDGPU::V_SUBREV_F32_e32:
3218   case AMDGPU::V_SUBREV_F32_e64:
3219   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3220   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3221   case AMDGPU::V_SUBREV_F32_e32_vi:
3222   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3223   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3224   case AMDGPU::V_SUBREV_F32_e64_vi:
3225 
3226   case AMDGPU::V_SUBREV_I32_e32:
3227   case AMDGPU::V_SUBREV_I32_e64:
3228   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3229   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3230 
3231   case AMDGPU::V_SUBBREV_U32_e32:
3232   case AMDGPU::V_SUBBREV_U32_e64:
3233   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3234   case AMDGPU::V_SUBBREV_U32_e32_vi:
3235   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3236   case AMDGPU::V_SUBBREV_U32_e64_vi:
3237 
3238   case AMDGPU::V_SUBREV_U32_e32:
3239   case AMDGPU::V_SUBREV_U32_e64:
3240   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3241   case AMDGPU::V_SUBREV_U32_e32_vi:
3242   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3243   case AMDGPU::V_SUBREV_U32_e64_vi:
3244 
3245   case AMDGPU::V_SUBREV_F16_e32:
3246   case AMDGPU::V_SUBREV_F16_e64:
3247   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3248   case AMDGPU::V_SUBREV_F16_e32_vi:
3249   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3250   case AMDGPU::V_SUBREV_F16_e64_vi:
3251 
3252   case AMDGPU::V_SUBREV_U16_e32:
3253   case AMDGPU::V_SUBREV_U16_e64:
3254   case AMDGPU::V_SUBREV_U16_e32_vi:
3255   case AMDGPU::V_SUBREV_U16_e64_vi:
3256 
3257   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3258   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3259   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3260 
3261   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3262   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3263 
3264   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3265   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3266 
3267   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3268   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3269 
3270   case AMDGPU::V_LSHRREV_B32_e32:
3271   case AMDGPU::V_LSHRREV_B32_e64:
3272   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3273   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3274   case AMDGPU::V_LSHRREV_B32_e32_vi:
3275   case AMDGPU::V_LSHRREV_B32_e64_vi:
3276   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3277   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3278 
3279   case AMDGPU::V_ASHRREV_I32_e32:
3280   case AMDGPU::V_ASHRREV_I32_e64:
3281   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3282   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3283   case AMDGPU::V_ASHRREV_I32_e32_vi:
3284   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3285   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3286   case AMDGPU::V_ASHRREV_I32_e64_vi:
3287 
3288   case AMDGPU::V_LSHLREV_B32_e32:
3289   case AMDGPU::V_LSHLREV_B32_e64:
3290   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3291   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3292   case AMDGPU::V_LSHLREV_B32_e32_vi:
3293   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3294   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3295   case AMDGPU::V_LSHLREV_B32_e64_vi:
3296 
3297   case AMDGPU::V_LSHLREV_B16_e32:
3298   case AMDGPU::V_LSHLREV_B16_e64:
3299   case AMDGPU::V_LSHLREV_B16_e32_vi:
3300   case AMDGPU::V_LSHLREV_B16_e64_vi:
3301   case AMDGPU::V_LSHLREV_B16_gfx10:
3302 
3303   case AMDGPU::V_LSHRREV_B16_e32:
3304   case AMDGPU::V_LSHRREV_B16_e64:
3305   case AMDGPU::V_LSHRREV_B16_e32_vi:
3306   case AMDGPU::V_LSHRREV_B16_e64_vi:
3307   case AMDGPU::V_LSHRREV_B16_gfx10:
3308 
3309   case AMDGPU::V_ASHRREV_I16_e32:
3310   case AMDGPU::V_ASHRREV_I16_e64:
3311   case AMDGPU::V_ASHRREV_I16_e32_vi:
3312   case AMDGPU::V_ASHRREV_I16_e64_vi:
3313   case AMDGPU::V_ASHRREV_I16_gfx10:
3314 
3315   case AMDGPU::V_LSHLREV_B64:
3316   case AMDGPU::V_LSHLREV_B64_gfx10:
3317   case AMDGPU::V_LSHLREV_B64_vi:
3318 
3319   case AMDGPU::V_LSHRREV_B64:
3320   case AMDGPU::V_LSHRREV_B64_gfx10:
3321   case AMDGPU::V_LSHRREV_B64_vi:
3322 
3323   case AMDGPU::V_ASHRREV_I64:
3324   case AMDGPU::V_ASHRREV_I64_gfx10:
3325   case AMDGPU::V_ASHRREV_I64_vi:
3326 
3327   case AMDGPU::V_PK_LSHLREV_B16:
3328   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3329   case AMDGPU::V_PK_LSHLREV_B16_vi:
3330 
3331   case AMDGPU::V_PK_LSHRREV_B16:
3332   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3333   case AMDGPU::V_PK_LSHRREV_B16_vi:
3334   case AMDGPU::V_PK_ASHRREV_I16:
3335   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3336   case AMDGPU::V_PK_ASHRREV_I16_vi:
3337     return true;
3338   default:
3339     return false;
3340   }
3341 }
3342 
3343 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3344 
3345   using namespace SIInstrFlags;
3346   const unsigned Opcode = Inst.getOpcode();
3347   const MCInstrDesc &Desc = MII.get(Opcode);
3348 
3349   // The lds_direct register is defined so that it can only be used
3350   // with 9-bit source operands. Ignore encodings which do not accept these.
3351   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3352     return true;
3353 
3354   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3355   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3356   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3357 
3358   const int SrcIndices[] = { Src1Idx, Src2Idx };
3359 
3360   // lds_direct cannot be specified as either src1 or src2.
3361   for (int SrcIdx : SrcIndices) {
3362     if (SrcIdx == -1) break;
3363     const MCOperand &Src = Inst.getOperand(SrcIdx);
3364     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3365       return false;
3366     }
3367   }
3368 
3369   if (Src0Idx == -1)
3370     return true;
3371 
3372   const MCOperand &Src = Inst.getOperand(Src0Idx);
3373   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3374     return true;
3375 
3376   // lds_direct is specified as src0. Check additional limitations.
3377   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3378 }
3379 
3380 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3381   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3382     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3383     if (Op.isFlatOffset())
3384       return Op.getStartLoc();
3385   }
3386   return getLoc();
3387 }
3388 
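// Check that a FLAT instruction's offset modifier is supported on this
// subtarget and that the value fits the offset encoding for its address
// segment.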
3389 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3390                                          const OperandVector &Operands) {
3391   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3392   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3393     return true;
3394 
3395   auto Opcode = Inst.getOpcode();
3396   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3397   assert(OpNum != -1);
3398 
3399   const auto &Op = Inst.getOperand(OpNum);
3400   if (!hasFlatOffsets() && Op.getImm() != 0) {
3401     Error(getFlatOffsetLoc(Operands),
3402           "flat offset modifier is not supported on this GPU");
3403     return false;
3404   }
3405 
3406   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3407   // For FLAT segment the offset must be positive;
3408   // MSB is ignored and forced to zero.
3409   unsigned OffsetSize = isGFX9() ? 13 : 12;
3410   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3411     if (!isIntN(OffsetSize, Op.getImm())) {
3412       Error(getFlatOffsetLoc(Operands),
3413             isGFX9() ? "expected a 13-bit signed offset" :
3414                        "expected a 12-bit signed offset");
3415       return false;
3416     }
3417   } else {
3418     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3419       Error(getFlatOffsetLoc(Operands),
3420             isGFX9() ? "expected a 12-bit unsigned offset" :
3421                        "expected an 11-bit unsigned offset");
3422       return false;
3423     }
3424   }
3425 
3426   return true;
3427 }
3428 
3429 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3430   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3431     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3432     if (Op.isSMEMOffset())
3433       return Op.getStartLoc();
3434   }
3435   return getLoc();
3436 }
3437 
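// Check that an SMEM offset immediate is encodable on this subtarget
// (20-bit unsigned for VI and buffer forms, 21-bit signed otherwise).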
3438 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3439                                          const OperandVector &Operands) {
3440   if (isCI() || isSI())
3441     return true;
3442 
3443   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3444   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3445     return true;
3446 
3447   auto Opcode = Inst.getOpcode();
3448   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3449   if (OpNum == -1)
3450     return true;
3451 
3452   const auto &Op = Inst.getOperand(OpNum);
3453   if (!Op.isImm())
3454     return true;
3455 
3456   uint64_t Offset = Op.getImm();
3457   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3458   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3459       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3460     return true;
3461 
3462   Error(getSMEMOffsetLoc(Operands),
3463         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3464                                "expected a 21-bit signed offset");
3465 
3466   return false;
3467 }
3468 
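// SOP2/SOPC instructions may use at most one literal or expression operand
// across src0 and src1.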
3469 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3470   unsigned Opcode = Inst.getOpcode();
3471   const MCInstrDesc &Desc = MII.get(Opcode);
3472   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3473     return true;
3474 
3475   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3476   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3477 
3478   const int OpIndices[] = { Src0Idx, Src1Idx };
3479 
3480   unsigned NumExprs = 0;
3481   unsigned NumLiterals = 0;
3482   uint32_t LiteralValue;
3483 
3484   for (int OpIdx : OpIndices) {
3485     if (OpIdx == -1) break;
3486 
3487     const MCOperand &MO = Inst.getOperand(OpIdx);
3488     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3489     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3490       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3491         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3492         if (NumLiterals == 0 || LiteralValue != Value) {
3493           LiteralValue = Value;
3494           ++NumLiterals;
3495         }
3496       } else if (MO.isExpr()) {
3497         ++NumExprs;
3498       }
3499     }
3500   }
3501 
3502   return NumLiterals + NumExprs <= 1;
3503 }
3504 
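// v_permlane16_b32 and v_permlanex16_b32 only use the low two op_sel bits.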
3505 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3506   const unsigned Opc = Inst.getOpcode();
3507   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3508       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3509     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3510     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3511 
3512     if (OpSel & ~3)
3513       return false;
3514   }
3515   return true;
3516 }
3517 
3518 // Check if the VCC register matches the wavefront size
3519 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3520   auto FB = getFeatureBits();
3521   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3522     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3523 }
3524 
3525 // A VOP3 literal is only allowed on GFX10+, and only one literal may be used
3526 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3527   unsigned Opcode = Inst.getOpcode();
3528   const MCInstrDesc &Desc = MII.get(Opcode);
3529   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3530     return true;
3531 
3532   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3533   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3534   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3535 
3536   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3537 
3538   unsigned NumExprs = 0;
3539   unsigned NumLiterals = 0;
3540   uint32_t LiteralValue;
3541 
3542   for (int OpIdx : OpIndices) {
3543     if (OpIdx == -1) break;
3544 
3545     const MCOperand &MO = Inst.getOperand(OpIdx);
3546     if (!MO.isImm() && !MO.isExpr())
3547       continue;
3548     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3549       continue;
3550 
3551     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3552         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3553       return false;
3554 
3555     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3556       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3557       if (NumLiterals == 0 || LiteralValue != Value) {
3558         LiteralValue = Value;
3559         ++NumLiterals;
3560       }
3561     } else if (MO.isExpr()) {
3562       ++NumExprs;
3563     }
3564   }
3565   NumLiterals += NumExprs;
3566 
3567   return !NumLiterals ||
3568          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3569 }
3570 
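// Run all target-specific checks on a matched instruction and report the
// first failure, using IDLoc or a more precise operand location.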
3571 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3572                                           const SMLoc &IDLoc,
3573                                           const OperandVector &Operands) {
3574   if (!validateLdsDirect(Inst)) {
3575     Error(IDLoc,
3576       "invalid use of lds_direct");
3577     return false;
3578   }
3579   if (!validateSOPLiteral(Inst)) {
3580     Error(IDLoc,
3581       "only one literal operand is allowed");
3582     return false;
3583   }
3584   if (!validateVOP3Literal(Inst)) {
3585     Error(IDLoc,
3586       "invalid literal operand");
3587     return false;
3588   }
3589   if (!validateConstantBusLimitations(Inst)) {
3590     Error(IDLoc,
3591       "invalid operand (violates constant bus restrictions)");
3592     return false;
3593   }
3594   if (!validateEarlyClobberLimitations(Inst)) {
3595     Error(IDLoc,
3596       "destination must be different than all sources");
3597     return false;
3598   }
3599   if (!validateIntClampSupported(Inst)) {
3600     Error(IDLoc,
3601       "integer clamping is not supported on this GPU");
3602     return false;
3603   }
3604   if (!validateOpSel(Inst)) {
3605     Error(IDLoc,
3606       "invalid op_sel operand");
3607     return false;
3608   }
3609   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3610   if (!validateMIMGD16(Inst)) {
3611     Error(IDLoc,
3612       "d16 modifier is not supported on this GPU");
3613     return false;
3614   }
3615   if (!validateMIMGDim(Inst)) {
3616     Error(IDLoc, "dim modifier is required on this GPU");
3617     return false;
3618   }
3619   if (!validateMIMGDataSize(Inst)) {
3620     Error(IDLoc,
3621       "image data size does not match dmask and tfe");
3622     return false;
3623   }
3624   if (!validateMIMGAddrSize(Inst)) {
3625     Error(IDLoc,
3626       "image address size does not match dim and a16");
3627     return false;
3628   }
3629   if (!validateMIMGAtomicDMask(Inst)) {
3630     Error(IDLoc,
3631       "invalid atomic image dmask");
3632     return false;
3633   }
3634   if (!validateMIMGGatherDMask(Inst)) {
3635     Error(IDLoc,
3636       "invalid image_gather dmask: only one bit must be set");
3637     return false;
3638   }
3639   if (!validateMovrels(Inst)) {
3640     Error(IDLoc, "source operand must be a VGPR");
3641     return false;
3642   }
3643   if (!validateFlatOffset(Inst, Operands)) {
3644     return false;
3645   }
3646   if (!validateSMEMOffset(Inst, Operands)) {
3647     return false;
3648   }
3649   if (!validateMAIAccWrite(Inst)) {
3650     return false;
3651   }
3652 
3653   return true;
3654 }
3655 
3656 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3657                                             const FeatureBitset &FBS,
3658                                             unsigned VariantID = 0);
3659 
3660 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3661                                               OperandVector &Operands,
3662                                               MCStreamer &Out,
3663                                               uint64_t &ErrorInfo,
3664                                               bool MatchingInlineAsm) {
3665   MCInst Inst;
3666   unsigned Result = Match_Success;
3667   for (auto Variant : getMatchedVariants()) {
3668     uint64_t EI;
3669     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3670                                   Variant);
3671     // Match statuses are ordered from least to most specific. Keep the most
3672     // specific status seen so far as the result:
3673     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3674     if ((R == Match_Success) ||
3675         (R == Match_PreferE32) ||
3676         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3677         (R == Match_InvalidOperand && Result != Match_MissingFeature
3678                                    && Result != Match_PreferE32) ||
3679         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3680                                    && Result != Match_MissingFeature
3681                                    && Result != Match_PreferE32)) {
3682       Result = R;
3683       ErrorInfo = EI;
3684     }
3685     if (R == Match_Success)
3686       break;
3687   }
3688 
3689   switch (Result) {
3690   default: break;
3691   case Match_Success:
3692     if (!validateInstruction(Inst, IDLoc, Operands)) {
3693       return true;
3694     }
3695     Inst.setLoc(IDLoc);
3696     Out.emitInstruction(Inst, getSTI());
3697     return false;
3698 
3699   case Match_MissingFeature:
3700     return Error(IDLoc, "instruction not supported on this GPU");
3701 
3702   case Match_MnemonicFail: {
3703     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3704     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3705         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3706     return Error(IDLoc, "invalid instruction" + Suggestion,
3707                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3708   }
3709 
3710   case Match_InvalidOperand: {
3711     SMLoc ErrorLoc = IDLoc;
3712     if (ErrorInfo != ~0ULL) {
3713       if (ErrorInfo >= Operands.size()) {
3714         return Error(IDLoc, "too few operands for instruction");
3715       }
3716       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3717       if (ErrorLoc == SMLoc())
3718         ErrorLoc = IDLoc;
3719     }
3720     return Error(ErrorLoc, "invalid operand for instruction");
3721   }
3722 
3723   case Match_PreferE32:
3724     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3725                         "should be encoded as e32");
3726   }
3727   llvm_unreachable("Implement any new match types added!");
3728 }
3729 
3730 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3731   int64_t Tmp = -1;
3732   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3733     return true;
3734   }
3735   if (getParser().parseAbsoluteExpression(Tmp)) {
3736     return true;
3737   }
3738   Ret = static_cast<uint32_t>(Tmp);
3739   return false;
3740 }
3741 
3742 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3743                                                uint32_t &Minor) {
3744   if (ParseAsAbsoluteExpression(Major))
3745     return TokError("invalid major version");
3746 
3747   if (getLexer().isNot(AsmToken::Comma))
3748     return TokError("minor version number required, comma expected");
3749   Lex();
3750 
3751   if (ParseAsAbsoluteExpression(Minor))
3752     return TokError("invalid minor version");
3753 
3754   return false;
3755 }
3756 
3757 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3758   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3759     return TokError("directive only supported for amdgcn architecture");
3760 
3761   std::string Target;
3762 
3763   SMLoc TargetStart = getTok().getLoc();
3764   if (getParser().parseEscapedString(Target))
3765     return true;
3766   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3767 
3768   std::string ExpectedTarget;
3769   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3770   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3771 
3772   if (Target != ExpectedTargetOS.str())
3773     return getParser().Error(TargetRange.Start, "target must match options",
3774                              TargetRange);
3775 
3776   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3777   return false;
3778 }
3779 
3780 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3781   return getParser().Error(Range.Start, "value out of range", Range);
3782 }
3783 
3784 bool AMDGPUAsmParser::calculateGPRBlocks(
3785     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3786     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3787     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3788     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3789   // TODO(scott.linder): These calculations are duplicated from
3790   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3791   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3792 
3793   unsigned NumVGPRs = NextFreeVGPR;
3794   unsigned NumSGPRs = NextFreeSGPR;
3795 
3796   if (Version.Major >= 10)
3797     NumSGPRs = 0;
3798   else {
3799     unsigned MaxAddressableNumSGPRs =
3800         IsaInfo::getAddressableNumSGPRs(&getSTI());
3801 
3802     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3803         NumSGPRs > MaxAddressableNumSGPRs)
3804       return OutOfRangeError(SGPRRange);
3805 
3806     NumSGPRs +=
3807         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3808 
3809     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3810         NumSGPRs > MaxAddressableNumSGPRs)
3811       return OutOfRangeError(SGPRRange);
3812 
3813     if (Features.test(FeatureSGPRInitBug))
3814       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3815   }
3816 
3817   VGPRBlocks =
3818       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3819   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3820 
3821   return false;
3822 }
3823 
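// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block. Each .amdhsa_*
// directive sets a field of the kernel descriptor; the accumulated register
// usage is converted into granulated VGPR/SGPR block counts before the
// descriptor is emitted.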
3824 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3825   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3826     return TokError("directive only supported for amdgcn architecture");
3827 
3828   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3829     return TokError("directive only supported for amdhsa OS");
3830 
3831   StringRef KernelName;
3832   if (getParser().parseIdentifier(KernelName))
3833     return true;
3834 
3835   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3836 
3837   StringSet<> Seen;
3838 
3839   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3840 
3841   SMRange VGPRRange;
3842   uint64_t NextFreeVGPR = 0;
3843   SMRange SGPRRange;
3844   uint64_t NextFreeSGPR = 0;
3845   unsigned UserSGPRCount = 0;
3846   bool ReserveVCC = true;
3847   bool ReserveFlatScr = true;
3848   bool ReserveXNACK = hasXNACK();
3849   Optional<bool> EnableWavefrontSize32;
3850 
3851   while (true) {
3852     while (getLexer().is(AsmToken::EndOfStatement))
3853       Lex();
3854 
3855     if (getLexer().isNot(AsmToken::Identifier))
3856       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3857 
3858     StringRef ID = getTok().getIdentifier();
3859     SMRange IDRange = getTok().getLocRange();
3860     Lex();
3861 
3862     if (ID == ".end_amdhsa_kernel")
3863       break;
3864 
3865     if (Seen.find(ID) != Seen.end())
3866       return TokError(".amdhsa_ directives cannot be repeated");
3867     Seen.insert(ID);
3868 
3869     SMLoc ValStart = getTok().getLoc();
3870     int64_t IVal;
3871     if (getParser().parseAbsoluteExpression(IVal))
3872       return true;
3873     SMLoc ValEnd = getTok().getLoc();
3874     SMRange ValRange = SMRange(ValStart, ValEnd);
3875 
3876     if (IVal < 0)
3877       return OutOfRangeError(ValRange);
3878 
3879     uint64_t Val = IVal;
3880 
3881 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3882   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3883     return OutOfRangeError(RANGE);                                             \
3884   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3885 
3886     if (ID == ".amdhsa_group_segment_fixed_size") {
3887       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3888         return OutOfRangeError(ValRange);
3889       KD.group_segment_fixed_size = Val;
3890     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3891       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3892         return OutOfRangeError(ValRange);
3893       KD.private_segment_fixed_size = Val;
3894     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3895       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3896                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3897                        Val, ValRange);
3898       if (Val)
3899         UserSGPRCount += 4;
3900     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3901       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3902                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3903                        ValRange);
3904       if (Val)
3905         UserSGPRCount += 2;
3906     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3907       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3908                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3909                        ValRange);
3910       if (Val)
3911         UserSGPRCount += 2;
3912     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3913       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3914                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3915                        Val, ValRange);
3916       if (Val)
3917         UserSGPRCount += 2;
3918     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3919       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3920                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3921                        ValRange);
3922       if (Val)
3923         UserSGPRCount += 2;
3924     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3925       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3926                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3927                        ValRange);
3928       if (Val)
3929         UserSGPRCount += 2;
3930     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3931       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3932                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3933                        Val, ValRange);
3934       if (Val)
3935         UserSGPRCount += 1;
3936     } else if (ID == ".amdhsa_wavefront_size32") {
3937       if (IVersion.Major < 10)
3938         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3939                                  IDRange);
3940       EnableWavefrontSize32 = Val;
3941       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3942                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3943                        Val, ValRange);
3944     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3945       PARSE_BITS_ENTRY(
3946           KD.compute_pgm_rsrc2,
3947           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3948           ValRange);
3949     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3950       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3951                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3952                        ValRange);
3953     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3954       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3955                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3956                        ValRange);
3957     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3959                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3960                        ValRange);
3961     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3963                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3964                        ValRange);
3965     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3966       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3967                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3968                        ValRange);
3969     } else if (ID == ".amdhsa_next_free_vgpr") {
3970       VGPRRange = ValRange;
3971       NextFreeVGPR = Val;
3972     } else if (ID == ".amdhsa_next_free_sgpr") {
3973       SGPRRange = ValRange;
3974       NextFreeSGPR = Val;
3975     } else if (ID == ".amdhsa_reserve_vcc") {
3976       if (!isUInt<1>(Val))
3977         return OutOfRangeError(ValRange);
3978       ReserveVCC = Val;
3979     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3980       if (IVersion.Major < 7)
3981         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3982                                  IDRange);
3983       if (!isUInt<1>(Val))
3984         return OutOfRangeError(ValRange);
3985       ReserveFlatScr = Val;
3986     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3987       if (IVersion.Major < 8)
3988         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3989                                  IDRange);
3990       if (!isUInt<1>(Val))
3991         return OutOfRangeError(ValRange);
3992       ReserveXNACK = Val;
3993     } else if (ID == ".amdhsa_float_round_mode_32") {
3994       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3995                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3996     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3997       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3998                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3999     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4000       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4001                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4002     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4003       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4004                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4005                        ValRange);
4006     } else if (ID == ".amdhsa_dx10_clamp") {
4007       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4008                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4009     } else if (ID == ".amdhsa_ieee_mode") {
4010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4011                        Val, ValRange);
4012     } else if (ID == ".amdhsa_fp16_overflow") {
4013       if (IVersion.Major < 9)
4014         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4015                                  IDRange);
4016       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4017                        ValRange);
4018     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4019       if (IVersion.Major < 10)
4020         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4021                                  IDRange);
4022       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4023                        ValRange);
4024     } else if (ID == ".amdhsa_memory_ordered") {
4025       if (IVersion.Major < 10)
4026         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4027                                  IDRange);
4028       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4029                        ValRange);
4030     } else if (ID == ".amdhsa_forward_progress") {
4031       if (IVersion.Major < 10)
4032         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4033                                  IDRange);
4034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4035                        ValRange);
4036     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4037       PARSE_BITS_ENTRY(
4038           KD.compute_pgm_rsrc2,
4039           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4040           ValRange);
4041     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4043                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4044                        Val, ValRange);
4045     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4046       PARSE_BITS_ENTRY(
4047           KD.compute_pgm_rsrc2,
4048           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4049           ValRange);
4050     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4051       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4052                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4053                        Val, ValRange);
4054     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4055       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4056                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4057                        Val, ValRange);
4058     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4059       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4060                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4061                        Val, ValRange);
4062     } else if (ID == ".amdhsa_exception_int_div_zero") {
4063       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4064                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4065                        Val, ValRange);
4066     } else {
4067       return getParser().Error(IDRange.Start,
4068                                "unknown .amdhsa_kernel directive", IDRange);
4069     }
4070 
4071 #undef PARSE_BITS_ENTRY
4072   }
4073 
4074   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4075     return TokError(".amdhsa_next_free_vgpr directive is required");
4076 
4077   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4078     return TokError(".amdhsa_next_free_sgpr directive is required");
4079 
4080   unsigned VGPRBlocks;
4081   unsigned SGPRBlocks;
4082   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4083                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4084                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4085                          SGPRBlocks))
4086     return true;
4087 
4088   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4089           VGPRBlocks))
4090     return OutOfRangeError(VGPRRange);
4091   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4092                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4093 
4094   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4095           SGPRBlocks))
4096     return OutOfRangeError(SGPRRange);
4097   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4098                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4099                   SGPRBlocks);
4100 
4101   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4102     return TokError("too many user SGPRs enabled");
4103   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4104                   UserSGPRCount);
4105 
4106   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4107       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4108       ReserveFlatScr, ReserveXNACK);
4109   return false;
4110 }
4111 
4112 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4113   uint32_t Major;
4114   uint32_t Minor;
4115 
4116   if (ParseDirectiveMajorMinor(Major, Minor))
4117     return true;
4118 
4119   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4120   return false;
4121 }
4122 
4123 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4124   uint32_t Major;
4125   uint32_t Minor;
4126   uint32_t Stepping;
4127   StringRef VendorName;
4128   StringRef ArchName;
4129 
4130   // If this directive has no arguments, then use the ISA version for the
4131   // targeted GPU.
4132   if (getLexer().is(AsmToken::EndOfStatement)) {
4133     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4134     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4135                                                       ISA.Stepping,
4136                                                       "AMD", "AMDGPU");
4137     return false;
4138   }
4139 
4140   if (ParseDirectiveMajorMinor(Major, Minor))
4141     return true;
4142 
4143   if (getLexer().isNot(AsmToken::Comma))
4144     return TokError("stepping version number required, comma expected");
4145   Lex();
4146 
4147   if (ParseAsAbsoluteExpression(Stepping))
4148     return TokError("invalid stepping version");
4149 
4150   if (getLexer().isNot(AsmToken::Comma))
4151     return TokError("vendor name required, comma expected");
4152   Lex();
4153 
4154   if (getLexer().isNot(AsmToken::String))
4155     return TokError("invalid vendor name");
4156 
4157   VendorName = getLexer().getTok().getStringContents();
4158   Lex();
4159 
4160   if (getLexer().isNot(AsmToken::Comma))
4161     return TokError("arch name required, comma expected");
4162   Lex();
4163 
4164   if (getLexer().isNot(AsmToken::String))
4165     return TokError("invalid arch name");
4166 
4167   ArchName = getLexer().getTok().getStringContents();
4168   Lex();
4169 
4170   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4171                                                     VendorName, ArchName);
4172   return false;
4173 }
4174 
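// Parse a single amd_kernel_code_t field and reject values that conflict
// with the selected wavefront size or GPU generation.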
4175 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4176                                                amd_kernel_code_t &Header) {
4177   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4178   // assembly for backwards compatibility.
4179   if (ID == "max_scratch_backing_memory_byte_size") {
4180     Parser.eatToEndOfStatement();
4181     return false;
4182   }
4183 
4184   SmallString<40> ErrStr;
4185   raw_svector_ostream Err(ErrStr);
4186   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4187     return TokError(Err.str());
4188   }
4189   Lex();
4190 
4191   if (ID == "enable_wavefront_size32") {
4192     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4193       if (!isGFX10())
4194         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4195       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4196         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4197     } else {
4198       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4199         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4200     }
4201   }
4202 
4203   if (ID == "wavefront_size") {
4204     if (Header.wavefront_size == 5) {
4205       if (!isGFX10())
4206         return TokError("wavefront_size=5 is only allowed on GFX10+");
4207       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4208         return TokError("wavefront_size=5 requires +WavefrontSize32");
4209     } else if (Header.wavefront_size == 6) {
4210       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4211         return TokError("wavefront_size=6 requires +WavefrontSize64");
4212     }
4213   }
4214 
4215   if (ID == "enable_wgp_mode") {
4216     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4217       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4218   }
4219 
4220   if (ID == "enable_mem_ordered") {
4221     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4222       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4223   }
4224 
4225   if (ID == "enable_fwd_progress") {
4226     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4227       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4228   }
4229 
4230   return false;
4231 }
4232 
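// Parse the body of a .amd_kernel_code_t ... .end_amd_kernel_code_t block
// and emit the resulting amd_kernel_code_t header.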
4233 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4234   amd_kernel_code_t Header;
4235   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4236 
4237   while (true) {
4238     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4239     // will set the current token to EndOfStatement.
4240     while(getLexer().is(AsmToken::EndOfStatement))
4241       Lex();
4242 
4243     if (getLexer().isNot(AsmToken::Identifier))
4244       return TokError("expected value identifier or .end_amd_kernel_code_t");
4245 
4246     StringRef ID = getLexer().getTok().getIdentifier();
4247     Lex();
4248 
4249     if (ID == ".end_amd_kernel_code_t")
4250       break;
4251 
4252     if (ParseAMDKernelCodeTValue(ID, Header))
4253       return true;
4254   }
4255 
4256   getTargetStreamer().EmitAMDKernelCodeT(Header);
4257 
4258   return false;
4259 }
4260 
4261 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4262   if (getLexer().isNot(AsmToken::Identifier))
4263     return TokError("expected symbol name");
4264 
4265   StringRef KernelName = Parser.getTok().getString();
4266 
4267   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4268                                            ELF::STT_AMDGPU_HSA_KERNEL);
4269   Lex();
4270   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4271     KernelScope.initialize(getContext());
4272   return false;
4273 }
4274 
4275 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4276   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4277     return Error(getParser().getTok().getLoc(),
4278                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4279                  "architectures");
4280   }
4281 
4282   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4283 
4284   std::string ISAVersionStringFromSTI;
4285   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4286   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4287 
4288   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4289     return Error(getParser().getTok().getLoc(),
4290                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4291                  "arguments specified through the command line");
4292   }
4293 
4294   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4295   Lex();
4296 
4297   return false;
4298 }
4299 
4300 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4301   const char *AssemblerDirectiveBegin;
4302   const char *AssemblerDirectiveEnd;
4303   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4304       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4305           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4306                             HSAMD::V3::AssemblerDirectiveEnd)
4307           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4308                             HSAMD::AssemblerDirectiveEnd);
4309 
4310   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4311     return Error(getParser().getTok().getLoc(),
4312                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4313                  "not available on non-amdhsa OSes")).str());
4314   }
4315 
4316   std::string HSAMetadataString;
4317   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4318                           HSAMetadataString))
4319     return true;
4320 
4321   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4322     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4323       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4324   } else {
4325     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4326       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4327   }
4328 
4329   return false;
4330 }
4331 
4332 /// Common code to parse out a block of text (typically YAML) between start and
4333 /// end directives.
4334 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4335                                           const char *AssemblerDirectiveEnd,
4336                                           std::string &CollectString) {
4337 
4338   raw_string_ostream CollectStream(CollectString);
4339 
4340   getLexer().setSkipSpace(false);
4341 
4342   bool FoundEnd = false;
4343   while (!getLexer().is(AsmToken::Eof)) {
4344     while (getLexer().is(AsmToken::Space)) {
4345       CollectStream << getLexer().getTok().getString();
4346       Lex();
4347     }
4348 
4349     if (getLexer().is(AsmToken::Identifier)) {
4350       StringRef ID = getLexer().getTok().getIdentifier();
4351       if (ID == AssemblerDirectiveEnd) {
4352         Lex();
4353         FoundEnd = true;
4354         break;
4355       }
4356     }
4357 
4358     CollectStream << Parser.parseStringToEndOfStatement()
4359                   << getContext().getAsmInfo()->getSeparatorString();
4360 
4361     Parser.eatToEndOfStatement();
4362   }
4363 
4364   getLexer().setSkipSpace(true);
4365 
4366   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4367     return TokError(Twine("expected directive ") +
4368                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4369   }
4370 
4371   CollectStream.flush();
4372   return false;
4373 }
4374 
4375 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4376 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4377   std::string String;
4378   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4379                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4380     return true;
4381 
4382   auto PALMetadata = getTargetStreamer().getPALMetadata();
4383   if (!PALMetadata->setFromString(String))
4384     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4385   return false;
4386 }
4387 
4388 /// Parse the assembler directive for old linear-format PAL metadata.
4389 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4390   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4391     return Error(getParser().getTok().getLoc(),
4392                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4393                  "not available on non-amdpal OSes")).str());
4394   }
4395 
4396   auto PALMetadata = getTargetStreamer().getPALMetadata();
4397   PALMetadata->setLegacy();
4398   for (;;) {
4399     uint32_t Key, Value;
4400     if (ParseAsAbsoluteExpression(Key)) {
4401       return TokError(Twine("invalid value in ") +
4402                       Twine(PALMD::AssemblerDirective));
4403     }
4404     if (getLexer().isNot(AsmToken::Comma)) {
4405       return TokError(Twine("expected an even number of values in ") +
4406                       Twine(PALMD::AssemblerDirective));
4407     }
4408     Lex();
4409     if (ParseAsAbsoluteExpression(Value)) {
4410       return TokError(Twine("invalid value in ") +
4411                       Twine(PALMD::AssemblerDirective));
4412     }
4413     PALMetadata->setRegister(Key, Value);
4414     if (getLexer().isNot(AsmToken::Comma))
4415       break;
4416     Lex();
4417   }
4418   return false;
4419 }
4420 
4421 /// ParseDirectiveAMDGPULDS
4422 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4423 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4424   if (getParser().checkForValidSection())
4425     return true;
4426 
4427   StringRef Name;
4428   SMLoc NameLoc = getLexer().getLoc();
4429   if (getParser().parseIdentifier(Name))
4430     return TokError("expected identifier in directive");
4431 
4432   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4433   if (parseToken(AsmToken::Comma, "expected ','"))
4434     return true;
4435 
4436   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4437 
4438   int64_t Size;
4439   SMLoc SizeLoc = getLexer().getLoc();
4440   if (getParser().parseAbsoluteExpression(Size))
4441     return true;
4442   if (Size < 0)
4443     return Error(SizeLoc, "size must be non-negative");
4444   if (Size > LocalMemorySize)
4445     return Error(SizeLoc, "size is too large");
4446 
4447   int64_t Align = 4;
4448   if (getLexer().is(AsmToken::Comma)) {
4449     Lex();
4450     SMLoc AlignLoc = getLexer().getLoc();
4451     if (getParser().parseAbsoluteExpression(Align))
4452       return true;
4453     if (Align < 0 || !isPowerOf2_64(Align))
4454       return Error(AlignLoc, "alignment must be a power of two");
4455 
4456     // Alignment larger than the size of LDS is possible in theory, as long
4457     // as the linker manages to place the symbol at address 0, but we do want
4458     // to make sure the alignment fits nicely into a 32-bit integer.
4459     if (Align >= 1u << 31)
4460       return Error(AlignLoc, "alignment is too large");
4461   }
4462 
4463   if (parseToken(AsmToken::EndOfStatement,
4464                  "unexpected token in '.amdgpu_lds' directive"))
4465     return true;
4466 
4467   Symbol->redefineIfPossible();
4468   if (!Symbol->isUndefined())
4469     return Error(NameLoc, "invalid symbol redefinition");
4470 
4471   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4472   return false;
4473 }
4474 
4475 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4476   StringRef IDVal = DirectiveID.getString();
4477 
4478   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4479     if (IDVal == ".amdgcn_target")
4480       return ParseDirectiveAMDGCNTarget();
4481 
4482     if (IDVal == ".amdhsa_kernel")
4483       return ParseDirectiveAMDHSAKernel();
4484 
4485     // TODO: Restructure/combine with PAL metadata directive.
4486     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4487       return ParseDirectiveHSAMetadata();
4488   } else {
4489     if (IDVal == ".hsa_code_object_version")
4490       return ParseDirectiveHSACodeObjectVersion();
4491 
4492     if (IDVal == ".hsa_code_object_isa")
4493       return ParseDirectiveHSACodeObjectISA();
4494 
4495     if (IDVal == ".amd_kernel_code_t")
4496       return ParseDirectiveAMDKernelCodeT();
4497 
4498     if (IDVal == ".amdgpu_hsa_kernel")
4499       return ParseDirectiveAMDGPUHsaKernel();
4500 
4501     if (IDVal == ".amd_amdgpu_isa")
4502       return ParseDirectiveISAVersion();
4503 
4504     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4505       return ParseDirectiveHSAMetadata();
4506   }
4507 
4508   if (IDVal == ".amdgpu_lds")
4509     return ParseDirectiveAMDGPULDS();
4510 
4511   if (IDVal == PALMD::AssemblerDirectiveBegin)
4512     return ParseDirectivePALMetadataBegin();
4513 
4514   if (IDVal == PALMD::AssemblerDirective)
4515     return ParseDirectivePALMetadata();
4516 
4517   return true;
4518 }
4519 
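// Return true if RegNo names a register that exists on the current
// subtarget (extra ttmp and SGPR pairs, xnack_mask, flat_scratch, etc.).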
4520 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4521                                            unsigned RegNo) const {
4522 
4523   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4524        R.isValid(); ++R) {
4525     if (*R == RegNo)
4526       return isGFX9() || isGFX10();
4527   }
4528 
4529   // GFX10 has 2 more SGPRs: 104 and 105.
4530   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4531        R.isValid(); ++R) {
4532     if (*R == RegNo)
4533       return hasSGPR104_SGPR105();
4534   }
4535 
4536   switch (RegNo) {
4537   case AMDGPU::SRC_SHARED_BASE:
4538   case AMDGPU::SRC_SHARED_LIMIT:
4539   case AMDGPU::SRC_PRIVATE_BASE:
4540   case AMDGPU::SRC_PRIVATE_LIMIT:
4541   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4542     return !isCI() && !isSI() && !isVI();
4543   case AMDGPU::TBA:
4544   case AMDGPU::TBA_LO:
4545   case AMDGPU::TBA_HI:
4546   case AMDGPU::TMA:
4547   case AMDGPU::TMA_LO:
4548   case AMDGPU::TMA_HI:
4549     return !isGFX9() && !isGFX10();
4550   case AMDGPU::XNACK_MASK:
4551   case AMDGPU::XNACK_MASK_LO:
4552   case AMDGPU::XNACK_MASK_HI:
4553     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4554   case AMDGPU::SGPR_NULL:
4555     return isGFX10();
4556   default:
4557     break;
4558   }
4559 
4560   if (isCI())
4561     return true;
4562 
4563   if (isSI() || isGFX10()) {
4564     // No flat_scr on SI.
4565     // On GFX10 flat scratch is not a valid register operand and can only be
4566     // accessed with s_setreg/s_getreg.
4567     switch (RegNo) {
4568     case AMDGPU::FLAT_SCR:
4569     case AMDGPU::FLAT_SCR_LO:
4570     case AMDGPU::FLAT_SCR_HI:
4571       return false;
4572     default:
4573       return true;
4574     }
4575   }
4576 
4577   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 extra
4578   // SGPRs (102 and 103) that SI/CI have.
4579   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4580        R.isValid(); ++R) {
4581     if (*R == RegNo)
4582       return hasSGPR102_SGPR103();
4583   }
4584 
4585   return true;
4586 }
4587 
4588 OperandMatchResultTy
4589 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4590                               OperandMode Mode) {
4591   // Try to parse with a custom parser
4592   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4593 
4594   // If we successfully parsed the operand or if there was an error parsing,
4595   // we are done.
4596   //
4597   // If we are parsing after we reach EndOfStatement then this means we
4598   // are appending default values to the Operands list.  This is only done
4599   // by a custom parser, so we shouldn't continue on to the generic parsing.
4600   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4601       getLexer().is(AsmToken::EndOfStatement))
4602     return ResTy;
4603 
4604   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4605     unsigned Prefix = Operands.size();
4606     SMLoc LBraceLoc = getTok().getLoc();
4607     Parser.Lex(); // eat the '['
4608 
4609     for (;;) {
4610       ResTy = parseReg(Operands);
4611       if (ResTy != MatchOperand_Success)
4612         return ResTy;
4613 
4614       if (getLexer().is(AsmToken::RBrac))
4615         break;
4616 
4617       if (getLexer().isNot(AsmToken::Comma))
4618         return MatchOperand_ParseFail;
4619       Parser.Lex();
4620     }
4621 
4622     if (Operands.size() - Prefix > 1) {
4623       Operands.insert(Operands.begin() + Prefix,
4624                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4625       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4626                                                     getTok().getLoc()));
4627     }
4628 
4629     Parser.Lex(); // eat the ']'
4630     return MatchOperand_Success;
4631   }
4632 
4633   return parseRegOrImm(Operands);
4634 }
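// Illustrative only: on GFX10, MIMG addresses may be supplied as an NSA list
// in square brackets, roughly like (a sketch, not taken from an actual test):
//   image_sample v[16:19], [v0, v1, v2], s[4:11], s[12:15] dmask:0xf dim:SQ_RSRC_IMG_3D
// A single-element list such as [v0] is accepted, but the enclosing bracket
// tokens are dropped by the code above.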
4635 
4636 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4637   // Clear any forced encodings from the previous instruction.
4638   setForcedEncodingSize(0);
4639   setForcedDPP(false);
4640   setForcedSDWA(false);
4641 
4642   if (Name.endswith("_e64")) {
4643     setForcedEncodingSize(64);
4644     return Name.substr(0, Name.size() - 4);
4645   } else if (Name.endswith("_e32")) {
4646     setForcedEncodingSize(32);
4647     return Name.substr(0, Name.size() - 4);
4648   } else if (Name.endswith("_dpp")) {
4649     setForcedDPP(true);
4650     return Name.substr(0, Name.size() - 4);
4651   } else if (Name.endswith("_sdwa")) {
4652     setForcedSDWA(true);
4653     return Name.substr(0, Name.size() - 5);
4654   }
4655   return Name;
4656 }
4657 
4658 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4659                                        StringRef Name,
4660                                        SMLoc NameLoc, OperandVector &Operands) {
4661   // Add the instruction mnemonic
4662   Name = parseMnemonicSuffix(Name);
4663   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4664 
4665   bool IsMIMG = Name.startswith("image_");
4666 
4667   while (!getLexer().is(AsmToken::EndOfStatement)) {
4668     OperandMode Mode = OperandMode_Default;
4669     if (IsMIMG && isGFX10() && Operands.size() == 2)
4670       Mode = OperandMode_NSA;
4671     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4672 
4673     // Eat the comma or space if there is one.
4674     if (getLexer().is(AsmToken::Comma))
4675       Parser.Lex();
4676 
4677     switch (Res) {
4678       case MatchOperand_Success: break;
4679       case MatchOperand_ParseFail:
4680         // FIXME: use real operand location rather than the current location.
4681         Error(getLexer().getLoc(), "failed parsing operand.");
4682         while (!getLexer().is(AsmToken::EndOfStatement)) {
4683           Parser.Lex();
4684         }
4685         return true;
4686       case MatchOperand_NoMatch:
4687         // FIXME: use real operand location rather than the current location.
4688         Error(getLexer().getLoc(), "not a valid operand.");
4689         while (!getLexer().is(AsmToken::EndOfStatement)) {
4690           Parser.Lex();
4691         }
4692         return true;
4693     }
4694   }
4695 
4696   return false;
4697 }
4698 
4699 //===----------------------------------------------------------------------===//
4700 // Utility functions
4701 //===----------------------------------------------------------------------===//
4702 
4703 OperandMatchResultTy
4704 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4705 
4706   if (!trySkipId(Prefix, AsmToken::Colon))
4707     return MatchOperand_NoMatch;
4708 
4709   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4710 }
4711 
4712 OperandMatchResultTy
4713 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4714                                     AMDGPUOperand::ImmTy ImmTy,
4715                                     bool (*ConvertResult)(int64_t&)) {
4716   SMLoc S = getLoc();
4717   int64_t Value = 0;
4718 
4719   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4720   if (Res != MatchOperand_Success)
4721     return Res;
4722 
4723   if (ConvertResult && !ConvertResult(Value)) {
4724     Error(S, "invalid " + StringRef(Prefix) + " value.");
4725   }
4726 
4727   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4728   return MatchOperand_Success;
4729 }
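// A hedged example of the 'prefix:value' form handled above; the prefix names
// below are taken from the optional-operand table later in this file:
//   offset:4096    row_mask:0xf    bank_mask:0x3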
4730 
4731 OperandMatchResultTy
4732 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4733                                              OperandVector &Operands,
4734                                              AMDGPUOperand::ImmTy ImmTy,
4735                                              bool (*ConvertResult)(int64_t&)) {
4736   SMLoc S = getLoc();
4737   if (!trySkipId(Prefix, AsmToken::Colon))
4738     return MatchOperand_NoMatch;
4739 
4740   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4741     return MatchOperand_ParseFail;
4742 
4743   unsigned Val = 0;
4744   const unsigned MaxSize = 4;
4745 
4746   // FIXME: How to verify the number of elements matches the number of src
4747   // operands?
4748   for (int I = 0; ; ++I) {
4749     int64_t Op;
4750     SMLoc Loc = getLoc();
4751     if (!parseExpr(Op))
4752       return MatchOperand_ParseFail;
4753 
4754     if (Op != 0 && Op != 1) {
4755       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4756       return MatchOperand_ParseFail;
4757     }
4758 
4759     Val |= (Op << I);
4760 
4761     if (trySkipToken(AsmToken::RBrac))
4762       break;
4763 
4764     if (I + 1 == MaxSize) {
4765       Error(getLoc(), "expected a closing square bracket");
4766       return MatchOperand_ParseFail;
4767     }
4768 
4769     if (!skipToken(AsmToken::Comma, "expected a comma"))
4770       return MatchOperand_ParseFail;
4771   }
4772 
4773   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4774   return MatchOperand_Success;
4775 }
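// A sketch of the 'prefix:[...]' form handled above: each element must be 0 or
// 1, and element I is packed into bit I of the resulting immediate. Assuming
// op_sel is routed through this parser:
//   op_sel:[0,1,1]  ->  Val = 0b110 = 6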
4776 
4777 OperandMatchResultTy
4778 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4779                                AMDGPUOperand::ImmTy ImmTy) {
4780   int64_t Bit = 0;
4781   SMLoc S = Parser.getTok().getLoc();
4782 
4783   // If we are at the end of the statement, this is a default argument, so
4784   // use the default value.
4785   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4786     switch(getLexer().getKind()) {
4787       case AsmToken::Identifier: {
4788         StringRef Tok = Parser.getTok().getString();
4789         if (Tok == Name) {
4790           if (Tok == "r128" && !hasMIMG_R128())
4791             Error(S, "r128 modifier is not supported on this GPU");
4792           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4793             Error(S, "a16 modifier is not supported on this GPU");
4794           Bit = 1;
4795           Parser.Lex();
4796         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4797           Bit = 0;
4798           Parser.Lex();
4799         } else {
4800           return MatchOperand_NoMatch;
4801         }
4802         break;
4803       }
4804       default:
4805         return MatchOperand_NoMatch;
4806     }
4807   }
4808 
4809   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4810     return MatchOperand_ParseFail;
4811 
4812   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4813     ImmTy = AMDGPUOperand::ImmTyR128A16;
4814 
4815   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4816   return MatchOperand_Success;
4817 }
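// Per the logic above, a named bit is set by the bare name and cleared by its
// "no"-prefixed form, e.g. (illustrative): 'gds' -> 1, 'nogds' -> 0.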
4818 
4819 static void addOptionalImmOperand(
4820   MCInst& Inst, const OperandVector& Operands,
4821   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4822   AMDGPUOperand::ImmTy ImmT,
4823   int64_t Default = 0) {
4824   auto i = OptionalIdx.find(ImmT);
4825   if (i != OptionalIdx.end()) {
4826     unsigned Idx = i->second;
4827     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4828   } else {
4829     Inst.addOperand(MCOperand::createImm(Default));
4830   }
4831 }
4832 
4833 OperandMatchResultTy
4834 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4835   if (getLexer().isNot(AsmToken::Identifier)) {
4836     return MatchOperand_NoMatch;
4837   }
4838   StringRef Tok = Parser.getTok().getString();
4839   if (Tok != Prefix) {
4840     return MatchOperand_NoMatch;
4841   }
4842 
4843   Parser.Lex();
4844   if (getLexer().isNot(AsmToken::Colon)) {
4845     return MatchOperand_ParseFail;
4846   }
4847 
4848   Parser.Lex();
4849   if (getLexer().isNot(AsmToken::Identifier)) {
4850     return MatchOperand_ParseFail;
4851   }
4852 
4853   Value = Parser.getTok().getString();
4854   return MatchOperand_Success;
4855 }
4856 
4857 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4858 // values to live in a joint format operand in the MCInst encoding.
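// For example (illustrative): 'dfmt:5, nfmt:2' (in either order) is encoded
// below as Format = 5 | (2 << 4) = 0x25.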
4859 OperandMatchResultTy
4860 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4861   SMLoc S = Parser.getTok().getLoc();
4862   int64_t Dfmt = 0, Nfmt = 0;
4863   // dfmt and nfmt can appear in either order, and each is optional.
4864   bool GotDfmt = false, GotNfmt = false;
4865   while (!GotDfmt || !GotNfmt) {
4866     if (!GotDfmt) {
4867       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4868       if (Res != MatchOperand_NoMatch) {
4869         if (Res != MatchOperand_Success)
4870           return Res;
4871         if (Dfmt >= 16) {
4872           Error(Parser.getTok().getLoc(), "out of range dfmt");
4873           return MatchOperand_ParseFail;
4874         }
4875         GotDfmt = true;
4876         Parser.Lex();
4877         continue;
4878       }
4879     }
4880     if (!GotNfmt) {
4881       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4882       if (Res != MatchOperand_NoMatch) {
4883         if (Res != MatchOperand_Success)
4884           return Res;
4885         if (Nfmt >= 8) {
4886           Error(Parser.getTok().getLoc(), "out of range nfmt");
4887           return MatchOperand_ParseFail;
4888         }
4889         GotNfmt = true;
4890         Parser.Lex();
4891         continue;
4892       }
4893     }
4894     break;
4895   }
4896   if (!GotDfmt && !GotNfmt)
4897     return MatchOperand_NoMatch;
4898   auto Format = Dfmt | Nfmt << 4;
4899   Operands.push_back(
4900       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4901   return MatchOperand_Success;
4902 }
4903 
4904 //===----------------------------------------------------------------------===//
4905 // ds
4906 //===----------------------------------------------------------------------===//
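// A sketch of DS operand forms handled by the converters below (illustrative
// assembly, not taken from a test):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8
//   ds_add_u32    v1, v2 offset:16 gds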
4907 
4908 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4909                                     const OperandVector &Operands) {
4910   OptionalImmIndexMap OptionalIdx;
4911 
4912   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4913     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4914 
4915     // Add the register arguments
4916     if (Op.isReg()) {
4917       Op.addRegOperands(Inst, 1);
4918       continue;
4919     }
4920 
4921     // Handle optional arguments
4922     OptionalIdx[Op.getImmTy()] = i;
4923   }
4924 
4925   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4926   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4927   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4928 
4929   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4930 }
4931 
4932 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4933                                 bool IsGdsHardcoded) {
4934   OptionalImmIndexMap OptionalIdx;
4935 
4936   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4937     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4938 
4939     // Add the register arguments
4940     if (Op.isReg()) {
4941       Op.addRegOperands(Inst, 1);
4942       continue;
4943     }
4944 
4945     if (Op.isToken() && Op.getToken() == "gds") {
4946       IsGdsHardcoded = true;
4947       continue;
4948     }
4949 
4950     // Handle optional arguments
4951     OptionalIdx[Op.getImmTy()] = i;
4952   }
4953 
4954   AMDGPUOperand::ImmTy OffsetType =
4955     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4956      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4957      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4958                                                       AMDGPUOperand::ImmTyOffset;
4959 
4960   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4961 
4962   if (!IsGdsHardcoded) {
4963     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4964   }
4965   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4966 }
4967 
4968 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4969   OptionalImmIndexMap OptionalIdx;
4970 
4971   unsigned OperandIdx[4];
4972   unsigned EnMask = 0;
4973   int SrcIdx = 0;
4974 
4975   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4976     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4977 
4978     // Add the register arguments
4979     if (Op.isReg()) {
4980       assert(SrcIdx < 4);
4981       OperandIdx[SrcIdx] = Inst.size();
4982       Op.addRegOperands(Inst, 1);
4983       ++SrcIdx;
4984       continue;
4985     }
4986 
4987     if (Op.isOff()) {
4988       assert(SrcIdx < 4);
4989       OperandIdx[SrcIdx] = Inst.size();
4990       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4991       ++SrcIdx;
4992       continue;
4993     }
4994 
4995     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4996       Op.addImmOperands(Inst, 1);
4997       continue;
4998     }
4999 
5000     if (Op.isToken() && Op.getToken() == "done")
5001       continue;
5002 
5003     // Handle optional arguments
5004     OptionalIdx[Op.getImmTy()] = i;
5005   }
5006 
5007   assert(SrcIdx == 4);
5008 
5009   bool Compr = false;
5010   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5011     Compr = true;
5012     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5013     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5014     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5015   }
5016 
5017   for (auto i = 0; i < SrcIdx; ++i) {
5018     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5019       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5020     }
5021   }
5022 
5023   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5024   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5025 
5026   Inst.addOperand(MCOperand::createImm(EnMask));
5027 }
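// Example of the enable-mask computation above (illustrative assembly):
//   exp mrt0 v0, v1, off, off        -> en = 0b0011
//   exp mrt0 v0, v0, v1, v1 compr    -> two packed register pairs, en = 0b1111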
5028 
5029 //===----------------------------------------------------------------------===//
5030 // s_waitcnt
5031 //===----------------------------------------------------------------------===//
5032 
5033 static bool
5034 encodeCnt(
5035   const AMDGPU::IsaVersion ISA,
5036   int64_t &IntVal,
5037   int64_t CntVal,
5038   bool Saturate,
5039   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5040   unsigned (*decode)(const IsaVersion &Version, unsigned))
5041 {
5042   bool Failed = false;
5043 
5044   IntVal = encode(ISA, IntVal, CntVal);
5045   if (CntVal != decode(ISA, IntVal)) {
5046     if (Saturate) {
5047       IntVal = encode(ISA, IntVal, -1);
5048     } else {
5049       Failed = true;
5050     }
5051   }
5052   return Failed;
5053 }
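// Illustrative s_waitcnt forms accepted by parseCnt() below; counters may be
// separated by spaces, commas or '&', and a '_sat' suffix clamps an
// out-of-range value instead of reporting an error:
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt_sat(1000) & lgkmcnt(0)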
5054 
5055 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5056 
5057   SMLoc CntLoc = getLoc();
5058   StringRef CntName = getTokenStr();
5059 
5060   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5061       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5062     return false;
5063 
5064   int64_t CntVal;
5065   SMLoc ValLoc = getLoc();
5066   if (!parseExpr(CntVal))
5067     return false;
5068 
5069   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5070 
5071   bool Failed = true;
5072   bool Sat = CntName.endswith("_sat");
5073 
5074   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5075     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5076   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5077     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5078   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5079     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5080   } else {
5081     Error(CntLoc, "invalid counter name " + CntName);
5082     return false;
5083   }
5084 
5085   if (Failed) {
5086     Error(ValLoc, "too large value for " + CntName);
5087     return false;
5088   }
5089 
5090   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5091     return false;
5092 
5093   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5094     if (isToken(AsmToken::EndOfStatement)) {
5095       Error(getLoc(), "expected a counter name");
5096       return false;
5097     }
5098   }
5099 
5100   return true;
5101 }
5102 
5103 OperandMatchResultTy
5104 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5105   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5106   int64_t Waitcnt = getWaitcntBitMask(ISA);
5107   SMLoc S = getLoc();
5108 
5109   // If parsing failed, do not return an error code
5110   // to avoid excessive error messages.
5111   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5112     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5113   } else {
5114     parseExpr(Waitcnt);
5115   }
5116 
5117   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5118   return MatchOperand_Success;
5119 }
5120 
5121 bool
5122 AMDGPUOperand::isSWaitCnt() const {
5123   return isImm();
5124 }
5125 
5126 //===----------------------------------------------------------------------===//
5127 // hwreg
5128 //===----------------------------------------------------------------------===//
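// Illustrative forms handled below; the symbolic register names (e.g.
// HW_REG_MODE, HW_REG_GPR_ALLOC) are assumed to match those defined in
// AMDGPUAsmUtils:
//   s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC, 6, 4)   // register, offset, width
//   s_setreg_b32 hwreg(HW_REG_MODE), s0              // offset/width defaulted
//   s_getreg_b32 s0, hwreg(5, 1, 31)                 // raw numeric form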
5129 
5130 bool
5131 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5132                                 int64_t &Offset,
5133                                 int64_t &Width) {
5134   using namespace llvm::AMDGPU::Hwreg;
5135 
5136   // The register may be specified by name or using a numeric code
5137   if (isToken(AsmToken::Identifier) &&
5138       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5139     HwReg.IsSymbolic = true;
5140     lex(); // skip register name
5141   } else if (!parseExpr(HwReg.Id)) {
5142     return false;
5143   }
5144 
5145   if (trySkipToken(AsmToken::RParen))
5146     return true;
5147 
5148   // parse optional params
5149   return
5150     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5151     parseExpr(Offset) &&
5152     skipToken(AsmToken::Comma, "expected a comma") &&
5153     parseExpr(Width) &&
5154     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5155 }
5156 
5157 bool
5158 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5159                                const int64_t Offset,
5160                                const int64_t Width,
5161                                const SMLoc Loc) {
5162 
5163   using namespace llvm::AMDGPU::Hwreg;
5164 
5165   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5166     Error(Loc, "specified hardware register is not supported on this GPU");
5167     return false;
5168   } else if (!isValidHwreg(HwReg.Id)) {
5169     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5170     return false;
5171   } else if (!isValidHwregOffset(Offset)) {
5172     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5173     return false;
5174   } else if (!isValidHwregWidth(Width)) {
5175     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5176     return false;
5177   }
5178   return true;
5179 }
5180 
5181 OperandMatchResultTy
5182 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5183   using namespace llvm::AMDGPU::Hwreg;
5184 
5185   int64_t ImmVal = 0;
5186   SMLoc Loc = getLoc();
5187 
5188   // If parsing failed, do not return an error code
5189   // to avoid excessive error messages.
5190   if (trySkipId("hwreg", AsmToken::LParen)) {
5191     OperandInfoTy HwReg(ID_UNKNOWN_);
5192     int64_t Offset = OFFSET_DEFAULT_;
5193     int64_t Width = WIDTH_DEFAULT_;
5194     if (parseHwregBody(HwReg, Offset, Width) &&
5195         validateHwreg(HwReg, Offset, Width, Loc)) {
5196       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5197     }
5198   } else if (parseExpr(ImmVal)) {
5199     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5200       Error(Loc, "invalid immediate: only 16-bit values are legal");
5201   }
5202 
5203   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5204   return MatchOperand_Success;
5205 }
5206 
5207 bool AMDGPUOperand::isHwreg() const {
5208   return isImmTy(ImmTyHwreg);
5209 }
5210 
5211 //===----------------------------------------------------------------------===//
5212 // sendmsg
5213 //===----------------------------------------------------------------------===//
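// Illustrative forms handled below; the symbolic names (e.g. MSG_GS,
// GS_OP_EMIT) are assumed to match those defined in AMDGPUAsmUtils:
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   // message, operation, stream
//   s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
//   s_sendmsg 0x3                              // raw 16-bit immediate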
5214 
5215 bool
5216 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5217                                   OperandInfoTy &Op,
5218                                   OperandInfoTy &Stream) {
5219   using namespace llvm::AMDGPU::SendMsg;
5220 
5221   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5222     Msg.IsSymbolic = true;
5223     lex(); // skip message name
5224   } else if (!parseExpr(Msg.Id)) {
5225     return false;
5226   }
5227 
5228   if (trySkipToken(AsmToken::Comma)) {
5229     Op.IsDefined = true;
5230     if (isToken(AsmToken::Identifier) &&
5231         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5232       lex(); // skip operation name
5233     } else if (!parseExpr(Op.Id)) {
5234       return false;
5235     }
5236 
5237     if (trySkipToken(AsmToken::Comma)) {
5238       Stream.IsDefined = true;
5239       if (!parseExpr(Stream.Id))
5240         return false;
5241     }
5242   }
5243 
5244   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5245 }
5246 
5247 bool
5248 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5249                                  const OperandInfoTy &Op,
5250                                  const OperandInfoTy &Stream,
5251                                  const SMLoc S) {
5252   using namespace llvm::AMDGPU::SendMsg;
5253 
5254   // Validation strictness depends on whether the message is specified
5255   // in a symbolic or in a numeric form. In the latter case
5256   // only the possibility of encoding is checked.
5257   bool Strict = Msg.IsSymbolic;
5258 
5259   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5260     Error(S, "invalid message id");
5261     return false;
5262   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5263     Error(S, Op.IsDefined ?
5264              "message does not support operations" :
5265              "missing message operation");
5266     return false;
5267   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5268     Error(S, "invalid operation id");
5269     return false;
5270   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5271     Error(S, "message operation does not support streams");
5272     return false;
5273   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5274     Error(S, "invalid message stream id");
5275     return false;
5276   }
5277   return true;
5278 }
5279 
5280 OperandMatchResultTy
5281 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5282   using namespace llvm::AMDGPU::SendMsg;
5283 
5284   int64_t ImmVal = 0;
5285   SMLoc Loc = getLoc();
5286 
5287   // If parsing failed, do not return an error code
5288   // to avoid excessive error messages.
5289   if (trySkipId("sendmsg", AsmToken::LParen)) {
5290     OperandInfoTy Msg(ID_UNKNOWN_);
5291     OperandInfoTy Op(OP_NONE_);
5292     OperandInfoTy Stream(STREAM_ID_NONE_);
5293     if (parseSendMsgBody(Msg, Op, Stream) &&
5294         validateSendMsg(Msg, Op, Stream, Loc)) {
5295       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5296     }
5297   } else if (parseExpr(ImmVal)) {
5298     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5299       Error(Loc, "invalid immediate: only 16-bit values are legal");
5300   }
5301 
5302   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5303   return MatchOperand_Success;
5304 }
5305 
5306 bool AMDGPUOperand::isSendMsg() const {
5307   return isImmTy(ImmTySendMsg);
5308 }
5309 
5310 //===----------------------------------------------------------------------===//
5311 // v_interp
5312 //===----------------------------------------------------------------------===//
5313 
5314 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5315   if (getLexer().getKind() != AsmToken::Identifier)
5316     return MatchOperand_NoMatch;
5317 
5318   StringRef Str = Parser.getTok().getString();
5319   int Slot = StringSwitch<int>(Str)
5320     .Case("p10", 0)
5321     .Case("p20", 1)
5322     .Case("p0", 2)
5323     .Default(-1);
5324 
5325   SMLoc S = Parser.getTok().getLoc();
5326   if (Slot == -1)
5327     return MatchOperand_ParseFail;
5328 
5329   Parser.Lex();
5330   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5331                                               AMDGPUOperand::ImmTyInterpSlot));
5332   return MatchOperand_Success;
5333 }
5334 
5335 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5336   if (getLexer().getKind() != AsmToken::Identifier)
5337     return MatchOperand_NoMatch;
5338 
5339   StringRef Str = Parser.getTok().getString();
5340   if (!Str.startswith("attr"))
5341     return MatchOperand_NoMatch;
5342 
5343   StringRef Chan = Str.take_back(2);
5344   int AttrChan = StringSwitch<int>(Chan)
5345     .Case(".x", 0)
5346     .Case(".y", 1)
5347     .Case(".z", 2)
5348     .Case(".w", 3)
5349     .Default(-1);
5350   if (AttrChan == -1)
5351     return MatchOperand_ParseFail;
5352 
5353   Str = Str.drop_back(2).drop_front(4);
5354 
5355   uint8_t Attr;
5356   if (Str.getAsInteger(10, Attr))
5357     return MatchOperand_ParseFail;
5358 
5359   SMLoc S = Parser.getTok().getLoc();
5360   Parser.Lex();
5361   if (Attr > 63) {
5362     Error(S, "out of bounds attr");
5363     return MatchOperand_Success;
5364   }
5365 
5366   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5367 
5368   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5369                                               AMDGPUOperand::ImmTyInterpAttr));
5370   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5371                                               AMDGPUOperand::ImmTyAttrChan));
5372   return MatchOperand_Success;
5373 }
5374 
5375 //===----------------------------------------------------------------------===//
5376 // exp
5377 //===----------------------------------------------------------------------===//
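// Target name -> encoded value mapping implemented by parseExpTgtImpl() below:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
//   pos4 -> 16 and prim -> 20 (both GFX10 only), param0..param31 -> 32..63.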
5378 
5379 void AMDGPUAsmParser::errorExpTgt() {
5380   Error(Parser.getTok().getLoc(), "invalid exp target");
5381 }
5382 
5383 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5384                                                       uint8_t &Val) {
5385   if (Str == "null") {
5386     Val = 9;
5387     return MatchOperand_Success;
5388   }
5389 
5390   if (Str.startswith("mrt")) {
5391     Str = Str.drop_front(3);
5392     if (Str == "z") { // == mrtz
5393       Val = 8;
5394       return MatchOperand_Success;
5395     }
5396 
5397     if (Str.getAsInteger(10, Val))
5398       return MatchOperand_ParseFail;
5399 
5400     if (Val > 7)
5401       errorExpTgt();
5402 
5403     return MatchOperand_Success;
5404   }
5405 
5406   if (Str.startswith("pos")) {
5407     Str = Str.drop_front(3);
5408     if (Str.getAsInteger(10, Val))
5409       return MatchOperand_ParseFail;
5410 
5411     if (Val > 4 || (Val == 4 && !isGFX10()))
5412       errorExpTgt();
5413 
5414     Val += 12;
5415     return MatchOperand_Success;
5416   }
5417 
5418   if (isGFX10() && Str == "prim") {
5419     Val = 20;
5420     return MatchOperand_Success;
5421   }
5422 
5423   if (Str.startswith("param")) {
5424     Str = Str.drop_front(5);
5425     if (Str.getAsInteger(10, Val))
5426       return MatchOperand_ParseFail;
5427 
5428     if (Val >= 32)
5429       errorExpTgt();
5430 
5431     Val += 32;
5432     return MatchOperand_Success;
5433   }
5434 
5435   if (Str.startswith("invalid_target_")) {
5436     Str = Str.drop_front(15);
5437     if (Str.getAsInteger(10, Val))
5438       return MatchOperand_ParseFail;
5439 
5440     errorExpTgt();
5441     return MatchOperand_Success;
5442   }
5443 
5444   return MatchOperand_NoMatch;
5445 }
5446 
5447 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5448   uint8_t Val;
5449   StringRef Str = Parser.getTok().getString();
5450 
5451   auto Res = parseExpTgtImpl(Str, Val);
5452   if (Res != MatchOperand_Success)
5453     return Res;
5454 
5455   SMLoc S = Parser.getTok().getLoc();
5456   Parser.Lex();
5457 
5458   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5459                                               AMDGPUOperand::ImmTyExpTgt));
5460   return MatchOperand_Success;
5461 }
5462 
5463 //===----------------------------------------------------------------------===//
5464 // parser helpers
5465 //===----------------------------------------------------------------------===//
5466 
5467 bool
5468 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5469   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5470 }
5471 
5472 bool
5473 AMDGPUAsmParser::isId(const StringRef Id) const {
5474   return isId(getToken(), Id);
5475 }
5476 
5477 bool
5478 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5479   return getTokenKind() == Kind;
5480 }
5481 
5482 bool
5483 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5484   if (isId(Id)) {
5485     lex();
5486     return true;
5487   }
5488   return false;
5489 }
5490 
5491 bool
5492 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5493   if (isId(Id) && peekToken().is(Kind)) {
5494     lex();
5495     lex();
5496     return true;
5497   }
5498   return false;
5499 }
5500 
5501 bool
5502 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5503   if (isToken(Kind)) {
5504     lex();
5505     return true;
5506   }
5507   return false;
5508 }
5509 
5510 bool
5511 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5512                            const StringRef ErrMsg) {
5513   if (!trySkipToken(Kind)) {
5514     Error(getLoc(), ErrMsg);
5515     return false;
5516   }
5517   return true;
5518 }
5519 
5520 bool
5521 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5522   return !getParser().parseAbsoluteExpression(Imm);
5523 }
5524 
5525 bool
5526 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5527   SMLoc S = getLoc();
5528 
5529   const MCExpr *Expr;
5530   if (Parser.parseExpression(Expr))
5531     return false;
5532 
5533   int64_t IntVal;
5534   if (Expr->evaluateAsAbsolute(IntVal)) {
5535     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5536   } else {
5537     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5538   }
5539   return true;
5540 }
5541 
5542 bool
5543 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5544   if (isToken(AsmToken::String)) {
5545     Val = getToken().getStringContents();
5546     lex();
5547     return true;
5548   } else {
5549     Error(getLoc(), ErrMsg);
5550     return false;
5551   }
5552 }
5553 
5554 AsmToken
5555 AMDGPUAsmParser::getToken() const {
5556   return Parser.getTok();
5557 }
5558 
5559 AsmToken
5560 AMDGPUAsmParser::peekToken() {
5561   return getLexer().peekTok();
5562 }
5563 
5564 void
5565 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5566   auto TokCount = getLexer().peekTokens(Tokens);
5567 
5568   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5569     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5570 }
5571 
5572 AsmToken::TokenKind
5573 AMDGPUAsmParser::getTokenKind() const {
5574   return getLexer().getKind();
5575 }
5576 
5577 SMLoc
5578 AMDGPUAsmParser::getLoc() const {
5579   return getToken().getLoc();
5580 }
5581 
5582 StringRef
5583 AMDGPUAsmParser::getTokenStr() const {
5584   return getToken().getString();
5585 }
5586 
5587 void
5588 AMDGPUAsmParser::lex() {
5589   Parser.Lex();
5590 }
5591 
5592 //===----------------------------------------------------------------------===//
5593 // swizzle
5594 //===----------------------------------------------------------------------===//
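// Illustrative ds_swizzle_b32 offset forms handled below; the mode names are
// assumed to match the IdSymbolic strings defined in AMDGPUAsmUtils:
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   offset:swizzle(BROADCAST, 8, 0)        // group size, lane id
//   offset:swizzle(SWAP, 4)
//   offset:swizzle(REVERSE, 8)
//   offset:swizzle(BITMASK_PERM, "01pi0")
//   offset:0x8000                          // raw 16-bit offset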
5595 
5596 LLVM_READNONE
5597 static unsigned
5598 encodeBitmaskPerm(const unsigned AndMask,
5599                   const unsigned OrMask,
5600                   const unsigned XorMask) {
5601   using namespace llvm::AMDGPU::Swizzle;
5602 
5603   return BITMASK_PERM_ENC |
5604          (AndMask << BITMASK_AND_SHIFT) |
5605          (OrMask  << BITMASK_OR_SHIFT)  |
5606          (XorMask << BITMASK_XOR_SHIFT);
5607 }
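// For example (derived from parseSwizzleBitmaskPerm below): each character of
// the 5-character mask string "01pi0" selects '0' force-0, '1' force-1,
// 'p' preserve or 'i' invert for one lane-id bit; the resulting and/or/xor
// masks are packed into the offset by this helper.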
5608 
5609 bool
5610 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5611                                       const unsigned MinVal,
5612                                       const unsigned MaxVal,
5613                                       const StringRef ErrMsg) {
5614   for (unsigned i = 0; i < OpNum; ++i) {
5615     if (!skipToken(AsmToken::Comma, "expected a comma")){
5616       return false;
5617     }
5618     SMLoc ExprLoc = Parser.getTok().getLoc();
5619     if (!parseExpr(Op[i])) {
5620       return false;
5621     }
5622     if (Op[i] < MinVal || Op[i] > MaxVal) {
5623       Error(ExprLoc, ErrMsg);
5624       return false;
5625     }
5626   }
5627 
5628   return true;
5629 }
5630 
5631 bool
5632 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5633   using namespace llvm::AMDGPU::Swizzle;
5634 
5635   int64_t Lane[LANE_NUM];
5636   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5637                            "expected a 2-bit lane id")) {
5638     Imm = QUAD_PERM_ENC;
5639     for (unsigned I = 0; I < LANE_NUM; ++I) {
5640       Imm |= Lane[I] << (LANE_SHIFT * I);
5641     }
5642     return true;
5643   }
5644   return false;
5645 }
5646 
5647 bool
5648 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5649   using namespace llvm::AMDGPU::Swizzle;
5650 
5651   SMLoc S = Parser.getTok().getLoc();
5652   int64_t GroupSize;
5653   int64_t LaneIdx;
5654 
5655   if (!parseSwizzleOperands(1, &GroupSize,
5656                             2, 32,
5657                             "group size must be in the interval [2,32]")) {
5658     return false;
5659   }
5660   if (!isPowerOf2_64(GroupSize)) {
5661     Error(S, "group size must be a power of two");
5662     return false;
5663   }
5664   if (parseSwizzleOperands(1, &LaneIdx,
5665                            0, GroupSize - 1,
5666                            "lane id must be in the interval [0,group size - 1]")) {
5667     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5668     return true;
5669   }
5670   return false;
5671 }
5672 
5673 bool
5674 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5675   using namespace llvm::AMDGPU::Swizzle;
5676 
5677   SMLoc S = Parser.getTok().getLoc();
5678   int64_t GroupSize;
5679 
5680   if (!parseSwizzleOperands(1, &GroupSize,
5681       2, 32, "group size must be in the interval [2,32]")) {
5682     return false;
5683   }
5684   if (!isPowerOf2_64(GroupSize)) {
5685     Error(S, "group size must be a power of two");
5686     return false;
5687   }
5688 
5689   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5690   return true;
5691 }
5692 
5693 bool
5694 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5695   using namespace llvm::AMDGPU::Swizzle;
5696 
5697   SMLoc S = Parser.getTok().getLoc();
5698   int64_t GroupSize;
5699 
5700   if (!parseSwizzleOperands(1, &GroupSize,
5701       1, 16, "group size must be in the interval [1,16]")) {
5702     return false;
5703   }
5704   if (!isPowerOf2_64(GroupSize)) {
5705     Error(S, "group size must be a power of two");
5706     return false;
5707   }
5708 
5709   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5710   return true;
5711 }
5712 
5713 bool
5714 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5715   using namespace llvm::AMDGPU::Swizzle;
5716 
5717   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5718     return false;
5719   }
5720 
5721   StringRef Ctl;
5722   SMLoc StrLoc = Parser.getTok().getLoc();
5723   if (!parseString(Ctl)) {
5724     return false;
5725   }
5726   if (Ctl.size() != BITMASK_WIDTH) {
5727     Error(StrLoc, "expected a 5-character mask");
5728     return false;
5729   }
5730 
5731   unsigned AndMask = 0;
5732   unsigned OrMask = 0;
5733   unsigned XorMask = 0;
5734 
5735   for (size_t i = 0; i < Ctl.size(); ++i) {
5736     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5737     switch(Ctl[i]) {
5738     default:
5739       Error(StrLoc, "invalid mask");
5740       return false;
5741     case '0':
5742       break;
5743     case '1':
5744       OrMask |= Mask;
5745       break;
5746     case 'p':
5747       AndMask |= Mask;
5748       break;
5749     case 'i':
5750       AndMask |= Mask;
5751       XorMask |= Mask;
5752       break;
5753     }
5754   }
5755 
5756   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5757   return true;
5758 }
5759 
5760 bool
5761 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5762 
5763   SMLoc OffsetLoc = Parser.getTok().getLoc();
5764 
5765   if (!parseExpr(Imm)) {
5766     return false;
5767   }
5768   if (!isUInt<16>(Imm)) {
5769     Error(OffsetLoc, "expected a 16-bit offset");
5770     return false;
5771   }
5772   return true;
5773 }
5774 
5775 bool
5776 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5777   using namespace llvm::AMDGPU::Swizzle;
5778 
5779   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5780 
5781     SMLoc ModeLoc = Parser.getTok().getLoc();
5782     bool Ok = false;
5783 
5784     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5785       Ok = parseSwizzleQuadPerm(Imm);
5786     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5787       Ok = parseSwizzleBitmaskPerm(Imm);
5788     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5789       Ok = parseSwizzleBroadcast(Imm);
5790     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5791       Ok = parseSwizzleSwap(Imm);
5792     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5793       Ok = parseSwizzleReverse(Imm);
5794     } else {
5795       Error(ModeLoc, "expected a swizzle mode");
5796     }
5797 
5798     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5799   }
5800 
5801   return false;
5802 }
5803 
5804 OperandMatchResultTy
5805 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5806   SMLoc S = Parser.getTok().getLoc();
5807   int64_t Imm = 0;
5808 
5809   if (trySkipId("offset")) {
5810 
5811     bool Ok = false;
5812     if (skipToken(AsmToken::Colon, "expected a colon")) {
5813       if (trySkipId("swizzle")) {
5814         Ok = parseSwizzleMacro(Imm);
5815       } else {
5816         Ok = parseSwizzleOffset(Imm);
5817       }
5818     }
5819 
5820     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5821 
5822     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5823   } else {
5824     // Swizzle "offset" operand is optional.
5825     // If it is omitted, try parsing other optional operands.
5826     return parseOptionalOpr(Operands);
5827   }
5828 }
5829 
5830 bool
5831 AMDGPUOperand::isSwizzle() const {
5832   return isImmTy(ImmTySwizzle);
5833 }
5834 
5835 //===----------------------------------------------------------------------===//
5836 // VGPR Index Mode
5837 //===----------------------------------------------------------------------===//
5838 
5839 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5840 
5841   using namespace llvm::AMDGPU::VGPRIndexMode;
5842 
5843   if (trySkipToken(AsmToken::RParen)) {
5844     return OFF;
5845   }
5846 
5847   int64_t Imm = 0;
5848 
5849   while (true) {
5850     unsigned Mode = 0;
5851     SMLoc S = Parser.getTok().getLoc();
5852 
5853     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5854       if (trySkipId(IdSymbolic[ModeId])) {
5855         Mode = 1 << ModeId;
5856         break;
5857       }
5858     }
5859 
5860     if (Mode == 0) {
5861       Error(S, (Imm == 0)?
5862                "expected a VGPR index mode or a closing parenthesis" :
5863                "expected a VGPR index mode");
5864       break;
5865     }
5866 
5867     if (Imm & Mode) {
5868       Error(S, "duplicate VGPR index mode");
5869       break;
5870     }
5871     Imm |= Mode;
5872 
5873     if (trySkipToken(AsmToken::RParen))
5874       break;
5875     if (!skipToken(AsmToken::Comma,
5876                    "expected a comma or a closing parenthesis"))
5877       break;
5878   }
5879 
5880   return Imm;
5881 }
5882 
5883 OperandMatchResultTy
5884 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5885 
5886   int64_t Imm = 0;
5887   SMLoc S = Parser.getTok().getLoc();
5888 
5889   if (getLexer().getKind() == AsmToken::Identifier &&
5890       Parser.getTok().getString() == "gpr_idx" &&
5891       getLexer().peekTok().is(AsmToken::LParen)) {
5892 
5893     Parser.Lex();
5894     Parser.Lex();
5895 
5896     // If parsing failed, trigger an error but do not return an error code
5897     // to avoid excessive error messages.
5898     Imm = parseGPRIdxMacro();
5899 
5900   } else {
5901     if (getParser().parseAbsoluteExpression(Imm))
5902       return MatchOperand_NoMatch;
5903     if (Imm < 0 || !isUInt<4>(Imm)) {
5904       Error(S, "invalid immediate: only 4-bit values are legal");
5905     }
5906   }
5907 
5908   Operands.push_back(
5909       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5910   return MatchOperand_Success;
5911 }
5912 
5913 bool AMDGPUOperand::isGPRIdxMode() const {
5914   return isImmTy(ImmTyGprIdxMode);
5915 }
5916 
5917 //===----------------------------------------------------------------------===//
5918 // sopp branch targets
5919 //===----------------------------------------------------------------------===//
5920 
5921 OperandMatchResultTy
5922 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5923 
5924   // Make sure we are not parsing something
5925   // that looks like a label or an expression but is not.
5926   // This will improve error messages.
5927   if (isRegister() || isModifier())
5928     return MatchOperand_NoMatch;
5929 
5930   if (parseExpr(Operands)) {
5931 
5932     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5933     assert(Opr.isImm() || Opr.isExpr());
5934     SMLoc Loc = Opr.getStartLoc();
5935 
5936     // Currently we do not support arbitrary expressions as branch targets.
5937     // Only labels and absolute expressions are accepted.
5938     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5939       Error(Loc, "expected an absolute expression or a label");
5940     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5941       Error(Loc, "expected a 16-bit signed jump offset");
5942     }
5943   }
5944 
5945   return MatchOperand_Success; // avoid excessive error messages
5946 }
5947 
5948 //===----------------------------------------------------------------------===//
5949 // Boolean holding registers
5950 //===----------------------------------------------------------------------===//
5951 
5952 OperandMatchResultTy
5953 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5954   return parseReg(Operands);
5955 }
5956 
5957 //===----------------------------------------------------------------------===//
5958 // mubuf
5959 //===----------------------------------------------------------------------===//
5960 
5961 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5962   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5963 }
5964 
5965 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5966   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5967 }
5968 
5969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5970   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5971 }
5972 
5973 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5974                                const OperandVector &Operands,
5975                                bool IsAtomic,
5976                                bool IsAtomicReturn,
5977                                bool IsLds) {
5978   bool IsLdsOpcode = IsLds;
5979   bool HasLdsModifier = false;
5980   OptionalImmIndexMap OptionalIdx;
5981   assert(IsAtomicReturn ? IsAtomic : true);
5982   unsigned FirstOperandIdx = 1;
5983 
5984   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5985     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5986 
5987     // Add the register arguments
5988     if (Op.isReg()) {
5989       Op.addRegOperands(Inst, 1);
5990       // Insert a tied src for atomic return dst.
5991       // This cannot be postponed as subsequent calls to
5992       // addImmOperands rely on the correct number of MC operands.
5993       if (IsAtomicReturn && i == FirstOperandIdx)
5994         Op.addRegOperands(Inst, 1);
5995       continue;
5996     }
5997 
5998     // Handle the case where soffset is an immediate
5999     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6000       Op.addImmOperands(Inst, 1);
6001       continue;
6002     }
6003 
6004     HasLdsModifier |= Op.isLDS();
6005 
6006     // Handle tokens like 'offen' which are sometimes hard-coded into the
6007     // asm string.  There are no MCInst operands for these.
6008     if (Op.isToken()) {
6009       continue;
6010     }
6011     assert(Op.isImm());
6012 
6013     // Handle optional arguments
6014     OptionalIdx[Op.getImmTy()] = i;
6015   }
6016 
6017   // This is a workaround for an LLVM quirk which may result in an
6018   // incorrect instruction selection. The lds and non-lds versions of
6019   // MUBUF instructions are identical except that the lds versions
6020   // have a mandatory 'lds' modifier. However, this modifier follows the
6021   // optional modifiers and the LLVM asm matcher regards this 'lds'
6022   // modifier as an optional one. As a result, an lds version
6023   // of an opcode may be selected even if it has no 'lds' modifier.
6024   if (IsLdsOpcode && !HasLdsModifier) {
6025     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6026     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6027       Inst.setOpcode(NoLdsOpcode);
6028       IsLdsOpcode = false;
6029     }
6030   }
6031 
6032   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6033   if (!IsAtomic) { // glc is hard-coded.
6034     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6035   }
6036   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6037 
6038   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6039     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6040   }
6041 
6042   if (isGFX10())
6043     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6044 }
6045 
6046 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6047   OptionalImmIndexMap OptionalIdx;
6048 
6049   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6050     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6051 
6052     // Add the register arguments
6053     if (Op.isReg()) {
6054       Op.addRegOperands(Inst, 1);
6055       continue;
6056     }
6057 
6058     // Handle the case where soffset is an immediate
6059     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6060       Op.addImmOperands(Inst, 1);
6061       continue;
6062     }
6063 
6064     // Handle tokens like 'offen' which are sometimes hard-coded into the
6065     // asm string.  There are no MCInst operands for these.
6066     if (Op.isToken()) {
6067       continue;
6068     }
6069     assert(Op.isImm());
6070 
6071     // Handle optional arguments
6072     OptionalIdx[Op.getImmTy()] = i;
6073   }
6074 
6075   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6076                         AMDGPUOperand::ImmTyOffset);
6077   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6078   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6079   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6080   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6081 
6082   if (isGFX10())
6083     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6084 }
6085 
6086 //===----------------------------------------------------------------------===//
6087 // mimg
6088 //===----------------------------------------------------------------------===//
6089 
6090 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6091                               bool IsAtomic) {
6092   unsigned I = 1;
6093   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6094   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6095     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6096   }
6097 
6098   if (IsAtomic) {
6099     // Add src, same as dst
6100     assert(Desc.getNumDefs() == 1);
6101     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6102   }
6103 
6104   OptionalImmIndexMap OptionalIdx;
6105 
6106   for (unsigned E = Operands.size(); I != E; ++I) {
6107     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6108 
6109     // Add the register arguments
6110     if (Op.isReg()) {
6111       Op.addRegOperands(Inst, 1);
6112     } else if (Op.isImmModifier()) {
6113       OptionalIdx[Op.getImmTy()] = I;
6114     } else if (!Op.isToken()) {
6115       llvm_unreachable("unexpected operand type");
6116     }
6117   }
6118 
6119   bool IsGFX10 = isGFX10();
6120 
6121   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6122   if (IsGFX10)
6123     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6124   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6125   if (IsGFX10)
6126     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6127   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6128   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6129   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6130   if (IsGFX10)
6131     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6132   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6133   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6134   if (!IsGFX10)
6135     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6136   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6137 }
6138 
6139 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6140   cvtMIMG(Inst, Operands, true);
6141 }
6142 
6143 //===----------------------------------------------------------------------===//
6144 // smrd
6145 //===----------------------------------------------------------------------===//
6146 
6147 bool AMDGPUOperand::isSMRDOffset8() const {
6148   return isImm() && isUInt<8>(getImm());
6149 }
6150 
6151 bool AMDGPUOperand::isSMEMOffset() const {
6152   return isImm(); // Offset range is checked later by validator.
6153 }
6154 
6155 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6156   // 32-bit literals are only supported on CI, and we only want to use them
6157   // when the offset does not fit in 8 bits.
6158   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6159 }
6160 
6161 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6162   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6163 }
6164 
6165 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6166   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6167 }
6168 
6169 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6170   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6171 }
6172 
6173 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6174   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6175 }
6176 
6177 //===----------------------------------------------------------------------===//
6178 // vop3
6179 //===----------------------------------------------------------------------===//
6180 
6181 static bool ConvertOmodMul(int64_t &Mul) {
6182   if (Mul != 1 && Mul != 2 && Mul != 4)
6183     return false;
6184 
6185   Mul >>= 1;
6186   return true;
6187 }
6188 
6189 static bool ConvertOmodDiv(int64_t &Div) {
6190   if (Div == 1) {
6191     Div = 0;
6192     return true;
6193   }
6194 
6195   if (Div == 2) {
6196     Div = 3;
6197     return true;
6198   }
6199 
6200   return false;
6201 }
6202 
6203 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6204   if (BoundCtrl == 0) {
6205     BoundCtrl = 1;
6206     return true;
6207   }
6208 
6209   if (BoundCtrl == -1) {
6210     BoundCtrl = 0;
6211     return true;
6212   }
6213 
6214   return false;
6215 }
6216 
6217 // Note: the order in this table matches the order of operands in AsmString.
6218 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6219   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6220   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6221   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6222   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6223   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6224   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6225   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6226   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6227   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6228   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6229   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6230   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6231   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6232   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6233   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6234   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6235   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6236   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6237   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6238   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6239   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6240   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6241   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6242   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6243   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6244   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6245   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6246   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6247   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6248   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6249   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6250   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6251   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6252   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6253   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6254   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6255   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6256   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6257   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6258   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6259   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6260   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6261   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6262   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6263 };
6264 
6265 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6266 
6267   OperandMatchResultTy res = parseOptionalOpr(Operands);
6268 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands to
  // make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
6279 
6280   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6281     if (res != MatchOperand_Success ||
6282         isToken(AsmToken::EndOfStatement))
6283       break;
6284 
6285     trySkipToken(AsmToken::Comma);
6286     res = parseOptionalOpr(Operands);
6287   }
6288 
6289   return res;
6290 }
6291 
6292 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6293   OperandMatchResultTy res;
6294   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6295     // try to parse any optional operand here
6296     if (Op.IsBit) {
6297       res = parseNamedBit(Op.Name, Operands, Op.Type);
6298     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6299       res = parseOModOperand(Operands);
6300     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6301                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6302                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6303       res = parseSDWASel(Operands, Op.Name, Op.Type);
6304     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6305       res = parseSDWADstUnused(Operands);
6306     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6307                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6308                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6309                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6310       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6311                                         Op.ConvertResult);
6312     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6313       res = parseDim(Operands);
6314     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6315       res = parseDfmtNfmt(Operands);
6316     } else {
6317       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6318     }
6319     if (res != MatchOperand_NoMatch) {
6320       return res;
6321     }
6322   }
6323   return MatchOperand_NoMatch;
6324 }
6325 
6326 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6327   StringRef Name = Parser.getTok().getString();
6328   if (Name == "mul") {
6329     return parseIntWithPrefix("mul", Operands,
6330                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6331   }
6332 
6333   if (Name == "div") {
6334     return parseIntWithPrefix("div", Operands,
6335                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6336   }
6337 
6338   return MatchOperand_NoMatch;
6339 }
6340 
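// Custom converter for VOP3 opcodes that use op_sel to select the destination
// half: after the regular VOP3P conversion, if the op_sel bit just past the
// last source operand (bit SrcNum) is set, propagate it into src0_modifiers
// as DST_OP_SEL.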
6341 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6342   cvtVOP3P(Inst, Operands);
6343 
6344   int Opc = Inst.getOpcode();
6345 
6346   int SrcNum;
6347   const int Ops[] = { AMDGPU::OpName::src0,
6348                       AMDGPU::OpName::src1,
6349                       AMDGPU::OpName::src2 };
6350   for (SrcNum = 0;
6351        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6352        ++SrcNum);
6353   assert(SrcNum > 0);
6354 
6355   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6356   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6357 
6358   if ((OpSel & (1 << SrcNum)) != 0) {
6359     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6360     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6361     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6362   }
6363 }
6364 
6365 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6374 }
6375 
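// Custom converter for VOP3 interpolation instructions: interp slot, attribute
// and channel operands are added as plain immediates, sources may carry FP
// input modifiers, and the optional high/clamp/omod operands are appended if
// the opcode defines them.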
6376 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6377 {
6378   OptionalImmIndexMap OptionalIdx;
6379   unsigned Opc = Inst.getOpcode();
6380 
6381   unsigned I = 1;
6382   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6383   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6384     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6385   }
6386 
6387   for (unsigned E = Operands.size(); I != E; ++I) {
6388     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6389     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6390       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6391     } else if (Op.isInterpSlot() ||
6392                Op.isInterpAttr() ||
6393                Op.isAttrChan()) {
6394       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6395     } else if (Op.isImmModifier()) {
6396       OptionalIdx[Op.getImmTy()] = I;
6397     } else {
6398       llvm_unreachable("unhandled operand type");
6399     }
6400   }
6401 
6402   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6403     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6404   }
6405 
6406   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6407     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6408   }
6409 
6410   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6411     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6412   }
6413 }
6414 
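// Common VOP3 conversion: add the def operands, then the sources (using FP
// input modifiers when the instruction has src0_modifiers), record optional
// immediate operands by type, and append clamp/omod if the opcode defines
// them. A trailing special case inserts the tied src2 for v_mac/v_fmac.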
6415 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6416                               OptionalImmIndexMap &OptionalIdx) {
6417   unsigned Opc = Inst.getOpcode();
6418 
6419   unsigned I = 1;
6420   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6421   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6422     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6423   }
6424 
6425   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6426     // This instruction has src modifiers
6427     for (unsigned E = Operands.size(); I != E; ++I) {
6428       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6429       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6430         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6431       } else if (Op.isImmModifier()) {
6432         OptionalIdx[Op.getImmTy()] = I;
6433       } else if (Op.isRegOrImm()) {
6434         Op.addRegOrImmOperands(Inst, 1);
6435       } else {
6436         llvm_unreachable("unhandled operand type");
6437       }
6438     }
6439   } else {
6440     // No src modifiers
6441     for (unsigned E = Operands.size(); I != E; ++I) {
6442       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6443       if (Op.isMod()) {
6444         OptionalIdx[Op.getImmTy()] = I;
6445       } else {
6446         Op.addRegOrImmOperands(Inst, 1);
6447       }
6448     }
6449   }
6450 
6451   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6452     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6453   }
6454 
6455   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6456     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6457   }
6458 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // these opcodes have a src2 register operand that is tied to the dst
  // operand. We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
6463   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6464       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6465       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6466       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6467       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6468       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6469       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6470     auto it = Inst.begin();
6471     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6472     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6473     ++it;
6474     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6475   }
6476 }
6477 
6478 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6479   OptionalImmIndexMap OptionalIdx;
6480   cvtVOP3(Inst, Operands, OptionalIdx);
6481 }
6482 
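// Convert a VOP3P instruction: op_sel/op_sel_hi/neg_lo/neg_hi are parsed as if
// they were ordinary VOP3 operands and then folded, bit by bit, into the
// corresponding srcN_modifiers operands. For example, "op_sel:[1,0,0]" sets
// OP_SEL_0 in src0_modifiers only.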
6483 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6484                                const OperandVector &Operands) {
6485   OptionalImmIndexMap OptIdx;
6486   const int Opc = Inst.getOpcode();
6487   const MCInstrDesc &Desc = MII.get(Opc);
6488 
6489   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6490 
6491   cvtVOP3(Inst, Operands, OptIdx);
6492 
6493   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6494     assert(!IsPacked);
6495     Inst.addOperand(Inst.getOperand(0));
6496   }
6497 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6500 
6501   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6502 
6503   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6504   if (OpSelHiIdx != -1) {
6505     int DefaultVal = IsPacked ? -1 : 0;
6506     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6507                           DefaultVal);
6508   }
6509 
6510   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6511   if (NegLoIdx != -1) {
6512     assert(IsPacked);
6513     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6514     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6515   }
6516 
6517   const int Ops[] = { AMDGPU::OpName::src0,
6518                       AMDGPU::OpName::src1,
6519                       AMDGPU::OpName::src2 };
6520   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6521                          AMDGPU::OpName::src1_modifiers,
6522                          AMDGPU::OpName::src2_modifiers };
6523 
6524   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6525 
6526   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6527   unsigned OpSelHi = 0;
6528   unsigned NegLo = 0;
6529   unsigned NegHi = 0;
6530 
6531   if (OpSelHiIdx != -1) {
6532     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6533   }
6534 
6535   if (NegLoIdx != -1) {
6536     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6537     NegLo = Inst.getOperand(NegLoIdx).getImm();
6538     NegHi = Inst.getOperand(NegHiIdx).getImm();
6539   }
6540 
6541   for (int J = 0; J < 3; ++J) {
6542     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6543     if (OpIdx == -1)
6544       break;
6545 
6546     uint32_t ModVal = 0;
6547 
6548     if ((OpSel & (1 << J)) != 0)
6549       ModVal |= SISrcMods::OP_SEL_0;
6550 
6551     if ((OpSelHi & (1 << J)) != 0)
6552       ModVal |= SISrcMods::OP_SEL_1;
6553 
6554     if ((NegLo & (1 << J)) != 0)
6555       ModVal |= SISrcMods::NEG;
6556 
6557     if ((NegHi & (1 << J)) != 0)
6558       ModVal |= SISrcMods::NEG_HI;
6559 
6560     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6561 
6562     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6563   }
6564 }
6565 
6566 //===----------------------------------------------------------------------===//
6567 // dpp
6568 //===----------------------------------------------------------------------===//
6569 
6570 bool AMDGPUOperand::isDPP8() const {
6571   return isImmTy(ImmTyDPP8);
6572 }
6573 
6574 bool AMDGPUOperand::isDPPCtrl() const {
6575   using namespace AMDGPU::DPP;
6576 
6577   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6578   if (result) {
6579     int64_t Imm = getImm();
6580     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6581            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6582            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6583            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6584            (Imm == DppCtrl::WAVE_SHL1) ||
6585            (Imm == DppCtrl::WAVE_ROL1) ||
6586            (Imm == DppCtrl::WAVE_SHR1) ||
6587            (Imm == DppCtrl::WAVE_ROR1) ||
6588            (Imm == DppCtrl::ROW_MIRROR) ||
6589            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6590            (Imm == DppCtrl::BCAST15) ||
6591            (Imm == DppCtrl::BCAST31) ||
6592            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6593            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6594   }
6595   return false;
6596 }
6597 
6598 //===----------------------------------------------------------------------===//
6599 // mAI
6600 //===----------------------------------------------------------------------===//
6601 
6602 bool AMDGPUOperand::isBLGP() const {
6603   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6604 }
6605 
6606 bool AMDGPUOperand::isCBSZ() const {
6607   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6608 }
6609 
6610 bool AMDGPUOperand::isABID() const {
6611   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6612 }
6613 
6614 bool AMDGPUOperand::isS16Imm() const {
6615   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6616 }
6617 
6618 bool AMDGPUOperand::isU16Imm() const {
6619   return isImm() && isUInt<16>(getImm());
6620 }
6621 
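// Parse the MIMG "dim" operand on gfx10, e.g. "dim:SQ_RSRC_IMG_2D" or the
// short form "dim:2D".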
6622 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6623   if (!isGFX10())
6624     return MatchOperand_NoMatch;
6625 
6626   SMLoc S = Parser.getTok().getLoc();
6627 
6628   if (getLexer().isNot(AsmToken::Identifier))
6629     return MatchOperand_NoMatch;
6630   if (getLexer().getTok().getString() != "dim")
6631     return MatchOperand_NoMatch;
6632 
6633   Parser.Lex();
6634   if (getLexer().isNot(AsmToken::Colon))
6635     return MatchOperand_ParseFail;
6636 
6637   Parser.Lex();
6638 
6639   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6640   // integer.
6641   std::string Token;
6642   if (getLexer().is(AsmToken::Integer)) {
6643     SMLoc Loc = getLexer().getTok().getEndLoc();
6644     Token = std::string(getLexer().getTok().getString());
6645     Parser.Lex();
6646     if (getLexer().getTok().getLoc() != Loc)
6647       return MatchOperand_ParseFail;
6648   }
6649   if (getLexer().isNot(AsmToken::Identifier))
6650     return MatchOperand_ParseFail;
6651   Token += getLexer().getTok().getString();
6652 
6653   StringRef DimId = Token;
6654   if (DimId.startswith("SQ_RSRC_IMG_"))
6655     DimId = DimId.substr(12);
6656 
6657   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6658   if (!DimInfo)
6659     return MatchOperand_ParseFail;
6660 
6661   Parser.Lex();
6662 
6663   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6664                                               AMDGPUOperand::ImmTyDim));
6665   return MatchOperand_Success;
6666 }
6667 
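// Parse the gfx10 "dpp8" operand, e.g. "dpp8:[0,1,2,3,4,5,6,7]". Each of the
// eight lane selectors must be in the range 0..7; they are packed three bits
// per lane into a single immediate.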
6668 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6669   SMLoc S = Parser.getTok().getLoc();
6670   StringRef Prefix;
6671 
6672   if (getLexer().getKind() == AsmToken::Identifier) {
6673     Prefix = Parser.getTok().getString();
6674   } else {
6675     return MatchOperand_NoMatch;
6676   }
6677 
6678   if (Prefix != "dpp8")
6679     return parseDPPCtrl(Operands);
6680   if (!isGFX10())
6681     return MatchOperand_NoMatch;
6682 
6683   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6684 
6685   int64_t Sels[8];
6686 
6687   Parser.Lex();
6688   if (getLexer().isNot(AsmToken::Colon))
6689     return MatchOperand_ParseFail;
6690 
6691   Parser.Lex();
6692   if (getLexer().isNot(AsmToken::LBrac))
6693     return MatchOperand_ParseFail;
6694 
6695   Parser.Lex();
6696   if (getParser().parseAbsoluteExpression(Sels[0]))
6697     return MatchOperand_ParseFail;
6698   if (0 > Sels[0] || 7 < Sels[0])
6699     return MatchOperand_ParseFail;
6700 
6701   for (size_t i = 1; i < 8; ++i) {
6702     if (getLexer().isNot(AsmToken::Comma))
6703       return MatchOperand_ParseFail;
6704 
6705     Parser.Lex();
6706     if (getParser().parseAbsoluteExpression(Sels[i]))
6707       return MatchOperand_ParseFail;
6708     if (0 > Sels[i] || 7 < Sels[i])
6709       return MatchOperand_ParseFail;
6710   }
6711 
6712   if (getLexer().isNot(AsmToken::RBrac))
6713     return MatchOperand_ParseFail;
6714   Parser.Lex();
6715 
6716   unsigned DPP8 = 0;
6717   for (size_t i = 0; i < 8; ++i)
6718     DPP8 |= (Sels[i] << (i * 3));
6719 
6720   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6721   return MatchOperand_Success;
6722 }
6723 
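// Parse a DPP control operand. Accepted forms include "row_mirror",
// "row_half_mirror", "quad_perm:[a,b,c,d]", "row_shl:N", "row_shr:N" and
// "row_ror:N" (N in 1..15), "wave_shl:1", "wave_rol:1", "wave_shr:1",
// "wave_ror:1" and "row_bcast:15|31" (VI/gfx9 only), and "row_share:N",
// "row_xmask:N" (N in 0..15, gfx10 only).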
6724 OperandMatchResultTy
6725 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6726   using namespace AMDGPU::DPP;
6727 
6728   SMLoc S = Parser.getTok().getLoc();
6729   StringRef Prefix;
6730   int64_t Int;
6731 
6732   if (getLexer().getKind() == AsmToken::Identifier) {
6733     Prefix = Parser.getTok().getString();
6734   } else {
6735     return MatchOperand_NoMatch;
6736   }
6737 
6738   if (Prefix == "row_mirror") {
6739     Int = DppCtrl::ROW_MIRROR;
6740     Parser.Lex();
6741   } else if (Prefix == "row_half_mirror") {
6742     Int = DppCtrl::ROW_HALF_MIRROR;
6743     Parser.Lex();
6744   } else {
    // Check the prefix to prevent parseDPPCtrl from eating invalid tokens.
6746     if (Prefix != "quad_perm"
6747         && Prefix != "row_shl"
6748         && Prefix != "row_shr"
6749         && Prefix != "row_ror"
6750         && Prefix != "wave_shl"
6751         && Prefix != "wave_rol"
6752         && Prefix != "wave_shr"
6753         && Prefix != "wave_ror"
6754         && Prefix != "row_bcast"
6755         && Prefix != "row_share"
6756         && Prefix != "row_xmask") {
6757       return MatchOperand_NoMatch;
6758     }
6759 
6760     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6761       return MatchOperand_NoMatch;
6762 
6763     if (!isVI() && !isGFX9() &&
6764         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6765          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6766          Prefix == "row_bcast"))
6767       return MatchOperand_NoMatch;
6768 
6769     Parser.Lex();
6770     if (getLexer().isNot(AsmToken::Colon))
6771       return MatchOperand_ParseFail;
6772 
6773     if (Prefix == "quad_perm") {
6774       // quad_perm:[%d,%d,%d,%d]
6775       Parser.Lex();
6776       if (getLexer().isNot(AsmToken::LBrac))
6777         return MatchOperand_ParseFail;
6778       Parser.Lex();
6779 
6780       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6781         return MatchOperand_ParseFail;
6782 
6783       for (int i = 0; i < 3; ++i) {
6784         if (getLexer().isNot(AsmToken::Comma))
6785           return MatchOperand_ParseFail;
6786         Parser.Lex();
6787 
6788         int64_t Temp;
6789         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6790           return MatchOperand_ParseFail;
6791         const int shift = i*2 + 2;
6792         Int += (Temp << shift);
6793       }
6794 
6795       if (getLexer().isNot(AsmToken::RBrac))
6796         return MatchOperand_ParseFail;
6797       Parser.Lex();
6798     } else {
6799       // sel:%d
6800       Parser.Lex();
6801       if (getParser().parseAbsoluteExpression(Int))
6802         return MatchOperand_ParseFail;
6803 
6804       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6805         Int |= DppCtrl::ROW_SHL0;
6806       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6807         Int |= DppCtrl::ROW_SHR0;
6808       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6809         Int |= DppCtrl::ROW_ROR0;
6810       } else if (Prefix == "wave_shl" && 1 == Int) {
6811         Int = DppCtrl::WAVE_SHL1;
6812       } else if (Prefix == "wave_rol" && 1 == Int) {
6813         Int = DppCtrl::WAVE_ROL1;
6814       } else if (Prefix == "wave_shr" && 1 == Int) {
6815         Int = DppCtrl::WAVE_SHR1;
6816       } else if (Prefix == "wave_ror" && 1 == Int) {
6817         Int = DppCtrl::WAVE_ROR1;
6818       } else if (Prefix == "row_bcast") {
6819         if (Int == 15) {
6820           Int = DppCtrl::BCAST15;
6821         } else if (Int == 31) {
6822           Int = DppCtrl::BCAST31;
6823         } else {
6824           return MatchOperand_ParseFail;
6825         }
6826       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6827         Int |= DppCtrl::ROW_SHARE_FIRST;
6828       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6829         Int |= DppCtrl::ROW_XMASK_FIRST;
6830       } else {
6831         return MatchOperand_ParseFail;
6832       }
6833     }
6834   }
6835 
6836   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6837   return MatchOperand_Success;
6838 }
6839 
6840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6841   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6842 }
6843 
6844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6845   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6846 }
6847 
6848 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6849   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6850 }
6851 
6852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6853   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6854 }
6855 
6856 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6857   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6858 }
6859 
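// Convert a DPP (or gfx10 DPP8) instruction: copy any tied operand, add the
// sources with FP input modifiers where the descriptor expects them, and
// append the remaining DPP operands: the FI bit for DPP8, or
// row_mask/bank_mask/bound_ctrl/fi for regular DPP.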
6860 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6861   OptionalImmIndexMap OptionalIdx;
6862 
6863   unsigned I = 1;
6864   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6865   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6866     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6867   }
6868 
6869   int Fi = 0;
6870   for (unsigned E = Operands.size(); I != E; ++I) {
6871     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6872                                             MCOI::TIED_TO);
6873     if (TiedTo != -1) {
6874       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or 'src2' operand for MAC instructions.
6876       Inst.addOperand(Inst.getOperand(TiedTo));
6877     }
6878     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6879     // Add the register arguments
6880     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
6883       continue;
6884     }
6885 
6886     if (IsDPP8) {
6887       if (Op.isDPP8()) {
6888         Op.addImmOperands(Inst, 1);
6889       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6890         Op.addRegWithFPInputModsOperands(Inst, 2);
6891       } else if (Op.isFI()) {
6892         Fi = Op.getImm();
6893       } else if (Op.isReg()) {
6894         Op.addRegOperands(Inst, 1);
6895       } else {
6896         llvm_unreachable("Invalid operand type");
6897       }
6898     } else {
6899       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6900         Op.addRegWithFPInputModsOperands(Inst, 2);
6901       } else if (Op.isDPPCtrl()) {
6902         Op.addImmOperands(Inst, 1);
6903       } else if (Op.isImm()) {
6904         // Handle optional arguments
6905         OptionalIdx[Op.getImmTy()] = I;
6906       } else {
6907         llvm_unreachable("Invalid operand type");
6908       }
6909     }
6910   }
6911 
6912   if (IsDPP8) {
6913     using namespace llvm::AMDGPU::DPP;
6914     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6915   } else {
6916     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6917     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6918     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6919     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6920       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6921     }
6922   }
6923 }
6924 
6925 //===----------------------------------------------------------------------===//
6926 // sdwa
6927 //===----------------------------------------------------------------------===//
6928 
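// Parse an SDWA select operand such as "dst_sel:WORD_1" or "src0_sel:BYTE_0".
// Accepted values are BYTE_0..BYTE_3, WORD_0, WORD_1 and DWORD.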
6929 OperandMatchResultTy
6930 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6931                               AMDGPUOperand::ImmTy Type) {
6932   using namespace llvm::AMDGPU::SDWA;
6933 
6934   SMLoc S = Parser.getTok().getLoc();
6935   StringRef Value;
6936   OperandMatchResultTy res;
6937 
6938   res = parseStringWithPrefix(Prefix, Value);
6939   if (res != MatchOperand_Success) {
6940     return res;
6941   }
6942 
6943   int64_t Int;
6944   Int = StringSwitch<int64_t>(Value)
6945         .Case("BYTE_0", SdwaSel::BYTE_0)
6946         .Case("BYTE_1", SdwaSel::BYTE_1)
6947         .Case("BYTE_2", SdwaSel::BYTE_2)
6948         .Case("BYTE_3", SdwaSel::BYTE_3)
6949         .Case("WORD_0", SdwaSel::WORD_0)
6950         .Case("WORD_1", SdwaSel::WORD_1)
6951         .Case("DWORD", SdwaSel::DWORD)
6952         .Default(0xffffffff);
6953   Parser.Lex(); // eat last token
6954 
6955   if (Int == 0xffffffff) {
6956     return MatchOperand_ParseFail;
6957   }
6958 
6959   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6960   return MatchOperand_Success;
6961 }
6962 
6963 OperandMatchResultTy
6964 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6965   using namespace llvm::AMDGPU::SDWA;
6966 
6967   SMLoc S = Parser.getTok().getLoc();
6968   StringRef Value;
6969   OperandMatchResultTy res;
6970 
6971   res = parseStringWithPrefix("dst_unused", Value);
6972   if (res != MatchOperand_Success) {
6973     return res;
6974   }
6975 
6976   int64_t Int;
6977   Int = StringSwitch<int64_t>(Value)
6978         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6979         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6980         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6981         .Default(0xffffffff);
6982   Parser.Lex(); // eat last token
6983 
6984   if (Int == 0xffffffff) {
6985     return MatchOperand_ParseFail;
6986   }
6987 
6988   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6989   return MatchOperand_Success;
6990 }
6991 
6992 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6993   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6994 }
6995 
6996 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6997   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6998 }
6999 
7000 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7001   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7002 }
7003 
7004 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7005   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7006 }
7007 
7008 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7009   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7010 }
7011 
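// Common SDWA conversion: optionally skip the "vcc" dst/src operands that
// VOP2b/VOPC encode implicitly, add the sources with input modifiers, and
// append the optional clamp/omod and SDWA dst_sel/dst_unused/srcN_sel
// operands appropriate for the basic instruction type.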
7012 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7013                               uint64_t BasicInstType,
7014                               bool SkipDstVcc,
7015                               bool SkipSrcVcc) {
7016   using namespace llvm::AMDGPU::SDWA;
7017 
7018   OptionalImmIndexMap OptionalIdx;
7019   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7020   bool SkippedVcc = false;
7021 
7022   unsigned I = 1;
7023   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7024   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7025     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7026   }
7027 
7028   for (unsigned E = Operands.size(); I != E; ++I) {
7029     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7030     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7031         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
7037       if (BasicInstType == SIInstrFlags::VOP2 &&
7038           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7039            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7040         SkippedVcc = true;
7041         continue;
7042       } else if (BasicInstType == SIInstrFlags::VOPC &&
7043                  Inst.getNumOperands() == 0) {
7044         SkippedVcc = true;
7045         continue;
7046       }
7047     }
7048     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7049       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7050     } else if (Op.isImm()) {
7051       // Handle optional arguments
7052       OptionalIdx[Op.getImmTy()] = I;
7053     } else {
7054       llvm_unreachable("Invalid operand type");
7055     }
7056     SkippedVcc = false;
7057   }
7058 
7059   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7060       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7061       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_gfx10/gfx9/vi has no optional sdwa arguments.
7063     switch (BasicInstType) {
7064     case SIInstrFlags::VOP1:
7065       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7066       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7067         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7068       }
7069       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7070       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7071       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7072       break;
7073 
7074     case SIInstrFlags::VOP2:
7075       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7076       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7077         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7078       }
7079       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7080       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7081       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7082       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7083       break;
7084 
7085     case SIInstrFlags::VOPC:
7086       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7087         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7088       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7089       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7090       break;
7091 
7092     default:
7093       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7094     }
7095   }
7096 
  // Special case v_mac_{f16, f32}:
  // these opcodes have a src2 register operand that is tied to the dst operand.
7099   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7100       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7101     auto it = Inst.begin();
7102     std::advance(
7103       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7104     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7105   }
7106 }
7107 
7108 //===----------------------------------------------------------------------===//
7109 // mAI
7110 //===----------------------------------------------------------------------===//
7111 
7112 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7113   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7114 }
7115 
7116 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7117   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7118 }
7119 
7120 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7121   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7122 }
7123 
7124 /// Force static initialization.
7125 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7126   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7127   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7128 }
7129 
7130 #define GET_REGISTER_MATCHER
7131 #define GET_MATCHER_IMPLEMENTATION
7132 #define GET_MNEMONIC_SPELL_CHECKER
7133 #include "AMDGPUGenAsmMatcher.inc"
7134 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
7137 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7138                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
7143   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7144   switch (Kind) {
7145   case MCK_addr64:
7146     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7147   case MCK_gds:
7148     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7149   case MCK_lds:
7150     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7151   case MCK_glc:
7152     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7153   case MCK_idxen:
7154     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7155   case MCK_offen:
7156     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7157   case MCK_SSrcB32:
7158     // When operands have expression values, they will return true for isToken,
7159     // because it is not possible to distinguish between a token and an
7160     // expression at parse time. MatchInstructionImpl() will always try to
7161     // match an operand as a token, when isToken returns true, and when the
7162     // name of the expression is not a valid token, the match will fail,
7163     // so we need to handle it here.
7164     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7165   case MCK_SSrcF32:
7166     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7167   case MCK_SoppBrTarget:
7168     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7169   case MCK_VReg32OrOff:
7170     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7171   case MCK_InterpSlot:
7172     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7173   case MCK_Attr:
7174     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7175   case MCK_AttrChan:
7176     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7177   case MCK_ImmSMEMOffset:
7178     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7179   case MCK_SReg_64:
7180   case MCK_SReg_64_XEXEC:
7181     // Null is defined as a 32-bit register but
7182     // it should also be enabled with 64-bit operands.
7183     // The following code enables it for SReg_64 operands
7184     // used as source and destination. Remaining source
7185     // operands are handled in isInlinableImm.
7186     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7187   default:
7188     return Match_InvalidOperand;
7189   }
7190 }
7191 
7192 //===----------------------------------------------------------------------===//
7193 // endpgm
7194 //===----------------------------------------------------------------------===//
7195 
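// Parse the optional immediate operand of s_endpgm; it defaults to 0 and must
// fit in 16 bits.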
7196 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7197   SMLoc S = Parser.getTok().getLoc();
7198   int64_t Imm = 0;
7199 
7200   if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
7202     Imm = 0;
7203   }
7204 
7205   if (!isUInt<16>(Imm)) {
7206     Error(S, "expected a 16-bit value");
7207     return MatchOperand_ParseFail;
7208   }
7209 
7210   Operands.push_back(
7211       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7212   return MatchOperand_Success;
7213 }
7214 
7215 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7216