1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
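  // A minimal illustrative sketch (not code from this file; variable names are
  // hypothetical): how a parsed source modifier is folded into a *_modifiers
  // operand.
  //
  //   AMDGPUOperand::Modifiers Mods;
  //   Mods.Neg = true;                             // e.g. from a leading '-'
  //   int64_t Enc = Mods.getModifiersOperand();    // == SISrcMods::NEG
  //   Inst.addOperand(MCOperand::createImm(Enc));  // src0_modifiers operand
  //
  // FP (abs/neg) and integer (sext) modifiers are mutually exclusive; the
  // assert in getModifiersOperand() enforces that.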
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcF16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   ImmTy getImmTy() const {
693     assert(isImm());
694     return Imm.Type;
695   }
696 
697   unsigned getReg() const override {
698     assert(isRegKind());
699     return Reg.RegNo;
700   }
701 
702   SMLoc getStartLoc() const override {
703     return StartLoc;
704   }
705 
706   SMLoc getEndLoc() const override {
707     return EndLoc;
708   }
709 
710   SMRange getLocRange() const {
711     return SMRange(StartLoc, EndLoc);
712   }
713 
714   Modifiers getModifiers() const {
715     assert(isRegKind() || isImmTy(ImmTyNone));
716     return isRegKind() ? Reg.Mods : Imm.Mods;
717   }
718 
719   void setModifiers(Modifiers Mods) {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     if (isRegKind())
722       Reg.Mods = Mods;
723     else
724       Imm.Mods = Mods;
725   }
726 
727   bool hasModifiers() const {
728     return getModifiers().hasModifiers();
729   }
730 
731   bool hasFPModifiers() const {
732     return getModifiers().hasFPModifiers();
733   }
734 
735   bool hasIntModifiers() const {
736     return getModifiers().hasIntModifiers();
737   }
738 
739   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
740 
741   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
742 
743   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
744 
745   template <unsigned Bitwidth>
746   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
747 
748   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
749     addKImmFPOperands<16>(Inst, N);
750   }
751 
752   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
753     addKImmFPOperands<32>(Inst, N);
754   }
755 
756   void addRegOperands(MCInst &Inst, unsigned N) const;
757 
758   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
759     addRegOperands(Inst, N);
760   }
761 
762   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
763     if (isRegKind())
764       addRegOperands(Inst, N);
765     else if (isExpr())
766       Inst.addOperand(MCOperand::createExpr(Expr));
767     else
768       addImmOperands(Inst, N);
769   }
770 
771   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
772     Modifiers Mods = getModifiers();
773     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
774     if (isRegKind()) {
775       addRegOperands(Inst, N);
776     } else {
777       addImmOperands(Inst, N, false);
778     }
779   }
780 
781   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
782     assert(!hasIntModifiers());
783     addRegOrImmWithInputModsOperands(Inst, N);
784   }
785 
786   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasFPModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
792     Modifiers Mods = getModifiers();
793     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
794     assert(isRegKind());
795     addRegOperands(Inst, N);
796   }
797 
798   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
799     assert(!hasIntModifiers());
800     addRegWithInputModsOperands(Inst, N);
801   }
802 
803   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasFPModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
809     if (isImm())
810       addImmOperands(Inst, N);
811     else {
812       assert(isExpr());
813       Inst.addOperand(MCOperand::createExpr(Expr));
814     }
815   }
816 
817   static void printImmTy(raw_ostream& OS, ImmTy Type) {
818     switch (Type) {
819     case ImmTyNone: OS << "None"; break;
820     case ImmTyGDS: OS << "GDS"; break;
821     case ImmTyLDS: OS << "LDS"; break;
822     case ImmTyOffen: OS << "Offen"; break;
823     case ImmTyIdxen: OS << "Idxen"; break;
824     case ImmTyAddr64: OS << "Addr64"; break;
825     case ImmTyOffset: OS << "Offset"; break;
826     case ImmTyInstOffset: OS << "InstOffset"; break;
827     case ImmTyOffset0: OS << "Offset0"; break;
828     case ImmTyOffset1: OS << "Offset1"; break;
829     case ImmTyDLC: OS << "DLC"; break;
830     case ImmTyGLC: OS << "GLC"; break;
831     case ImmTySLC: OS << "SLC"; break;
832     case ImmTySWZ: OS << "SWZ"; break;
833     case ImmTyTFE: OS << "TFE"; break;
834     case ImmTyD16: OS << "D16"; break;
835     case ImmTyFORMAT: OS << "FORMAT"; break;
836     case ImmTyClampSI: OS << "ClampSI"; break;
837     case ImmTyOModSI: OS << "OModSI"; break;
838     case ImmTyDPP8: OS << "DPP8"; break;
839     case ImmTyDppCtrl: OS << "DppCtrl"; break;
840     case ImmTyDppRowMask: OS << "DppRowMask"; break;
841     case ImmTyDppBankMask: OS << "DppBankMask"; break;
842     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
843     case ImmTyDppFi: OS << "FI"; break;
844     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
845     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
846     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
847     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
848     case ImmTyDMask: OS << "DMask"; break;
849     case ImmTyDim: OS << "Dim"; break;
850     case ImmTyUNorm: OS << "UNorm"; break;
851     case ImmTyDA: OS << "DA"; break;
852     case ImmTyR128A16: OS << "R128A16"; break;
853     case ImmTyA16: OS << "A16"; break;
854     case ImmTyLWE: OS << "LWE"; break;
855     case ImmTyOff: OS << "Off"; break;
856     case ImmTyExpTgt: OS << "ExpTgt"; break;
857     case ImmTyExpCompr: OS << "ExpCompr"; break;
858     case ImmTyExpVM: OS << "ExpVM"; break;
859     case ImmTyHwreg: OS << "Hwreg"; break;
860     case ImmTySendMsg: OS << "SendMsg"; break;
861     case ImmTyInterpSlot: OS << "InterpSlot"; break;
862     case ImmTyInterpAttr: OS << "InterpAttr"; break;
863     case ImmTyAttrChan: OS << "AttrChan"; break;
864     case ImmTyOpSel: OS << "OpSel"; break;
865     case ImmTyOpSelHi: OS << "OpSelHi"; break;
866     case ImmTyNegLo: OS << "NegLo"; break;
867     case ImmTyNegHi: OS << "NegHi"; break;
868     case ImmTySwizzle: OS << "Swizzle"; break;
869     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
870     case ImmTyHigh: OS << "High"; break;
871     case ImmTyBLGP: OS << "BLGP"; break;
872     case ImmTyCBSZ: OS << "CBSZ"; break;
873     case ImmTyABID: OS << "ABID"; break;
874     case ImmTyEndpgm: OS << "Endpgm"; break;
875     }
876   }
877 
878   void print(raw_ostream &OS) const override {
879     switch (Kind) {
880     case Register:
881       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
882       break;
883     case Immediate:
884       OS << '<' << getImm();
885       if (getImmTy() != ImmTyNone) {
886         OS << " type: "; printImmTy(OS, getImmTy());
887       }
888       OS << " mods: " << Imm.Mods << '>';
889       break;
890     case Token:
891       OS << '\'' << getToken() << '\'';
892       break;
893     case Expression:
894       OS << "<expr " << *Expr << '>';
895       break;
896     }
897   }
898 
899   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
900                                       int64_t Val, SMLoc Loc,
901                                       ImmTy Type = ImmTyNone,
902                                       bool IsFPImm = false) {
903     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
904     Op->Imm.Val = Val;
905     Op->Imm.IsFPImm = IsFPImm;
906     Op->Imm.Type = Type;
907     Op->Imm.Mods = Modifiers();
908     Op->StartLoc = Loc;
909     Op->EndLoc = Loc;
910     return Op;
911   }
912 
913   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
914                                         StringRef Str, SMLoc Loc,
915                                         bool HasExplicitEncodingSize = true) {
916     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
917     Res->Tok.Data = Str.data();
918     Res->Tok.Length = Str.size();
919     Res->StartLoc = Loc;
920     Res->EndLoc = Loc;
921     return Res;
922   }
923 
924   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
925                                       unsigned RegNo, SMLoc S,
926                                       SMLoc E) {
927     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
928     Op->Reg.RegNo = RegNo;
929     Op->Reg.Mods = Modifiers();
930     Op->StartLoc = S;
931     Op->EndLoc = E;
932     return Op;
933   }
934 
935   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
936                                        const class MCExpr *Expr, SMLoc S) {
937     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
938     Op->Expr = Expr;
939     Op->StartLoc = S;
940     Op->EndLoc = S;
941     return Op;
942   }
943 };
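// A minimal illustrative sketch (assumptions noted inline): how parser code
// typically materializes operands with the factory methods above, e.g. after
// recognizing an "offset:16" modifier. 'Operands' and 'S' stand for the usual
// locals of a parse routine and are hypothetical here.
//
//   Operands.push_back(AMDGPUOperand::CreateImm(this, /*Val=*/16, S,
//                                               AMDGPUOperand::ImmTyOffset));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, AMDGPU::VGPR0, S, S));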
944 
945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
946   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
947   return OS;
948 }
949 
950 //===----------------------------------------------------------------------===//
951 // AsmParser
952 //===----------------------------------------------------------------------===//
953 
954 // Holds info related to the current kernel, e.g. the count of SGPRs used.
955 // A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
956 // next .amdgpu_hsa_kernel directive or at EOF.
957 class KernelScopeInfo {
958   int SgprIndexUnusedMin = -1;
959   int VgprIndexUnusedMin = -1;
960   MCContext *Ctx = nullptr;
961 
962   void usesSgprAt(int i) {
963     if (i >= SgprIndexUnusedMin) {
964       SgprIndexUnusedMin = ++i;
965       if (Ctx) {
966         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
967         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
968       }
969     }
970   }
971 
972   void usesVgprAt(int i) {
973     if (i >= VgprIndexUnusedMin) {
974       VgprIndexUnusedMin = ++i;
975       if (Ctx) {
976         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
977         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
978       }
979     }
980   }
981 
982 public:
983   KernelScopeInfo() = default;
984 
985   void initialize(MCContext &Context) {
986     Ctx = &Context;
987     usesSgprAt(SgprIndexUnusedMin = -1);
988     usesVgprAt(VgprIndexUnusedMin = -1);
989   }
990 
991   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
992     switch (RegKind) {
993       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
994       case IS_AGPR: // fall through
995       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
996       default: break;
997     }
998   }
999 };
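// A small worked example of the accounting above: if a kernel references
// s[6:7] and v3, then
//
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/6, /*RegWidth=*/2);
//   KernelScope.usesRegister(IS_VGPR, /*DwordRegIndex=*/3, /*RegWidth=*/1);
//
// leaves .kernel.sgpr_count == 8 and .kernel.vgpr_count == 4 (index of the
// highest register used, plus one).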
1000 
1001 class AMDGPUAsmParser : public MCTargetAsmParser {
1002   MCAsmParser &Parser;
1003 
1004   // Number of extra operands parsed after the first optional operand.
1005   // This may be necessary to skip hardcoded mandatory operands.
1006   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1007 
1008   unsigned ForcedEncodingSize = 0;
1009   bool ForcedDPP = false;
1010   bool ForcedSDWA = false;
1011   KernelScopeInfo KernelScope;
1012 
1013   /// @name Auto-generated Match Functions
1014   /// {
1015 
1016 #define GET_ASSEMBLER_HEADER
1017 #include "AMDGPUGenAsmMatcher.inc"
1018 
1019   /// }
1020 
1021 private:
1022   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1023   bool OutOfRangeError(SMRange Range);
1024   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1025   /// registers, and user-specified NextFreeXGPR values.
1026   ///
1027   /// \param Features [in] Target features, used for bug corrections.
1028   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1029   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1030   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1031   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1032   /// descriptor field, if valid.
1033   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1034   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1035   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1036   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1037   /// \param VGPRBlocks [out] Result VGPR block count.
1038   /// \param SGPRBlocks [out] Result SGPR block count.
1039   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1040                           bool FlatScrUsed, bool XNACKUsed,
1041                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1042                           SMRange VGPRRange, unsigned NextFreeSGPR,
1043                           SMRange SGPRRange, unsigned &VGPRBlocks,
1044                           unsigned &SGPRBlocks);
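  // A rough sketch of the intended computation (granule sizes and hardware bug
  // workarounds are target-dependent and not shown; treat this as an
  // approximation, not the implementation):
  //
  //   NumVGPRs   = max(1, NextFreeVGPR)
  //   VGPRBlocks = alignTo(NumVGPRs, VGPRAllocGranule) / VGPRAllocGranule - 1
  //
  // and similarly for SGPRs, with VCC/FLAT_SCRATCH/XNACK_MASK counted when the
  // corresponding flags are set.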
1045   bool ParseDirectiveAMDGCNTarget();
1046   bool ParseDirectiveAMDHSAKernel();
1047   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1048   bool ParseDirectiveHSACodeObjectVersion();
1049   bool ParseDirectiveHSACodeObjectISA();
1050   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1051   bool ParseDirectiveAMDKernelCodeT();
1052   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1053   bool ParseDirectiveAMDGPUHsaKernel();
1054 
1055   bool ParseDirectiveISAVersion();
1056   bool ParseDirectiveHSAMetadata();
1057   bool ParseDirectivePALMetadataBegin();
1058   bool ParseDirectivePALMetadata();
1059   bool ParseDirectiveAMDGPULDS();
1060 
1061   /// Common code to parse out a block of text (typically YAML) between start and
1062   /// end directives.
1063   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1064                            const char *AssemblerDirectiveEnd,
1065                            std::string &CollectString);
1066 
1067   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1068                              RegisterKind RegKind, unsigned Reg1);
1069   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1070                            unsigned &RegNum, unsigned &RegWidth,
1071                            bool RestoreOnFailure = false);
1072   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1073                            unsigned &RegNum, unsigned &RegWidth,
1074                            SmallVectorImpl<AsmToken> &Tokens);
1075   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1076                            unsigned &RegWidth,
1077                            SmallVectorImpl<AsmToken> &Tokens);
1078   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1079                            unsigned &RegWidth,
1080                            SmallVectorImpl<AsmToken> &Tokens);
1081   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1082                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1083   bool ParseRegRange(unsigned& Num, unsigned& Width);
1084   unsigned getRegularReg(RegisterKind RegKind,
1085                          unsigned RegNum,
1086                          unsigned RegWidth);
1087 
1088   bool isRegister();
1089   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1090   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1091   void initializeGprCountSymbol(RegisterKind RegKind);
1092   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1093                              unsigned RegWidth);
1094   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1095                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1096   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1097                  bool IsGdsHardcoded);
1098 
1099 public:
1100   enum AMDGPUMatchResultTy {
1101     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1102   };
1103   enum OperandMode {
1104     OperandMode_Default,
1105     OperandMode_NSA,
1106   };
1107 
1108   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1109 
1110   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1111                const MCInstrInfo &MII,
1112                const MCTargetOptions &Options)
1113       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1114     MCAsmParserExtension::Initialize(Parser);
1115 
1116     if (getFeatureBits().none()) {
1117       // Set default features.
1118       copySTI().ToggleFeature("southern-islands");
1119     }
1120 
1121     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1122 
1123     {
1124       // TODO: make these pre-defined variables read-only.
1125       // Currently there is no suitable machinery in core llvm-mc for this.
1126       // MCSymbol::isRedefinable is intended for another purpose, and
1127       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1128       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1129       MCContext &Ctx = getContext();
1130       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1131         MCSymbol *Sym =
1132             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1134         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1135         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1136         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1138       } else {
1139         MCSymbol *Sym =
1140             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1144         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1145         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1146       }
1147       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1148         initializeGprCountSymbol(IS_VGPR);
1149         initializeGprCountSymbol(IS_SGPR);
1150       } else
1151         KernelScope.initialize(getContext());
1152     }
1153   }
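  // Illustrative use of the pre-defined version symbols set up above; the
  // instructions inside the branches are placeholders, not taken from this
  // file:
  //
  //   .if .amdgcn.gfx_generation_number >= 9
  //     s_nop 0    // GFX9+ specific code would go here
  //   .else
  //     s_nop 0    // fallback for older targets
  //   .endif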
1154 
1155   bool hasXNACK() const {
1156     return AMDGPU::hasXNACK(getSTI());
1157   }
1158 
1159   bool hasMIMG_R128() const {
1160     return AMDGPU::hasMIMG_R128(getSTI());
1161   }
1162 
1163   bool hasPackedD16() const {
1164     return AMDGPU::hasPackedD16(getSTI());
1165   }
1166 
1167   bool hasGFX10A16() const {
1168     return AMDGPU::hasGFX10A16(getSTI());
1169   }
1170 
1171   bool isSI() const {
1172     return AMDGPU::isSI(getSTI());
1173   }
1174 
1175   bool isCI() const {
1176     return AMDGPU::isCI(getSTI());
1177   }
1178 
1179   bool isVI() const {
1180     return AMDGPU::isVI(getSTI());
1181   }
1182 
1183   bool isGFX9() const {
1184     return AMDGPU::isGFX9(getSTI());
1185   }
1186 
1187   bool isGFX10() const {
1188     return AMDGPU::isGFX10(getSTI());
1189   }
1190 
1191   bool hasInv2PiInlineImm() const {
1192     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1193   }
1194 
1195   bool hasFlatOffsets() const {
1196     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1197   }
1198 
1199   bool hasSGPR102_SGPR103() const {
1200     return !isVI() && !isGFX9();
1201   }
1202 
1203   bool hasSGPR104_SGPR105() const {
1204     return isGFX10();
1205   }
1206 
1207   bool hasIntClamp() const {
1208     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1209   }
1210 
1211   AMDGPUTargetStreamer &getTargetStreamer() {
1212     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1213     return static_cast<AMDGPUTargetStreamer &>(TS);
1214   }
1215 
1216   const MCRegisterInfo *getMRI() const {
1217     // We need this const_cast because for some reason getContext() is not const
1218     // in MCAsmParser.
1219     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1220   }
1221 
1222   const MCInstrInfo *getMII() const {
1223     return &MII;
1224   }
1225 
1226   const FeatureBitset &getFeatureBits() const {
1227     return getSTI().getFeatureBits();
1228   }
1229 
1230   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1231   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1232   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1233 
1234   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1235   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1236   bool isForcedDPP() const { return ForcedDPP; }
1237   bool isForcedSDWA() const { return ForcedSDWA; }
1238   ArrayRef<unsigned> getMatchedVariants() const;
1239 
1240   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1241   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1242                      bool RestoreOnFailure);
1243   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1244   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1245                                         SMLoc &EndLoc) override;
1246   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1247   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1248                                       unsigned Kind) override;
1249   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1250                                OperandVector &Operands, MCStreamer &Out,
1251                                uint64_t &ErrorInfo,
1252                                bool MatchingInlineAsm) override;
1253   bool ParseDirective(AsmToken DirectiveID) override;
1254   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1255                                     OperandMode Mode = OperandMode_Default);
1256   StringRef parseMnemonicSuffix(StringRef Name);
1257   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1258                         SMLoc NameLoc, OperandVector &Operands) override;
1259   //bool ProcessInstruction(MCInst &Inst);
1260 
1261   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1262 
1263   OperandMatchResultTy
1264   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1265                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1266                      bool (*ConvertResult)(int64_t &) = nullptr);
1267 
1268   OperandMatchResultTy
1269   parseOperandArrayWithPrefix(const char *Prefix,
1270                               OperandVector &Operands,
1271                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1272                               bool (*ConvertResult)(int64_t&) = nullptr);
1273 
1274   OperandMatchResultTy
1275   parseNamedBit(const char *Name, OperandVector &Operands,
1276                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1277   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1278                                              StringRef &Value);
1279 
1280   bool isModifier();
1281   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1282   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1283   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1284   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1285   bool parseSP3NegModifier();
1286   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1287   OperandMatchResultTy parseReg(OperandVector &Operands);
1288   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1289   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1290   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1291   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1292   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1293   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1294   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1295 
1296   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1297   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1298   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1299   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1300 
1301   bool parseCnt(int64_t &IntVal);
1302   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1303   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1304 
1305 private:
1306   struct OperandInfoTy {
1307     int64_t Id;
1308     bool IsSymbolic = false;
1309     bool IsDefined = false;
1310 
1311     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1312   };
1313 
1314   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1315   bool validateSendMsg(const OperandInfoTy &Msg,
1316                        const OperandInfoTy &Op,
1317                        const OperandInfoTy &Stream,
1318                        const SMLoc Loc);
1319 
1320   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1321   bool validateHwreg(const OperandInfoTy &HwReg,
1322                      const int64_t Offset,
1323                      const int64_t Width,
1324                      const SMLoc Loc);
1325 
1326   void errorExpTgt();
1327   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1328   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1329   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1330 
1331   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1332   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1333   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1334   bool validateSOPLiteral(const MCInst &Inst) const;
1335   bool validateConstantBusLimitations(const MCInst &Inst);
1336   bool validateEarlyClobberLimitations(const MCInst &Inst);
1337   bool validateIntClampSupported(const MCInst &Inst);
1338   bool validateMIMGAtomicDMask(const MCInst &Inst);
1339   bool validateMIMGGatherDMask(const MCInst &Inst);
1340   bool validateMovrels(const MCInst &Inst);
1341   bool validateMIMGDataSize(const MCInst &Inst);
1342   bool validateMIMGAddrSize(const MCInst &Inst);
1343   bool validateMIMGD16(const MCInst &Inst);
1344   bool validateMIMGDim(const MCInst &Inst);
1345   bool validateLdsDirect(const MCInst &Inst);
1346   bool validateOpSel(const MCInst &Inst);
1347   bool validateVccOperand(unsigned Reg) const;
1348   bool validateVOP3Literal(const MCInst &Inst) const;
1349   bool validateMAIAccWrite(const MCInst &Inst);
1350   unsigned getConstantBusLimit(unsigned Opcode) const;
1351   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1352   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1353   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1354 
1355   bool isId(const StringRef Id) const;
1356   bool isId(const AsmToken &Token, const StringRef Id) const;
1357   bool isToken(const AsmToken::TokenKind Kind) const;
1358   bool trySkipId(const StringRef Id);
1359   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1360   bool trySkipToken(const AsmToken::TokenKind Kind);
1361   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1362   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1363   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1364   AsmToken::TokenKind getTokenKind() const;
1365   bool parseExpr(int64_t &Imm);
1366   bool parseExpr(OperandVector &Operands);
1367   StringRef getTokenStr() const;
1368   AsmToken peekToken();
1369   AsmToken getToken() const;
1370   SMLoc getLoc() const;
1371   void lex();
1372 
1373 public:
1374   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1375   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1376 
1377   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1378   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1379   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1380   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1381   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1382   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1383 
1384   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1385                             const unsigned MinVal,
1386                             const unsigned MaxVal,
1387                             const StringRef ErrMsg);
1388   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1389   bool parseSwizzleOffset(int64_t &Imm);
1390   bool parseSwizzleMacro(int64_t &Imm);
1391   bool parseSwizzleQuadPerm(int64_t &Imm);
1392   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1393   bool parseSwizzleBroadcast(int64_t &Imm);
1394   bool parseSwizzleSwap(int64_t &Imm);
1395   bool parseSwizzleReverse(int64_t &Imm);
1396 
1397   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1398   int64_t parseGPRIdxMacro();
1399 
1400   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1401   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1402   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1403   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1404   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1405 
1406   AMDGPUOperand::Ptr defaultDLC() const;
1407   AMDGPUOperand::Ptr defaultGLC() const;
1408   AMDGPUOperand::Ptr defaultSLC() const;
1409 
1410   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1411   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1412   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1413   AMDGPUOperand::Ptr defaultFlatOffset() const;
1414 
1415   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1416 
1417   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1418                OptionalImmIndexMap &OptionalIdx);
1419   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1420   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1421   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1422 
1423   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1424 
1425   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1426                bool IsAtomic = false);
1427   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1428 
1429   OperandMatchResultTy parseDim(OperandVector &Operands);
1430   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1431   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1432   AMDGPUOperand::Ptr defaultRowMask() const;
1433   AMDGPUOperand::Ptr defaultBankMask() const;
1434   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1435   AMDGPUOperand::Ptr defaultFI() const;
1436   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1437   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1438 
1439   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1440                                     AMDGPUOperand::ImmTy Type);
1441   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1442   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1443   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1444   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1445   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1446   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1447   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1448                uint64_t BasicInstType,
1449                bool SkipDstVcc = false,
1450                bool SkipSrcVcc = false);
1451 
1452   AMDGPUOperand::Ptr defaultBLGP() const;
1453   AMDGPUOperand::Ptr defaultCBSZ() const;
1454   AMDGPUOperand::Ptr defaultABID() const;
1455 
1456   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1457   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1458 };
1459 
1460 struct OptionalOperand {
1461   const char *Name;
1462   AMDGPUOperand::ImmTy Type;
1463   bool IsBit;
1464   bool (*ConvertResult)(int64_t&);
1465 };
1466 
1467 } // end anonymous namespace
1468 
1469 // May be called with an integer type of equivalent bitwidth.
1470 static const fltSemantics *getFltSemantics(unsigned Size) {
1471   switch (Size) {
1472   case 4:
1473     return &APFloat::IEEEsingle();
1474   case 8:
1475     return &APFloat::IEEEdouble();
1476   case 2:
1477     return &APFloat::IEEEhalf();
1478   default:
1479     llvm_unreachable("unsupported fp type");
1480   }
1481 }
1482 
1483 static const fltSemantics *getFltSemantics(MVT VT) {
1484   return getFltSemantics(VT.getSizeInBits() / 8);
1485 }
1486 
1487 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1488   switch (OperandType) {
1489   case AMDGPU::OPERAND_REG_IMM_INT32:
1490   case AMDGPU::OPERAND_REG_IMM_FP32:
1491   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1492   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1493   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1494   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1495     return &APFloat::IEEEsingle();
1496   case AMDGPU::OPERAND_REG_IMM_INT64:
1497   case AMDGPU::OPERAND_REG_IMM_FP64:
1498   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1499   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1500     return &APFloat::IEEEdouble();
1501   case AMDGPU::OPERAND_REG_IMM_INT16:
1502   case AMDGPU::OPERAND_REG_IMM_FP16:
1503   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1504   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1505   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1506   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1507   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1508   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1509   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1510   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1511   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1512   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1513     return &APFloat::IEEEhalf();
1514   default:
1515     llvm_unreachable("unsupported fp type");
1516   }
1517 }
1518 
1519 //===----------------------------------------------------------------------===//
1520 // Operand
1521 //===----------------------------------------------------------------------===//
1522 
1523 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1524   bool Lost;
1525 
1526   // Convert the literal to the operand's floating-point type.
1527   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1528                                                APFloat::rmNearestTiesToEven,
1529                                                &Lost);
1530   // We allow precision loss but not overflow or underflow.
1531   if (Status != APFloat::opOK &&
1532       Lost &&
1533       ((Status & APFloat::opOverflow)  != 0 ||
1534        (Status & APFloat::opUnderflow) != 0)) {
1535     return false;
1536   }
1537 
1538   return true;
1539 }
1540 
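// Returns true if Val fits into Size bits as either a signed or an unsigned
// value. For example, with Size == 16 both -1 (signed) and 0xFFFF (unsigned)
// are accepted, while 0x10000 is not.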
1541 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1542   return isUIntN(Size, Val) || isIntN(Size, Val);
1543 }
1544 
1545 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1546 
  // This is a hack to enable named inline values like shared_base
  // with both 32-bit and 64-bit operands. Note that these values
  // are defined as 32-bit operands only.
1551   if (isInlineValue()) {
1552     return true;
1553   }
1554 
1555   if (!isImmTy(ImmTyNone)) {
1556     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1557     return false;
1558   }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1562 
1563   APInt Literal(64, Imm.Val);
1564 
1565   if (Imm.IsFPImm) { // We got fp literal token
1566     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1567       return AMDGPU::isInlinableLiteral64(Imm.Val,
1568                                           AsmParser->hasInv2PiInlineImm());
1569     }
1570 
1571     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1572     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1573       return false;
1574 
1575     if (type.getScalarSizeInBits() == 16) {
1576       return AMDGPU::isInlinableLiteral16(
1577         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1578         AsmParser->hasInv2PiInlineImm());
1579     }
1580 
1581     // Check if single precision literal is inlinable
1582     return AMDGPU::isInlinableLiteral32(
1583       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1584       AsmParser->hasInv2PiInlineImm());
1585   }
1586 
1587   // We got int literal token.
1588   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1589     return AMDGPU::isInlinableLiteral64(Imm.Val,
1590                                         AsmParser->hasInv2PiInlineImm());
1591   }
1592 
1593   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1594     return false;
1595   }
1596 
1597   if (type.getScalarSizeInBits() == 16) {
1598     return AMDGPU::isInlinableLiteral16(
1599       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1600       AsmParser->hasInv2PiInlineImm());
1601   }
1602 
1603   return AMDGPU::isInlinableLiteral32(
1604     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1605     AsmParser->hasInv2PiInlineImm());
1606 }
1607 
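// Check whether this immediate can be encoded as a literal constant that
// follows the instruction, as opposed to an inline constant
// (see isInlinableImm above).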
1608 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1609   // Check that this immediate can be added as literal
1610   if (!isImmTy(ImmTyNone)) {
1611     return false;
1612   }
1613 
1614   if (!Imm.IsFPImm) {
1615     // We got int literal token.
1616 
1617     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to integer literals while preserving
      // the same semantics for VOP1/2/C and VOP3 because of integer
      // truncation. To avoid ambiguity, disable these cases.
1621       return false;
1622     }
1623 
1624     unsigned Size = type.getSizeInBits();
1625     if (Size == 64)
1626       Size = 32;
1627 
1628     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1629     // types.
1630     return isSafeTruncation(Imm.Val, Size);
1631   }
1632 
1633   // We got fp literal token
1634   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zero, but we accept
    // such literals.
1636     return true;
1637   }
1638 
1639   if (type == MVT::i64) { // Expected 64-bit int operand
1640     // We don't allow fp literals in 64-bit integer instructions. It is
1641     // unclear how we should encode them.
1642     return false;
1643   }
1644 
  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1648   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1649                      (type == MVT::v2i16)? MVT::i16 : type;
1650 
1651   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1652   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1653 }
1654 
1655 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1656   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1657 }
1658 
1659 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1660   if (AsmParser->isVI())
1661     return isVReg32();
1662   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1663     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1664   else
1665     return false;
1666 }
1667 
1668 bool AMDGPUOperand::isSDWAFP16Operand() const {
1669   return isSDWAOperand(MVT::f16);
1670 }
1671 
1672 bool AMDGPUOperand::isSDWAFP32Operand() const {
1673   return isSDWAOperand(MVT::f32);
1674 }
1675 
1676 bool AMDGPUOperand::isSDWAInt16Operand() const {
1677   return isSDWAOperand(MVT::i16);
1678 }
1679 
1680 bool AMDGPUOperand::isSDWAInt32Operand() const {
1681   return isSDWAOperand(MVT::i32);
1682 }
1683 
1684 bool AMDGPUOperand::isBoolReg() const {
1685   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1686          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1687 }
1688 
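// Apply the 'abs'/'neg' FP source modifiers directly to the literal bit
// pattern: 'abs' clears the sign bit and is applied before 'neg', which
// flips it. Size is the operand size in bytes.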
1689 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1690 {
1691   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1692   assert(Size == 2 || Size == 4 || Size == 8);
1693 
1694   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1695 
1696   if (Imm.Mods.Abs) {
1697     Val &= ~FpSignMask;
1698   }
1699   if (Imm.Mods.Neg) {
1700     Val ^= FpSignMask;
1701   }
1702 
1703   return Val;
1704 }
1705 
1706 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1707   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1708                              Inst.getNumOperands())) {
1709     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1712   } else {
1713     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1714     Inst.addOperand(MCOperand::createImm(Imm.Val));
1715   }
1716 }
1717 
1718 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1719   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1720   auto OpNum = Inst.getNumOperands();
1721   // Check that this operand accepts literals
1722   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1723 
1724   if (ApplyModifiers) {
1725     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1726     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1727     Val = applyInputFPModifiers(Val, Size);
1728   }
1729 
1730   APInt Literal(64, Val);
1731   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1732 
1733   if (Imm.IsFPImm) { // We got fp literal token
1734     switch (OpTy) {
1735     case AMDGPU::OPERAND_REG_IMM_INT64:
1736     case AMDGPU::OPERAND_REG_IMM_FP64:
1737     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1738     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1739       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1740                                        AsmParser->hasInv2PiInlineImm())) {
1741         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1742         return;
1743       }
1744 
1745       // Non-inlineable
1746       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1747         // For fp operands we check if low 32 bits are zeros
1748         if (Literal.getLoBits(32) != 0) {
1749           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1750           "Can't encode literal as exact 64-bit floating-point operand. "
1751           "Low 32-bits will be set to zero");
1752         }
1753 
1754         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1755         return;
1756       }
1757 
1758       // We don't allow fp literals in 64-bit integer instructions. It is
1759       // unclear how we should encode them. This case should be checked earlier
1760       // in predicate methods (isLiteralImm())
1761       llvm_unreachable("fp literal in 64-bit integer instruction.");
1762 
1763     case AMDGPU::OPERAND_REG_IMM_INT32:
1764     case AMDGPU::OPERAND_REG_IMM_FP32:
1765     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1766     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1767     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1768     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1769     case AMDGPU::OPERAND_REG_IMM_INT16:
1770     case AMDGPU::OPERAND_REG_IMM_FP16:
1771     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1772     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1773     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1774     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1775     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1776     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1777     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1778     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1779     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1780     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool Lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the FP semantics of the operand type
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &Lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()
1788 
1789       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1790       Inst.addOperand(MCOperand::createImm(ImmVal));
1791       return;
1792     }
1793     default:
1794       llvm_unreachable("invalid operand size");
1795     }
1796 
1797     return;
1798   }
1799 
1800   // We got int literal token.
1801   // Only sign extend inline immediates.
1802   switch (OpTy) {
1803   case AMDGPU::OPERAND_REG_IMM_INT32:
1804   case AMDGPU::OPERAND_REG_IMM_FP32:
1805   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1806   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1808   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1809   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1810   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1811     if (isSafeTruncation(Val, 32) &&
1812         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1813                                      AsmParser->hasInv2PiInlineImm())) {
1814       Inst.addOperand(MCOperand::createImm(Val));
1815       return;
1816     }
1817 
1818     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1819     return;
1820 
1821   case AMDGPU::OPERAND_REG_IMM_INT64:
1822   case AMDGPU::OPERAND_REG_IMM_FP64:
1823   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1824   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1825     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1826       Inst.addOperand(MCOperand::createImm(Val));
1827       return;
1828     }
1829 
1830     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1831     return;
1832 
1833   case AMDGPU::OPERAND_REG_IMM_INT16:
1834   case AMDGPU::OPERAND_REG_IMM_FP16:
1835   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1836   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1838   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1839     if (isSafeTruncation(Val, 16) &&
1840         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1841                                      AsmParser->hasInv2PiInlineImm())) {
1842       Inst.addOperand(MCOperand::createImm(Val));
1843       return;
1844     }
1845 
1846     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1847     return;
1848 
1849   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1850   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1851   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1852   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1853     assert(isSafeTruncation(Val, 16));
1854     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1855                                         AsmParser->hasInv2PiInlineImm()));
1856 
1857     Inst.addOperand(MCOperand::createImm(Val));
1858     return;
1859   }
1860   default:
1861     llvm_unreachable("invalid operand size");
1862   }
1863 }
1864 
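// Encode a KImm operand (a literal embedded in the instruction encoding,
// used e.g. by v_madmk/v_madak). Integer tokens are truncated to Bitwidth
// bits; FP tokens are first converted to the Bitwidth-sized FP format.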
1865 template <unsigned Bitwidth>
1866 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1867   APInt Literal(64, Imm.Val);
1868 
1869   if (!Imm.IsFPImm) {
1870     // We got int literal token.
1871     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1872     return;
1873   }
1874 
1875   bool Lost;
1876   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1877   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1878                     APFloat::rmNearestTiesToEven, &Lost);
1879   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1880 }
1881 
1882 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1883   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1884 }
1885 
1886 static bool isInlineValue(unsigned Reg) {
1887   switch (Reg) {
1888   case AMDGPU::SRC_SHARED_BASE:
1889   case AMDGPU::SRC_SHARED_LIMIT:
1890   case AMDGPU::SRC_PRIVATE_BASE:
1891   case AMDGPU::SRC_PRIVATE_LIMIT:
1892   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1893     return true;
1894   case AMDGPU::SRC_VCCZ:
1895   case AMDGPU::SRC_EXECZ:
1896   case AMDGPU::SRC_SCC:
1897     return true;
1898   case AMDGPU::SGPR_NULL:
1899     return true;
1900   default:
1901     return false;
1902   }
1903 }
1904 
1905 bool AMDGPUOperand::isInlineValue() const {
1906   return isRegKind() && ::isInlineValue(getReg());
1907 }
1908 
1909 //===----------------------------------------------------------------------===//
1910 // AsmParser
1911 //===----------------------------------------------------------------------===//
1912 
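// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no such class exists.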
1913 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1914   if (Is == IS_VGPR) {
1915     switch (RegWidth) {
1916       default: return -1;
1917       case 1: return AMDGPU::VGPR_32RegClassID;
1918       case 2: return AMDGPU::VReg_64RegClassID;
1919       case 3: return AMDGPU::VReg_96RegClassID;
1920       case 4: return AMDGPU::VReg_128RegClassID;
1921       case 5: return AMDGPU::VReg_160RegClassID;
1922       case 6: return AMDGPU::VReg_192RegClassID;
1923       case 8: return AMDGPU::VReg_256RegClassID;
1924       case 16: return AMDGPU::VReg_512RegClassID;
1925       case 32: return AMDGPU::VReg_1024RegClassID;
1926     }
1927   } else if (Is == IS_TTMP) {
1928     switch (RegWidth) {
1929       default: return -1;
1930       case 1: return AMDGPU::TTMP_32RegClassID;
1931       case 2: return AMDGPU::TTMP_64RegClassID;
1932       case 4: return AMDGPU::TTMP_128RegClassID;
1933       case 8: return AMDGPU::TTMP_256RegClassID;
1934       case 16: return AMDGPU::TTMP_512RegClassID;
1935     }
1936   } else if (Is == IS_SGPR) {
1937     switch (RegWidth) {
1938       default: return -1;
1939       case 1: return AMDGPU::SGPR_32RegClassID;
1940       case 2: return AMDGPU::SGPR_64RegClassID;
1941       case 3: return AMDGPU::SGPR_96RegClassID;
1942       case 4: return AMDGPU::SGPR_128RegClassID;
1943       case 5: return AMDGPU::SGPR_160RegClassID;
1944       case 6: return AMDGPU::SGPR_192RegClassID;
1945       case 8: return AMDGPU::SGPR_256RegClassID;
1946       case 16: return AMDGPU::SGPR_512RegClassID;
1947     }
1948   } else if (Is == IS_AGPR) {
1949     switch (RegWidth) {
1950       default: return -1;
1951       case 1: return AMDGPU::AGPR_32RegClassID;
1952       case 2: return AMDGPU::AReg_64RegClassID;
1953       case 3: return AMDGPU::AReg_96RegClassID;
1954       case 4: return AMDGPU::AReg_128RegClassID;
1955       case 5: return AMDGPU::AReg_160RegClassID;
1956       case 6: return AMDGPU::AReg_192RegClassID;
1957       case 8: return AMDGPU::AReg_256RegClassID;
1958       case 16: return AMDGPU::AReg_512RegClassID;
1959       case 32: return AMDGPU::AReg_1024RegClassID;
1960     }
1961   }
1962   return -1;
1963 }
1964 
1965 static unsigned getSpecialRegForName(StringRef RegName) {
1966   return StringSwitch<unsigned>(RegName)
1967     .Case("exec", AMDGPU::EXEC)
1968     .Case("vcc", AMDGPU::VCC)
1969     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1970     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1971     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1972     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1973     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1974     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1975     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1976     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1977     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1978     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1979     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1980     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1981     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1982     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1983     .Case("m0", AMDGPU::M0)
1984     .Case("vccz", AMDGPU::SRC_VCCZ)
1985     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1986     .Case("execz", AMDGPU::SRC_EXECZ)
1987     .Case("src_execz", AMDGPU::SRC_EXECZ)
1988     .Case("scc", AMDGPU::SRC_SCC)
1989     .Case("src_scc", AMDGPU::SRC_SCC)
1990     .Case("tba", AMDGPU::TBA)
1991     .Case("tma", AMDGPU::TMA)
1992     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1993     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1994     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1995     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1996     .Case("vcc_lo", AMDGPU::VCC_LO)
1997     .Case("vcc_hi", AMDGPU::VCC_HI)
1998     .Case("exec_lo", AMDGPU::EXEC_LO)
1999     .Case("exec_hi", AMDGPU::EXEC_HI)
2000     .Case("tma_lo", AMDGPU::TMA_LO)
2001     .Case("tma_hi", AMDGPU::TMA_HI)
2002     .Case("tba_lo", AMDGPU::TBA_LO)
2003     .Case("tba_hi", AMDGPU::TBA_HI)
2004     .Case("pc", AMDGPU::PC_REG)
2005     .Case("null", AMDGPU::SGPR_NULL)
2006     .Default(AMDGPU::NoRegister);
2007 }
2008 
2009 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2010                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2011   auto R = parseRegister();
2012   if (!R) return true;
2013   assert(R->isReg());
2014   RegNo = R->getReg();
2015   StartLoc = R->getStartLoc();
2016   EndLoc = R->getEndLoc();
2017   return false;
2018 }
2019 
2020 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2021                                     SMLoc &EndLoc) {
2022   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2023 }
2024 
2025 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2026                                                        SMLoc &StartLoc,
2027                                                        SMLoc &EndLoc) {
2028   bool Result =
2029       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2030   bool PendingErrors = getParser().hasPendingError();
2031   getParser().clearPendingErrors();
2032   if (PendingErrors)
2033     return MatchOperand_ParseFail;
2034   if (Result)
2035     return MatchOperand_NoMatch;
2036   return MatchOperand_Success;
2037 }
2038 
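// Try to extend the register range or list currently being parsed with one
// more register. Special registers only combine as known lo/hi pairs (e.g.
// vcc_lo followed by vcc_hi yields vcc); regular registers must be
// consecutive, in which case RegWidth is incremented.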
2039 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2040                                             RegisterKind RegKind, unsigned Reg1) {
2041   switch (RegKind) {
2042   case IS_SPECIAL:
2043     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2044       Reg = AMDGPU::EXEC;
2045       RegWidth = 2;
2046       return true;
2047     }
2048     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2049       Reg = AMDGPU::FLAT_SCR;
2050       RegWidth = 2;
2051       return true;
2052     }
2053     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2054       Reg = AMDGPU::XNACK_MASK;
2055       RegWidth = 2;
2056       return true;
2057     }
2058     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2059       Reg = AMDGPU::VCC;
2060       RegWidth = 2;
2061       return true;
2062     }
2063     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2064       Reg = AMDGPU::TBA;
2065       RegWidth = 2;
2066       return true;
2067     }
2068     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2069       Reg = AMDGPU::TMA;
2070       RegWidth = 2;
2071       return true;
2072     }
2073     return false;
2074   case IS_VGPR:
2075   case IS_SGPR:
2076   case IS_AGPR:
2077   case IS_TTMP:
2078     if (Reg1 != Reg + RegWidth) {
2079       return false;
2080     }
2081     RegWidth++;
2082     return true;
2083   default:
2084     llvm_unreachable("unexpected register kind");
2085   }
2086 }
2087 
2088 struct RegInfo {
2089   StringLiteral Name;
2090   RegisterKind Kind;
2091 };
2092 
2093 static constexpr RegInfo RegularRegisters[] = {
2094   {{"v"},    IS_VGPR},
2095   {{"s"},    IS_SGPR},
2096   {{"ttmp"}, IS_TTMP},
2097   {{"acc"},  IS_AGPR},
2098   {{"a"},    IS_AGPR},
2099 };
2100 
2101 static bool isRegularReg(RegisterKind Kind) {
2102   return Kind == IS_VGPR ||
2103          Kind == IS_SGPR ||
2104          Kind == IS_TTMP ||
2105          Kind == IS_AGPR;
2106 }
2107 
2108 static const RegInfo* getRegularRegInfo(StringRef Str) {
2109   for (const RegInfo &Reg : RegularRegisters)
2110     if (Str.startswith(Reg.Name))
2111       return &Reg;
2112   return nullptr;
2113 }
2114 
2115 static bool getRegNum(StringRef Str, unsigned& Num) {
2116   return !Str.getAsInteger(10, Num);
2117 }
2118 
2119 bool
2120 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2121                             const AsmToken &NextToken) const {
2122 
2123   // A list of consecutive registers: [s0,s1,s2,s3]
2124   if (Token.is(AsmToken::LBrac))
2125     return true;
2126 
2127   if (!Token.is(AsmToken::Identifier))
2128     return false;
2129 
2130   // A single register like s0 or a range of registers like s[0:1]
2131 
2132   StringRef Str = Token.getString();
2133   const RegInfo *Reg = getRegularRegInfo(Str);
2134   if (Reg) {
2135     StringRef RegName = Reg->Name;
2136     StringRef RegSuffix = Str.substr(RegName.size());
2137     if (!RegSuffix.empty()) {
2138       unsigned Num;
2139       // A single register with an index: rXX
2140       if (getRegNum(RegSuffix, Num))
2141         return true;
2142     } else {
2143       // A range of registers: r[XX:YY].
2144       if (NextToken.is(AsmToken::LBrac))
2145         return true;
2146     }
2147   }
2148 
2149   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2150 }
2151 
2152 bool
2153 AMDGPUAsmParser::isRegister()
2154 {
2155   return isRegister(getToken(), peekToken());
2156 }
2157 
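// Compute the MC register for a regular register operand. For example, with
// RegKind == IS_SGPR, RegNum == 4 and RegWidth == 4 (i.e. s[4:7]), the
// required alignment is 4 dwords, so this returns the register at index 1 of
// the SGPR_128 register class. Misaligned ranges (e.g. s[3:6]) yield
// AMDGPU::NoRegister.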
2158 unsigned
2159 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2160                                unsigned RegNum,
2161                                unsigned RegWidth) {
2162 
2163   assert(isRegularReg(RegKind));
2164 
2165   unsigned AlignSize = 1;
2166   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2167     // SGPR and TTMP registers must be aligned.
2168     // Max required alignment is 4 dwords.
2169     AlignSize = std::min(RegWidth, 4u);
2170   }
2171 
2172   if (RegNum % AlignSize != 0)
2173     return AMDGPU::NoRegister;
2174 
2175   unsigned RegIdx = RegNum / AlignSize;
2176   int RCID = getRegClass(RegKind, RegWidth);
2177   if (RCID == -1)
2178     return AMDGPU::NoRegister;
2179 
2180   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass &RC = TRI->getRegClass(RCID);
2182   if (RegIdx >= RC.getNumRegs())
2183     return AMDGPU::NoRegister;
2184 
2185   return RC.getRegister(RegIdx);
2186 }
2187 
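// Parse a register index range of the form "[Lo:Hi]" or a single index
// "[Lo]". On success Num holds the first index and Width the number of
// 32-bit registers in the range.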
2188 bool
2189 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2190   int64_t RegLo, RegHi;
2191   if (!trySkipToken(AsmToken::LBrac))
2192     return false;
2193 
2194   if (!parseExpr(RegLo))
2195     return false;
2196 
2197   if (trySkipToken(AsmToken::Colon)) {
2198     if (!parseExpr(RegHi))
2199       return false;
2200   } else {
2201     RegHi = RegLo;
2202   }
2203 
2204   if (!trySkipToken(AsmToken::RBrac))
2205     return false;
2206 
2207   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2208     return false;
2209 
2210   Num = static_cast<unsigned>(RegLo);
2211   Width = (RegHi - RegLo) + 1;
2212   return true;
2213 }
2214 
2215 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2216                                           unsigned &RegNum, unsigned &RegWidth,
2217                                           SmallVectorImpl<AsmToken> &Tokens) {
2218   assert(isToken(AsmToken::Identifier));
2219   unsigned Reg = getSpecialRegForName(getTokenStr());
2220   if (Reg) {
2221     RegNum = 0;
2222     RegWidth = 1;
2223     RegKind = IS_SPECIAL;
2224     Tokens.push_back(getToken());
2225     lex(); // skip register name
2226   }
2227   return Reg;
2228 }
2229 
2230 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2231                                           unsigned &RegNum, unsigned &RegWidth,
2232                                           SmallVectorImpl<AsmToken> &Tokens) {
2233   assert(isToken(AsmToken::Identifier));
2234   StringRef RegName = getTokenStr();
2235 
2236   const RegInfo *RI = getRegularRegInfo(RegName);
2237   if (!RI)
2238     return AMDGPU::NoRegister;
2239   Tokens.push_back(getToken());
2240   lex(); // skip register name
2241 
2242   RegKind = RI->Kind;
2243   StringRef RegSuffix = RegName.substr(RI->Name.size());
2244   if (!RegSuffix.empty()) {
2245     // Single 32-bit register: vXX.
2246     if (!getRegNum(RegSuffix, RegNum))
2247       return AMDGPU::NoRegister;
2248     RegWidth = 1;
2249   } else {
2250     // Range of registers: v[XX:YY]. ":YY" is optional.
2251     if (!ParseRegRange(RegNum, RegWidth))
2252       return AMDGPU::NoRegister;
2253   }
2254 
2255   return getRegularReg(RegKind, RegNum, RegWidth);
2256 }
2257 
2258 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2259                                        unsigned &RegWidth,
2260                                        SmallVectorImpl<AsmToken> &Tokens) {
2261   unsigned Reg = AMDGPU::NoRegister;
2262 
2263   if (!trySkipToken(AsmToken::LBrac))
2264     return AMDGPU::NoRegister;
2265 
2266   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2267 
2268   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2269     return AMDGPU::NoRegister;
2270   if (RegWidth != 1)
2271     return AMDGPU::NoRegister;
2272 
2273   for (; trySkipToken(AsmToken::Comma); ) {
2274     RegisterKind NextRegKind;
2275     unsigned NextReg, NextRegNum, NextRegWidth;
2276 
2277     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2278                              Tokens))
2279       return AMDGPU::NoRegister;
2280     if (NextRegWidth != 1)
2281       return AMDGPU::NoRegister;
2282     if (NextRegKind != RegKind)
2283       return AMDGPU::NoRegister;
2284     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2285       return AMDGPU::NoRegister;
2286   }
2287 
2288   if (!trySkipToken(AsmToken::RBrac))
2289     return AMDGPU::NoRegister;
2290 
2291   if (isRegularReg(RegKind))
2292     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2293 
2294   return Reg;
2295 }
2296 
2297 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2298                                           unsigned &RegNum, unsigned &RegWidth,
2299                                           SmallVectorImpl<AsmToken> &Tokens) {
2300   Reg = AMDGPU::NoRegister;
2301 
2302   if (isToken(AsmToken::Identifier)) {
2303     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2304     if (Reg == AMDGPU::NoRegister)
2305       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2306   } else {
2307     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2308   }
2309 
2310   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2311   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2312 }
2313 
2314 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2315                                           unsigned &RegNum, unsigned &RegWidth,
2316                                           bool RestoreOnFailure) {
2317   Reg = AMDGPU::NoRegister;
2318 
2319   SmallVector<AsmToken, 1> Tokens;
2320   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2321     if (RestoreOnFailure) {
2322       while (!Tokens.empty()) {
2323         getLexer().UnLex(Tokens.pop_back_val());
2324       }
2325     }
2326     return true;
2327   }
2328   return false;
2329 }
2330 
2331 Optional<StringRef>
2332 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2333   switch (RegKind) {
2334   case IS_VGPR:
2335     return StringRef(".amdgcn.next_free_vgpr");
2336   case IS_SGPR:
2337     return StringRef(".amdgcn.next_free_sgpr");
2338   default:
2339     return None;
2340   }
2341 }
2342 
2343 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2344   auto SymbolName = getGprCountSymbolName(RegKind);
2345   assert(SymbolName && "initializing invalid register kind");
2346   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2347   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2348 }
2349 
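// Bump the .amdgcn.next_free_{v,s}gpr symbols so they cover the highest
// register index referenced so far by hand-written assembly.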
2350 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2351                                             unsigned DwordRegIndex,
2352                                             unsigned RegWidth) {
2353   // Symbols are only defined for GCN targets
2354   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2355     return true;
2356 
2357   auto SymbolName = getGprCountSymbolName(RegKind);
2358   if (!SymbolName)
2359     return true;
2360   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2361 
2362   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2363   int64_t OldCount;
2364 
2365   if (!Sym->isVariable())
2366     return !Error(getParser().getTok().getLoc(),
2367                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2368   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2369     return !Error(
2370         getParser().getTok().getLoc(),
2371         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2372 
2373   if (OldCount <= NewMax)
2374     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2375 
2376   return true;
2377 }
2378 
2379 std::unique_ptr<AMDGPUOperand>
2380 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2381   const auto &Tok = Parser.getTok();
2382   SMLoc StartLoc = Tok.getLoc();
2383   SMLoc EndLoc = Tok.getEndLoc();
2384   RegisterKind RegKind;
2385   unsigned Reg, RegNum, RegWidth;
2386 
2387   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    // FIXME: improve error messages (bug 41303).
2389     Error(StartLoc, "not a valid operand.");
2390     return nullptr;
2391   }
2392   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2393     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2394       return nullptr;
2395   } else
2396     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2397   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2398 }
2399 
2400 OperandMatchResultTy
2401 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2402   // TODO: add syntactic sugar for 1/(2*PI)
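  // Accepted forms: floating-point literals with an optional leading '-'
  // (e.g. -1.5) and integer/symbolic expressions that either evaluate to an
  // absolute value (e.g. 2+2) or are kept as MC expressions.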
2403 
2404   assert(!isRegister());
2405   assert(!isModifier());
2406 
2407   const auto& Tok = getToken();
2408   const auto& NextTok = peekToken();
2409   bool IsReal = Tok.is(AsmToken::Real);
2410   SMLoc S = getLoc();
2411   bool Negate = false;
2412 
2413   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2414     lex();
2415     IsReal = true;
2416     Negate = true;
2417   }
2418 
2419   if (IsReal) {
    // Floating-point expressions are not supported; we can only accept
    // floating-point literals with an optional sign.
2423 
2424     StringRef Num = getTokenStr();
2425     lex();
2426 
2427     APFloat RealVal(APFloat::IEEEdouble());
2428     auto roundMode = APFloat::rmNearestTiesToEven;
2429     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2430       return MatchOperand_ParseFail;
2431     }
2432     if (Negate)
2433       RealVal.changeSign();
2434 
2435     Operands.push_back(
2436       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2437                                AMDGPUOperand::ImmTyNone, true));
2438 
2439     return MatchOperand_Success;
2440 
2441   } else {
2442     int64_t IntVal;
2443     const MCExpr *Expr;
2444     SMLoc S = getLoc();
2445 
2446     if (HasSP3AbsModifier) {
2447       // This is a workaround for handling expressions
2448       // as arguments of SP3 'abs' modifier, for example:
2449       //     |1.0|
2450       //     |-1|
2451       //     |1+x|
2452       // This syntax is not compatible with syntax of standard
2453       // MC expressions (due to the trailing '|').
2454       SMLoc EndLoc;
2455       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2456         return MatchOperand_ParseFail;
2457     } else {
2458       if (Parser.parseExpression(Expr))
2459         return MatchOperand_ParseFail;
2460     }
2461 
2462     if (Expr->evaluateAsAbsolute(IntVal)) {
2463       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2464     } else {
2465       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2466     }
2467 
2468     return MatchOperand_Success;
2469   }
2470 
2471   return MatchOperand_NoMatch;
2472 }
2473 
2474 OperandMatchResultTy
2475 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2476   if (!isRegister())
2477     return MatchOperand_NoMatch;
2478 
2479   if (auto R = parseRegister()) {
2480     assert(R->isReg());
2481     Operands.push_back(std::move(R));
2482     return MatchOperand_Success;
2483   }
2484   return MatchOperand_ParseFail;
2485 }
2486 
2487 OperandMatchResultTy
2488 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2489   auto res = parseReg(Operands);
2490   if (res != MatchOperand_NoMatch) {
2491     return res;
2492   } else if (isModifier()) {
2493     return MatchOperand_NoMatch;
2494   } else {
2495     return parseImm(Operands, HasSP3AbsMod);
2496   }
2497 }
2498 
2499 bool
2500 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2501   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2502     const auto &str = Token.getString();
2503     return str == "abs" || str == "neg" || str == "sext";
2504   }
2505   return false;
2506 }
2507 
2508 bool
2509 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2510   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2511 }
2512 
2513 bool
2514 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2515   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2516 }
2517 
2518 bool
2519 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2520   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2521 }
2522 
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
2526 // recognized sequences are:
2527 //   |...|
2528 //   abs(...)
2529 //   neg(...)
2530 //   sext(...)
2531 //   -reg
2532 //   -|...|
2533 //   -abs(...)
2534 //   name:...
2535 // Note that simple opcode modifiers like 'gds' may be parsed as
2536 // expressions; this is a special case. See getExpressionAsToken.
2537 //
2538 bool
2539 AMDGPUAsmParser::isModifier() {
2540 
2541   AsmToken Tok = getToken();
2542   AsmToken NextToken[2];
2543   peekTokens(NextToken);
2544 
2545   return isOperandModifier(Tok, NextToken[0]) ||
2546          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2547          isOpcodeModifierWithVal(Tok, NextToken[0]);
2548 }
2549 
2550 // Check if the current token is an SP3 'neg' modifier.
2551 // Currently this modifier is allowed in the following context:
2552 //
2553 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2554 // 2. Before an 'abs' modifier: -abs(...)
2555 // 3. Before an SP3 'abs' modifier: -|...|
2556 //
// In all other cases "-" is handled as part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of the
// floating-point NEG modifier would have given integer
// literals different meanings with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2571 //
2572 bool
2573 AMDGPUAsmParser::parseSP3NegModifier() {
2574 
2575   AsmToken NextToken[2];
2576   peekTokens(NextToken);
2577 
2578   if (isToken(AsmToken::Minus) &&
2579       (isRegister(NextToken[0], NextToken[1]) ||
2580        NextToken[0].is(AsmToken::Pipe) ||
2581        isId(NextToken[0], "abs"))) {
2582     lex();
2583     return true;
2584   }
2585 
2586   return false;
2587 }
2588 
2589 OperandMatchResultTy
2590 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2591                                               bool AllowImm) {
2592   bool Neg, SP3Neg;
2593   bool Abs, SP3Abs;
2594   SMLoc Loc;
2595 
2596   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2597   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2598     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2599     return MatchOperand_ParseFail;
2600   }
2601 
2602   SP3Neg = parseSP3NegModifier();
2603 
2604   Loc = getLoc();
2605   Neg = trySkipId("neg");
2606   if (Neg && SP3Neg) {
2607     Error(Loc, "expected register or immediate");
2608     return MatchOperand_ParseFail;
2609   }
2610   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2611     return MatchOperand_ParseFail;
2612 
2613   Abs = trySkipId("abs");
2614   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2615     return MatchOperand_ParseFail;
2616 
2617   Loc = getLoc();
2618   SP3Abs = trySkipToken(AsmToken::Pipe);
2619   if (Abs && SP3Abs) {
2620     Error(Loc, "expected register or immediate");
2621     return MatchOperand_ParseFail;
2622   }
2623 
2624   OperandMatchResultTy Res;
2625   if (AllowImm) {
2626     Res = parseRegOrImm(Operands, SP3Abs);
2627   } else {
2628     Res = parseReg(Operands);
2629   }
2630   if (Res != MatchOperand_Success) {
2631     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2632   }
2633 
2634   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2635     return MatchOperand_ParseFail;
2636   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2637     return MatchOperand_ParseFail;
2638   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2639     return MatchOperand_ParseFail;
2640 
2641   AMDGPUOperand::Modifiers Mods;
2642   Mods.Abs = Abs || SP3Abs;
2643   Mods.Neg = Neg || SP3Neg;
2644 
2645   if (Mods.hasFPModifiers()) {
2646     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2647     if (Op.isExpr()) {
2648       Error(Op.getStartLoc(), "expected an absolute expression");
2649       return MatchOperand_ParseFail;
2650     }
2651     Op.setModifiers(Mods);
2652   }
2653   return MatchOperand_Success;
2654 }
2655 
2656 OperandMatchResultTy
2657 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2658                                                bool AllowImm) {
2659   bool Sext = trySkipId("sext");
2660   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2661     return MatchOperand_ParseFail;
2662 
2663   OperandMatchResultTy Res;
2664   if (AllowImm) {
2665     Res = parseRegOrImm(Operands);
2666   } else {
2667     Res = parseReg(Operands);
2668   }
2669   if (Res != MatchOperand_Success) {
2670     return Sext? MatchOperand_ParseFail : Res;
2671   }
2672 
2673   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2674     return MatchOperand_ParseFail;
2675 
2676   AMDGPUOperand::Modifiers Mods;
2677   Mods.Sext = Sext;
2678 
2679   if (Mods.hasIntModifiers()) {
2680     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2681     if (Op.isExpr()) {
2682       Error(Op.getStartLoc(), "expected an absolute expression");
2683       return MatchOperand_ParseFail;
2684     }
2685     Op.setModifiers(Mods);
2686   }
2687 
2688   return MatchOperand_Success;
2689 }
2690 
2691 OperandMatchResultTy
2692 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2693   return parseRegOrImmWithFPInputMods(Operands, false);
2694 }
2695 
2696 OperandMatchResultTy
2697 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2698   return parseRegOrImmWithIntInputMods(Operands, false);
2699 }
2700 
2701 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2702   auto Loc = getLoc();
2703   if (trySkipId("off")) {
2704     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2705                                                 AMDGPUOperand::ImmTyOff, false));
2706     return MatchOperand_Success;
2707   }
2708 
2709   if (!isRegister())
2710     return MatchOperand_NoMatch;
2711 
2712   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2713   if (Reg) {
2714     Operands.push_back(std::move(Reg));
2715     return MatchOperand_Success;
2716   }
2717 
2718   return MatchOperand_ParseFail;
2719 
2720 }
2721 
2722 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2723   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2724 
2725   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2726       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2727       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2728       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2729     return Match_InvalidOperand;
2730 
2731   if ((TSFlags & SIInstrFlags::VOP3) &&
2732       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2733       getForcedEncodingSize() != 64)
2734     return Match_PreferE32;
2735 
2736   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2737       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2738     // v_mac_f32/16 allow only dst_sel == DWORD;
2739     auto OpNum =
2740         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2741     const auto &Op = Inst.getOperand(OpNum);
2742     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2743       return Match_InvalidOperand;
2744     }
2745   }
2746 
2747   return Match_Success;
2748 }
2749 
// Which asm variants we should check
2751 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2752   if (getForcedEncodingSize() == 32) {
2753     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2754     return makeArrayRef(Variants);
2755   }
2756 
2757   if (isForcedVOP3()) {
2758     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2759     return makeArrayRef(Variants);
2760   }
2761 
2762   if (isForcedSDWA()) {
2763     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2764                                         AMDGPUAsmVariants::SDWA9};
2765     return makeArrayRef(Variants);
2766   }
2767 
2768   if (isForcedDPP()) {
2769     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2770     return makeArrayRef(Variants);
2771   }
2772 
2773   static const unsigned Variants[] = {
2774     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2775     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2776   };
2777 
2778   return makeArrayRef(Variants);
2779 }
2780 
2781 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2782   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2783   const unsigned Num = Desc.getNumImplicitUses();
2784   for (unsigned i = 0; i < Num; ++i) {
2785     unsigned Reg = Desc.ImplicitUses[i];
2786     switch (Reg) {
2787     case AMDGPU::FLAT_SCR:
2788     case AMDGPU::VCC:
2789     case AMDGPU::VCC_LO:
2790     case AMDGPU::VCC_HI:
2791     case AMDGPU::M0:
2792       return Reg;
2793     default:
2794       break;
2795     }
2796   }
2797   return AMDGPU::NoRegister;
2798 }
2799 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 has no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2804 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2805                                        unsigned OpIdx) const {
2806   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2807 
2808   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2809     return false;
2810   }
2811 
2812   const MCOperand &MO = Inst.getOperand(OpIdx);
2813 
2814   int64_t Val = MO.getImm();
2815   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2816 
2817   switch (OpSize) { // expected operand size
2818   case 8:
2819     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2820   case 4:
2821     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2822   case 2: {
2823     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2824     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2825         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2826         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2827         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2828         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2829         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2830       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2831     } else {
2832       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2833     }
2834   }
2835   default:
2836     llvm_unreachable("invalid operand size");
2837   }
2838 }
2839 
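// Number of scalar (SGPR or literal) values an instruction may read over the
// constant bus: one on pre-GFX10 targets, and on GFX10 two for most opcodes
// but only one for 64-bit shifts.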
2840 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2841   if (!isGFX10())
2842     return 1;
2843 
2844   switch (Opcode) {
2845   // 64-bit shift instructions can use only one scalar value input
2846   case AMDGPU::V_LSHLREV_B64:
2847   case AMDGPU::V_LSHLREV_B64_gfx10:
2848   case AMDGPU::V_LSHL_B64:
2849   case AMDGPU::V_LSHRREV_B64:
2850   case AMDGPU::V_LSHRREV_B64_gfx10:
2851   case AMDGPU::V_LSHR_B64:
2852   case AMDGPU::V_ASHRREV_I64:
2853   case AMDGPU::V_ASHRREV_I64_gfx10:
2854   case AMDGPU::V_ASHR_I64:
2855     return 1;
2856   default:
2857     return 2;
2858   }
2859 }
2860 
2861 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2862   const MCOperand &MO = Inst.getOperand(OpIdx);
2863   if (MO.isImm()) {
2864     return !isInlineConstant(Inst, OpIdx);
2865   } else if (MO.isReg()) {
2866     auto Reg = MO.getReg();
2867     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2868     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2869   } else {
2870     return true;
2871   }
2872 }
2873 
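// For example, on targets with a single constant bus slot an instruction
// like "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and is
// rejected by this check.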
2874 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2875   const unsigned Opcode = Inst.getOpcode();
2876   const MCInstrDesc &Desc = MII.get(Opcode);
2877   unsigned ConstantBusUseCount = 0;
2878   unsigned NumLiterals = 0;
2879   unsigned LiteralSize;
2880 
2881   if (Desc.TSFlags &
2882       (SIInstrFlags::VOPC |
2883        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2884        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2885        SIInstrFlags::SDWA)) {
2886     // Check special imm operands (used by madmk, etc)
2887     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2888       ++ConstantBusUseCount;
2889     }
2890 
2891     SmallDenseSet<unsigned> SGPRsUsed;
2892     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2893     if (SGPRUsed != AMDGPU::NoRegister) {
2894       SGPRsUsed.insert(SGPRUsed);
2895       ++ConstantBusUseCount;
2896     }
2897 
2898     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2899     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2900     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2901 
2902     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2903 
2904     for (int OpIdx : OpIndices) {
2905       if (OpIdx == -1) break;
2906 
2907       const MCOperand &MO = Inst.getOperand(OpIdx);
2908       if (usesConstantBus(Inst, OpIdx)) {
2909         if (MO.isReg()) {
2910           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
2917           if (!SGPRsUsed.count(Reg)) {
2918             SGPRsUsed.insert(Reg);
2919             ++ConstantBusUseCount;
2920           }
2921         } else { // Expression or a literal
2922 
2923           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2924             continue; // special operand like VINTERP attr_chan
2925 
2926           // An instruction may use only one literal.
2927           // This has been validated on the previous step.
2928           // See validateVOP3Literal.
2929           // This literal may be used as more than one operand.
2930           // If all these operands are of the same size,
2931           // this literal counts as one scalar value.
2932           // Otherwise it counts as 2 scalar values.
2933           // See "GFX10 Shader Programming", section 3.6.2.3.
2934 
2935           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2936           if (Size < 4) Size = 4;
2937 
2938           if (NumLiterals == 0) {
2939             NumLiterals = 1;
2940             LiteralSize = Size;
2941           } else if (LiteralSize != Size) {
2942             NumLiterals = 2;
2943           }
2944         }
2945       }
2946     }
2947   }
2948   ConstantBusUseCount += NumLiterals;
2949 
2950   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2951 }
2952 
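// Instructions whose vdst operand is marked earlyclobber must not have a
// source register that overlaps the destination register.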
2953 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2954   const unsigned Opcode = Inst.getOpcode();
2955   const MCInstrDesc &Desc = MII.get(Opcode);
2956 
2957   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2958   if (DstIdx == -1 ||
2959       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2960     return true;
2961   }
2962 
2963   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2964 
2965   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2966   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2967   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2968 
2969   assert(DstIdx != -1);
2970   const MCOperand &Dst = Inst.getOperand(DstIdx);
2971   assert(Dst.isReg());
2972   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2973 
2974   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2975 
2976   for (int SrcIdx : SrcIndices) {
2977     if (SrcIdx == -1) break;
2978     const MCOperand &Src = Inst.getOperand(SrcIdx);
2979     if (Src.isReg()) {
2980       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2981       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2982         return false;
2983       }
2984     }
2985   }
2986 
2987   return true;
2988 }
2989 
2990 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2991 
2992   const unsigned Opc = Inst.getOpcode();
2993   const MCInstrDesc &Desc = MII.get(Opc);
2994 
2995   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2996     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2997     assert(ClampIdx != -1);
2998     return Inst.getOperand(ClampIdx).getImm() == 0;
2999   }
3000 
3001   return true;
3002 }
3003 
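// Check that the width of vdata matches the number of enabled dmask channels
// (4 for gather4), plus one dword when tfe is set; with d16 on packed-d16
// targets, two components share a dword.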
3004 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3005 
3006   const unsigned Opc = Inst.getOpcode();
3007   const MCInstrDesc &Desc = MII.get(Opc);
3008 
3009   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3010     return true;
3011 
3012   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3013   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3014   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3015 
3016   assert(VDataIdx != -1);
3017   assert(DMaskIdx != -1);
3018   assert(TFEIdx != -1);
3019 
3020   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3021   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3022   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3023   if (DMask == 0)
3024     DMask = 1;
3025 
3026   unsigned DataSize =
3027     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3028   if (hasPackedD16()) {
3029     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3030     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3031       DataSize = (DataSize + 1) / 2;
3032   }
3033 
3034   return (VDataSize / 4) == DataSize + TFESize;
3035 }
3036 
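// GFX10 only: check that the number of vaddr registers (or NSA vaddr
// operands) matches what the dim, gradient, coordinate and lod/clamp/mip
// arguments require; non-NSA address sizes are rounded up to 8 or 16 dwords.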
3037 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3038   const unsigned Opc = Inst.getOpcode();
3039   const MCInstrDesc &Desc = MII.get(Opc);
3040 
3041   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3042     return true;
3043 
3044   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3045   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3046       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3047   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3048   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3049   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3050 
3051   assert(VAddr0Idx != -1);
3052   assert(SrsrcIdx != -1);
3053   assert(DimIdx != -1);
3054   assert(SrsrcIdx > VAddr0Idx);
3055 
3056   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3057   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3058   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3059   unsigned VAddrSize =
3060       IsNSA ? SrsrcIdx - VAddr0Idx
3061             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3062 
3063   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3064                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3065                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3066                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3067   if (!IsNSA) {
3068     if (AddrSize > 8)
3069       AddrSize = 16;
3070     else if (AddrSize > 4)
3071       AddrSize = 8;
3072   }
3073 
3074   return VAddrSize == AddrSize;
3075 }
3076 
3077 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3078 
3079   const unsigned Opc = Inst.getOpcode();
3080   const MCInstrDesc &Desc = MII.get(Opc);
3081 
3082   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3083     return true;
3084   if (!Desc.mayLoad() || !Desc.mayStore())
3085     return true; // Not atomic
3086 
3087   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3088   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3089 
3090   // This is an incomplete check because image_atomic_cmpswap
3091   // may only use 0x3 and 0xf while other atomic operations
3092   // may use 0x1 and 0x3. However, these limitations are
3093   // verified when we check that dmask matches dst size.
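  // For example (hypothetical operands): "image_atomic_add v4, v[0:1], s[0:7]
  // dmask:0x1 unorm glc" passes this check, while dmask:0x5 would be rejected.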
3094   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3095 }
3096 
3097 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3098 
3099   const unsigned Opc = Inst.getOpcode();
3100   const MCInstrDesc &Desc = MII.get(Opc);
3101 
3102   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3103     return true;
3104 
3105   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3106   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3107 
3108   // GATHER4 instructions use dmask in a different fashion compared to
3109   // other MIMG instructions. The only useful DMASK values are a single
3110   // bit: 1=red, 2=green, 4=blue, 8=alpha. For example, dmask=1 returns
3111   // the red component of each of the four sampled texels, i.e.
3112   // (red,red,red,red). The ISA document doesn't mention this.
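  // For example (hypothetical operands): "image_gather4 v[5:8], v[1:2],
  // s[8:15], s[12:15] dmask:0x4" gathers the blue channel; dmask:0x3 or
  // dmask:0xf would be rejected here.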
3113   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3114 }
3115 
3116 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3117 {
3118   switch (Opcode) {
3119   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3120   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3121   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3122     return true;
3123   default:
3124     return false;
3125   }
3126 }
3127 
3128 // movrels* opcodes should only allow VGPRs as src0.
3129 // This is specified in .td description for vop1/vop3,
3130 // but sdwa is handled differently. See isSDWAOperand.
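// For example (hypothetical operands): "v_movrels_b32_sdwa v0, v1" is
// accepted, while "v_movrels_b32_sdwa v0, s0" is rejected with
// "source operand must be a VGPR".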
3131 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3132 
3133   const unsigned Opc = Inst.getOpcode();
3134   const MCInstrDesc &Desc = MII.get(Opc);
3135 
3136   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3137     return true;
3138 
3139   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3140   assert(Src0Idx != -1);
3141 
3142   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3143   if (!Src0.isReg())
3144     return false;
3145 
3146   auto Reg = Src0.getReg();
3147   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3148   return !isSGPR(mc2PseudoReg(Reg), TRI);
3149 }
3150 
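// Illustrative example (hypothetical operands): "v_accvgpr_write_b32 a0, v1"
// and "v_accvgpr_write_b32 a0, 1" are accepted, while
// "v_accvgpr_write_b32 a0, s1" triggers the diagnostic below.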
3151 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3152 
3153   const unsigned Opc = Inst.getOpcode();
3154 
3155   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3156     return true;
3157 
3158   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3159   assert(Src0Idx != -1);
3160 
3161   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3162   if (!Src0.isReg())
3163     return true;
3164 
3165   auto Reg = Src0.getReg();
3166   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3167   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3168     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3169     return false;
3170   }
3171 
3172   return true;
3173 }
3174 
3175 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3176 
3177   const unsigned Opc = Inst.getOpcode();
3178   const MCInstrDesc &Desc = MII.get(Opc);
3179 
3180   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3181     return true;
3182 
3183   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3184   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3185     if (isCI() || isSI())
3186       return false;
3187   }
3188 
3189   return true;
3190 }
3191 
3192 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3193   const unsigned Opc = Inst.getOpcode();
3194   const MCInstrDesc &Desc = MII.get(Opc);
3195 
3196   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3197     return true;
3198 
3199   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3200   if (DimIdx < 0)
3201     return true;
3202 
3203   long Imm = Inst.getOperand(DimIdx).getImm();
3204   if (Imm < 0 || Imm >= 8)
3205     return false;
3206 
3207   return true;
3208 }
3209 
3210 static bool IsRevOpcode(const unsigned Opcode)
3211 {
3212   switch (Opcode) {
3213   case AMDGPU::V_SUBREV_F32_e32:
3214   case AMDGPU::V_SUBREV_F32_e64:
3215   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3216   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3217   case AMDGPU::V_SUBREV_F32_e32_vi:
3218   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3219   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3220   case AMDGPU::V_SUBREV_F32_e64_vi:
3221 
3222   case AMDGPU::V_SUBREV_I32_e32:
3223   case AMDGPU::V_SUBREV_I32_e64:
3224   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3225   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3226 
3227   case AMDGPU::V_SUBBREV_U32_e32:
3228   case AMDGPU::V_SUBBREV_U32_e64:
3229   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3230   case AMDGPU::V_SUBBREV_U32_e32_vi:
3231   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3232   case AMDGPU::V_SUBBREV_U32_e64_vi:
3233 
3234   case AMDGPU::V_SUBREV_U32_e32:
3235   case AMDGPU::V_SUBREV_U32_e64:
3236   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3237   case AMDGPU::V_SUBREV_U32_e32_vi:
3238   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3239   case AMDGPU::V_SUBREV_U32_e64_vi:
3240 
3241   case AMDGPU::V_SUBREV_F16_e32:
3242   case AMDGPU::V_SUBREV_F16_e64:
3243   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3244   case AMDGPU::V_SUBREV_F16_e32_vi:
3245   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3246   case AMDGPU::V_SUBREV_F16_e64_vi:
3247 
3248   case AMDGPU::V_SUBREV_U16_e32:
3249   case AMDGPU::V_SUBREV_U16_e64:
3250   case AMDGPU::V_SUBREV_U16_e32_vi:
3251   case AMDGPU::V_SUBREV_U16_e64_vi:
3252 
3253   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3254   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3255   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3256 
3257   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3258   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3259 
3260   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3261   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3262 
3263   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3264   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3265 
3266   case AMDGPU::V_LSHRREV_B32_e32:
3267   case AMDGPU::V_LSHRREV_B32_e64:
3268   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3269   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3270   case AMDGPU::V_LSHRREV_B32_e32_vi:
3271   case AMDGPU::V_LSHRREV_B32_e64_vi:
3272   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3273   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3274 
3275   case AMDGPU::V_ASHRREV_I32_e32:
3276   case AMDGPU::V_ASHRREV_I32_e64:
3277   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3278   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3279   case AMDGPU::V_ASHRREV_I32_e32_vi:
3280   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3281   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3282   case AMDGPU::V_ASHRREV_I32_e64_vi:
3283 
3284   case AMDGPU::V_LSHLREV_B32_e32:
3285   case AMDGPU::V_LSHLREV_B32_e64:
3286   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3287   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3288   case AMDGPU::V_LSHLREV_B32_e32_vi:
3289   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3290   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3291   case AMDGPU::V_LSHLREV_B32_e64_vi:
3292 
3293   case AMDGPU::V_LSHLREV_B16_e32:
3294   case AMDGPU::V_LSHLREV_B16_e64:
3295   case AMDGPU::V_LSHLREV_B16_e32_vi:
3296   case AMDGPU::V_LSHLREV_B16_e64_vi:
3297   case AMDGPU::V_LSHLREV_B16_gfx10:
3298 
3299   case AMDGPU::V_LSHRREV_B16_e32:
3300   case AMDGPU::V_LSHRREV_B16_e64:
3301   case AMDGPU::V_LSHRREV_B16_e32_vi:
3302   case AMDGPU::V_LSHRREV_B16_e64_vi:
3303   case AMDGPU::V_LSHRREV_B16_gfx10:
3304 
3305   case AMDGPU::V_ASHRREV_I16_e32:
3306   case AMDGPU::V_ASHRREV_I16_e64:
3307   case AMDGPU::V_ASHRREV_I16_e32_vi:
3308   case AMDGPU::V_ASHRREV_I16_e64_vi:
3309   case AMDGPU::V_ASHRREV_I16_gfx10:
3310 
3311   case AMDGPU::V_LSHLREV_B64:
3312   case AMDGPU::V_LSHLREV_B64_gfx10:
3313   case AMDGPU::V_LSHLREV_B64_vi:
3314 
3315   case AMDGPU::V_LSHRREV_B64:
3316   case AMDGPU::V_LSHRREV_B64_gfx10:
3317   case AMDGPU::V_LSHRREV_B64_vi:
3318 
3319   case AMDGPU::V_ASHRREV_I64:
3320   case AMDGPU::V_ASHRREV_I64_gfx10:
3321   case AMDGPU::V_ASHRREV_I64_vi:
3322 
3323   case AMDGPU::V_PK_LSHLREV_B16:
3324   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3325   case AMDGPU::V_PK_LSHLREV_B16_vi:
3326 
3327   case AMDGPU::V_PK_LSHRREV_B16:
3328   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3329   case AMDGPU::V_PK_LSHRREV_B16_vi:
3330   case AMDGPU::V_PK_ASHRREV_I16:
3331   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3332   case AMDGPU::V_PK_ASHRREV_I16_vi:
3333     return true;
3334   default:
3335     return false;
3336   }
3337 }
3338 
3339 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3340 
3341   using namespace SIInstrFlags;
3342   const unsigned Opcode = Inst.getOpcode();
3343   const MCInstrDesc &Desc = MII.get(Opcode);
3344 
3345   // The lds_direct register is defined so that it can be used
3346   // with 9-bit operands only. Ignore encodings which do not accept these.
3347   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3348     return true;
3349 
3350   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3351   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3352   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3353 
3354   const int SrcIndices[] = { Src1Idx, Src2Idx };
3355 
3356   // lds_direct cannot be specified as either src1 or src2.
3357   for (int SrcIdx : SrcIndices) {
3358     if (SrcIdx == -1) break;
3359     const MCOperand &Src = Inst.getOperand(SrcIdx);
3360     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3361       return false;
3362     }
3363   }
3364 
3365   if (Src0Idx == -1)
3366     return true;
3367 
3368   const MCOperand &Src = Inst.getOperand(Src0Idx);
3369   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3370     return true;
3371 
3372   // lds_direct is specified as src0. Check additional limitations.
3373   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3374 }
3375 
3376 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3377   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3378     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3379     if (Op.isFlatOffset())
3380       return Op.getStartLoc();
3381   }
3382   return getLoc();
3383 }
3384 
3385 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3386                                          const OperandVector &Operands) {
3387   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3388   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3389     return true;
3390 
3391   auto Opcode = Inst.getOpcode();
3392   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3393   assert(OpNum != -1);
3394 
3395   const auto &Op = Inst.getOperand(OpNum);
3396   if (!hasFlatOffsets() && Op.getImm() != 0) {
3397     Error(getFlatOffsetLoc(Operands),
3398           "flat offset modifier is not supported on this GPU");
3399     return false;
3400   }
3401 
3402   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3403   // For FLAT segment the offset must be positive;
3404   // MSB is ignored and forced to zero.
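  // For example (hypothetical operands), on GFX9 "global_load_dword v0, v[1:2],
  // off offset:-8" is accepted (13-bit signed), while
  // "flat_load_dword v0, v[1:2] offset:-8" is rejected because FLAT segment
  // offsets must be unsigned.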
3405   unsigned OffsetSize = isGFX9() ? 13 : 12;
3406   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3407     if (!isIntN(OffsetSize, Op.getImm())) {
3408       Error(getFlatOffsetLoc(Operands),
3409             isGFX9() ? "expected a 13-bit signed offset" :
3410                        "expected a 12-bit signed offset");
3411       return false;
3412     }
3413   } else {
3414     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3415       Error(getFlatOffsetLoc(Operands),
3416             isGFX9() ? "expected a 12-bit unsigned offset" :
3417                        "expected an 11-bit unsigned offset");
3418       return false;
3419     }
3420   }
3421 
3422   return true;
3423 }
3424 
3425 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3426   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3427     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3428     if (Op.isSMEMOffset())
3429       return Op.getStartLoc();
3430   }
3431   return getLoc();
3432 }
3433 
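// Illustrative example (hypothetical operands): on VI "s_load_dword s0,
// s[0:1], 0xfffff" uses the maximum 20-bit unsigned offset, while GFX9 also
// accepts negative values that fit in a 21-bit signed offset.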
3434 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3435                                          const OperandVector &Operands) {
3436   if (isCI() || isSI())
3437     return true;
3438 
3439   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3440   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3441     return true;
3442 
3443   auto Opcode = Inst.getOpcode();
3444   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3445   if (OpNum == -1)
3446     return true;
3447 
3448   const auto &Op = Inst.getOperand(OpNum);
3449   if (!Op.isImm())
3450     return true;
3451 
3452   uint64_t Offset = Op.getImm();
3453   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3454   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3455       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3456     return true;
3457 
3458   Error(getSMEMOffsetLoc(Operands),
3459         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3460                                "expected a 21-bit signed offset");
3461 
3462   return false;
3463 }
3464 
3465 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3466   unsigned Opcode = Inst.getOpcode();
3467   const MCInstrDesc &Desc = MII.get(Opcode);
3468   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3469     return true;
3470 
3471   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3472   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3473 
3474   const int OpIndices[] = { Src0Idx, Src1Idx };
3475 
3476   unsigned NumExprs = 0;
3477   unsigned NumLiterals = 0;
3478   uint32_t LiteralValue;
3479 
3480   for (int OpIdx : OpIndices) {
3481     if (OpIdx == -1) break;
3482 
3483     const MCOperand &MO = Inst.getOperand(OpIdx);
3484     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3485     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3486       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3487         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3488         if (NumLiterals == 0 || LiteralValue != Value) {
3489           LiteralValue = Value;
3490           ++NumLiterals;
3491         }
3492       } else if (MO.isExpr()) {
3493         ++NumExprs;
3494       }
3495     }
3496   }
3497 
3498   return NumLiterals + NumExprs <= 1;
3499 }
3500 
3501 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3502   const unsigned Opc = Inst.getOpcode();
3503   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3504       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3505     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3506     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3507 
3508     if (OpSel & ~3)
3509       return false;
3510   }
3511   return true;
3512 }
3513 
3514 // Check if VCC register matches wavefront size
3515 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3516   auto FB = getFeatureBits();
3517   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3518     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3519 }
3520 
3521 // VOP3 literal is only allowed in GFX10+ and only one can be used
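// For example (hypothetical operands): "v_add3_u32 v0, v1, v2, 0x12345" is
// accepted on GFX10 (FeatureVOP3Literal) and rejected on earlier targets.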
3522 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3523   unsigned Opcode = Inst.getOpcode();
3524   const MCInstrDesc &Desc = MII.get(Opcode);
3525   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3526     return true;
3527 
3528   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3529   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3530   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3531 
3532   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3533 
3534   unsigned NumExprs = 0;
3535   unsigned NumLiterals = 0;
3536   uint32_t LiteralValue;
3537 
3538   for (int OpIdx : OpIndices) {
3539     if (OpIdx == -1) break;
3540 
3541     const MCOperand &MO = Inst.getOperand(OpIdx);
3542     if (!MO.isImm() && !MO.isExpr())
3543       continue;
3544     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3545       continue;
3546 
3547     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3548         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3549       return false;
3550 
3551     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3552       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3553       if (NumLiterals == 0 || LiteralValue != Value) {
3554         LiteralValue = Value;
3555         ++NumLiterals;
3556       }
3557     } else if (MO.isExpr()) {
3558       ++NumExprs;
3559     }
3560   }
3561   NumLiterals += NumExprs;
3562 
3563   return !NumLiterals ||
3564          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3565 }
3566 
3567 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3568                                           const SMLoc &IDLoc,
3569                                           const OperandVector &Operands) {
3570   if (!validateLdsDirect(Inst)) {
3571     Error(IDLoc,
3572       "invalid use of lds_direct");
3573     return false;
3574   }
3575   if (!validateSOPLiteral(Inst)) {
3576     Error(IDLoc,
3577       "only one literal operand is allowed");
3578     return false;
3579   }
3580   if (!validateVOP3Literal(Inst)) {
3581     Error(IDLoc,
3582       "invalid literal operand");
3583     return false;
3584   }
3585   if (!validateConstantBusLimitations(Inst)) {
3586     Error(IDLoc,
3587       "invalid operand (violates constant bus restrictions)");
3588     return false;
3589   }
3590   if (!validateEarlyClobberLimitations(Inst)) {
3591     Error(IDLoc,
3592       "destination must be different than all sources");
3593     return false;
3594   }
3595   if (!validateIntClampSupported(Inst)) {
3596     Error(IDLoc,
3597       "integer clamping is not supported on this GPU");
3598     return false;
3599   }
3600   if (!validateOpSel(Inst)) {
3601     Error(IDLoc,
3602       "invalid op_sel operand");
3603     return false;
3604   }
3605   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3606   if (!validateMIMGD16(Inst)) {
3607     Error(IDLoc,
3608       "d16 modifier is not supported on this GPU");
3609     return false;
3610   }
3611   if (!validateMIMGDim(Inst)) {
3612     Error(IDLoc, "dim modifier is required on this GPU");
3613     return false;
3614   }
3615   if (!validateMIMGDataSize(Inst)) {
3616     Error(IDLoc,
3617       "image data size does not match dmask and tfe");
3618     return false;
3619   }
3620   if (!validateMIMGAddrSize(Inst)) {
3621     Error(IDLoc,
3622       "image address size does not match dim and a16");
3623     return false;
3624   }
3625   if (!validateMIMGAtomicDMask(Inst)) {
3626     Error(IDLoc,
3627       "invalid atomic image dmask");
3628     return false;
3629   }
3630   if (!validateMIMGGatherDMask(Inst)) {
3631     Error(IDLoc,
3632       "invalid image_gather dmask: only one bit must be set");
3633     return false;
3634   }
3635   if (!validateMovrels(Inst)) {
3636     Error(IDLoc, "source operand must be a VGPR");
3637     return false;
3638   }
3639   if (!validateFlatOffset(Inst, Operands)) {
3640     return false;
3641   }
3642   if (!validateSMEMOffset(Inst, Operands)) {
3643     return false;
3644   }
3645   if (!validateMAIAccWrite(Inst)) {
3646     return false;
3647   }
3648 
3649   return true;
3650 }
3651 
3652 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3653                                             const FeatureBitset &FBS,
3654                                             unsigned VariantID = 0);
3655 
3656 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3657                                               OperandVector &Operands,
3658                                               MCStreamer &Out,
3659                                               uint64_t &ErrorInfo,
3660                                               bool MatchingInlineAsm) {
3661   MCInst Inst;
3662   unsigned Result = Match_Success;
3663   for (auto Variant : getMatchedVariants()) {
3664     uint64_t EI;
3665     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3666                                   Variant);
3667     // We order match statuses from least to most specific, and keep the most
3668     // specific status seen so far as the result:
3669     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3670     if ((R == Match_Success) ||
3671         (R == Match_PreferE32) ||
3672         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3673         (R == Match_InvalidOperand && Result != Match_MissingFeature
3674                                    && Result != Match_PreferE32) ||
3675         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3676                                    && Result != Match_MissingFeature
3677                                    && Result != Match_PreferE32)) {
3678       Result = R;
3679       ErrorInfo = EI;
3680     }
3681     if (R == Match_Success)
3682       break;
3683   }
3684 
3685   switch (Result) {
3686   default: break;
3687   case Match_Success:
3688     if (!validateInstruction(Inst, IDLoc, Operands)) {
3689       return true;
3690     }
3691     Inst.setLoc(IDLoc);
3692     Out.emitInstruction(Inst, getSTI());
3693     return false;
3694 
3695   case Match_MissingFeature:
3696     return Error(IDLoc, "instruction not supported on this GPU");
3697 
3698   case Match_MnemonicFail: {
3699     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3700     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3701         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3702     return Error(IDLoc, "invalid instruction" + Suggestion,
3703                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3704   }
3705 
3706   case Match_InvalidOperand: {
3707     SMLoc ErrorLoc = IDLoc;
3708     if (ErrorInfo != ~0ULL) {
3709       if (ErrorInfo >= Operands.size()) {
3710         return Error(IDLoc, "too few operands for instruction");
3711       }
3712       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3713       if (ErrorLoc == SMLoc())
3714         ErrorLoc = IDLoc;
3715     }
3716     return Error(ErrorLoc, "invalid operand for instruction");
3717   }
3718 
3719   case Match_PreferE32:
3720     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3721                         "should be encoded as e32");
3722   }
3723   llvm_unreachable("Implement any new match types added!");
3724 }
3725 
3726 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3727   int64_t Tmp = -1;
3728   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3729     return true;
3730   }
3731   if (getParser().parseAbsoluteExpression(Tmp)) {
3732     return true;
3733   }
3734   Ret = static_cast<uint32_t>(Tmp);
3735   return false;
3736 }
3737 
3738 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3739                                                uint32_t &Minor) {
3740   if (ParseAsAbsoluteExpression(Major))
3741     return TokError("invalid major version");
3742 
3743   if (getLexer().isNot(AsmToken::Comma))
3744     return TokError("minor version number required, comma expected");
3745   Lex();
3746 
3747   if (ParseAsAbsoluteExpression(Minor))
3748     return TokError("invalid minor version");
3749 
3750   return false;
3751 }
3752 
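// For example, ".amdgcn_target "amdgcn-amd-amdhsa--gfx906"" (the string shown
// is illustrative; it must exactly match what IsaInfo::streamIsaVersion
// produces for the current subtarget, or the "target must match options"
// error below fires).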
3753 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3754   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3755     return TokError("directive only supported for amdgcn architecture");
3756 
3757   std::string Target;
3758 
3759   SMLoc TargetStart = getTok().getLoc();
3760   if (getParser().parseEscapedString(Target))
3761     return true;
3762   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3763 
3764   std::string ExpectedTarget;
3765   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3766   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3767 
3768   if (Target != ExpectedTargetOS.str())
3769     return getParser().Error(TargetRange.Start, "target must match options",
3770                              TargetRange);
3771 
3772   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3773   return false;
3774 }
3775 
3776 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3777   return getParser().Error(Range.Start, "value out of range", Range);
3778 }
3779 
3780 bool AMDGPUAsmParser::calculateGPRBlocks(
3781     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3782     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3783     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3784     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3785   // TODO(scott.linder): These calculations are duplicated from
3786   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3787   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3788 
3789   unsigned NumVGPRs = NextFreeVGPR;
3790   unsigned NumSGPRs = NextFreeSGPR;
3791 
3792   if (Version.Major >= 10)
3793     NumSGPRs = 0;
3794   else {
3795     unsigned MaxAddressableNumSGPRs =
3796         IsaInfo::getAddressableNumSGPRs(&getSTI());
3797 
3798     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3799         NumSGPRs > MaxAddressableNumSGPRs)
3800       return OutOfRangeError(SGPRRange);
3801 
3802     NumSGPRs +=
3803         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3804 
3805     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3806         NumSGPRs > MaxAddressableNumSGPRs)
3807       return OutOfRangeError(SGPRRange);
3808 
3809     if (Features.test(FeatureSGPRInitBug))
3810       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3811   }
3812 
3813   VGPRBlocks =
3814       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3815   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3816 
3817   return false;
3818 }
3819 
3820 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3821   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3822     return TokError("directive only supported for amdgcn architecture");
3823 
3824   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3825     return TokError("directive only supported for amdhsa OS");
3826 
3827   StringRef KernelName;
3828   if (getParser().parseIdentifier(KernelName))
3829     return true;
3830 
3831   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3832 
3833   StringSet<> Seen;
3834 
3835   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3836 
3837   SMRange VGPRRange;
3838   uint64_t NextFreeVGPR = 0;
3839   SMRange SGPRRange;
3840   uint64_t NextFreeSGPR = 0;
3841   unsigned UserSGPRCount = 0;
3842   bool ReserveVCC = true;
3843   bool ReserveFlatScr = true;
3844   bool ReserveXNACK = hasXNACK();
3845   Optional<bool> EnableWavefrontSize32;
3846 
3847   while (true) {
3848     while (getLexer().is(AsmToken::EndOfStatement))
3849       Lex();
3850 
3851     if (getLexer().isNot(AsmToken::Identifier))
3852       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3853 
3854     StringRef ID = getTok().getIdentifier();
3855     SMRange IDRange = getTok().getLocRange();
3856     Lex();
3857 
3858     if (ID == ".end_amdhsa_kernel")
3859       break;
3860 
3861     if (Seen.find(ID) != Seen.end())
3862       return TokError(".amdhsa_ directives cannot be repeated");
3863     Seen.insert(ID);
3864 
3865     SMLoc ValStart = getTok().getLoc();
3866     int64_t IVal;
3867     if (getParser().parseAbsoluteExpression(IVal))
3868       return true;
3869     SMLoc ValEnd = getTok().getLoc();
3870     SMRange ValRange = SMRange(ValStart, ValEnd);
3871 
3872     if (IVal < 0)
3873       return OutOfRangeError(ValRange);
3874 
3875     uint64_t Val = IVal;
3876 
3877 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3878   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3879     return OutOfRangeError(RANGE);                                             \
3880   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
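// For example, PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
// COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange) range-checks
// Val against the field's _WIDTH constant and then packs it into the
// corresponding bits of compute_pgm_rsrc2.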
3881 
3882     if (ID == ".amdhsa_group_segment_fixed_size") {
3883       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3884         return OutOfRangeError(ValRange);
3885       KD.group_segment_fixed_size = Val;
3886     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3887       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3888         return OutOfRangeError(ValRange);
3889       KD.private_segment_fixed_size = Val;
3890     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3891       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3892                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3893                        Val, ValRange);
3894       if (Val)
3895         UserSGPRCount += 4;
3896     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3897       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3898                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3899                        ValRange);
3900       if (Val)
3901         UserSGPRCount += 2;
3902     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3903       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3904                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3905                        ValRange);
3906       if (Val)
3907         UserSGPRCount += 2;
3908     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3909       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3910                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3911                        Val, ValRange);
3912       if (Val)
3913         UserSGPRCount += 2;
3914     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3915       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3916                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3917                        ValRange);
3918       if (Val)
3919         UserSGPRCount += 2;
3920     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3921       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3922                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3923                        ValRange);
3924       if (Val)
3925         UserSGPRCount += 2;
3926     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3927       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3928                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3929                        Val, ValRange);
3930       if (Val)
3931         UserSGPRCount += 1;
3932     } else if (ID == ".amdhsa_wavefront_size32") {
3933       if (IVersion.Major < 10)
3934         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3935                                  IDRange);
3936       EnableWavefrontSize32 = Val;
3937       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3938                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3939                        Val, ValRange);
3940     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3941       PARSE_BITS_ENTRY(
3942           KD.compute_pgm_rsrc2,
3943           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3944           ValRange);
3945     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3946       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3947                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3948                        ValRange);
3949     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3950       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3951                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3952                        ValRange);
3953     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3954       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3955                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3956                        ValRange);
3957     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3959                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3960                        ValRange);
3961     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3963                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3964                        ValRange);
3965     } else if (ID == ".amdhsa_next_free_vgpr") {
3966       VGPRRange = ValRange;
3967       NextFreeVGPR = Val;
3968     } else if (ID == ".amdhsa_next_free_sgpr") {
3969       SGPRRange = ValRange;
3970       NextFreeSGPR = Val;
3971     } else if (ID == ".amdhsa_reserve_vcc") {
3972       if (!isUInt<1>(Val))
3973         return OutOfRangeError(ValRange);
3974       ReserveVCC = Val;
3975     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3976       if (IVersion.Major < 7)
3977         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3978                                  IDRange);
3979       if (!isUInt<1>(Val))
3980         return OutOfRangeError(ValRange);
3981       ReserveFlatScr = Val;
3982     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3983       if (IVersion.Major < 8)
3984         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3985                                  IDRange);
3986       if (!isUInt<1>(Val))
3987         return OutOfRangeError(ValRange);
3988       ReserveXNACK = Val;
3989     } else if (ID == ".amdhsa_float_round_mode_32") {
3990       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3991                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3992     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3993       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3994                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3995     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3996       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3997                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3998     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3999       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4000                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4001                        ValRange);
4002     } else if (ID == ".amdhsa_dx10_clamp") {
4003       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4004                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4005     } else if (ID == ".amdhsa_ieee_mode") {
4006       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4007                        Val, ValRange);
4008     } else if (ID == ".amdhsa_fp16_overflow") {
4009       if (IVersion.Major < 9)
4010         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4011                                  IDRange);
4012       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4013                        ValRange);
4014     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4015       if (IVersion.Major < 10)
4016         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4017                                  IDRange);
4018       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4019                        ValRange);
4020     } else if (ID == ".amdhsa_memory_ordered") {
4021       if (IVersion.Major < 10)
4022         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4023                                  IDRange);
4024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4025                        ValRange);
4026     } else if (ID == ".amdhsa_forward_progress") {
4027       if (IVersion.Major < 10)
4028         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4029                                  IDRange);
4030       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4031                        ValRange);
4032     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4033       PARSE_BITS_ENTRY(
4034           KD.compute_pgm_rsrc2,
4035           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4036           ValRange);
4037     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4038       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4039                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4040                        Val, ValRange);
4041     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4042       PARSE_BITS_ENTRY(
4043           KD.compute_pgm_rsrc2,
4044           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4045           ValRange);
4046     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4047       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4048                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4049                        Val, ValRange);
4050     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4051       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4052                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4053                        Val, ValRange);
4054     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4055       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4056                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4057                        Val, ValRange);
4058     } else if (ID == ".amdhsa_exception_int_div_zero") {
4059       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4060                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4061                        Val, ValRange);
4062     } else {
4063       return getParser().Error(IDRange.Start,
4064                                "unknown .amdhsa_kernel directive", IDRange);
4065     }
4066 
4067 #undef PARSE_BITS_ENTRY
4068   }
4069 
4070   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4071     return TokError(".amdhsa_next_free_vgpr directive is required");
4072 
4073   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4074     return TokError(".amdhsa_next_free_sgpr directive is required");
4075 
4076   unsigned VGPRBlocks;
4077   unsigned SGPRBlocks;
4078   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4079                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4080                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4081                          SGPRBlocks))
4082     return true;
4083 
4084   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4085           VGPRBlocks))
4086     return OutOfRangeError(VGPRRange);
4087   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4088                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4089 
4090   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4091           SGPRBlocks))
4092     return OutOfRangeError(SGPRRange);
4093   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4094                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4095                   SGPRBlocks);
4096 
4097   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4098     return TokError("too many user SGPRs enabled");
4099   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4100                   UserSGPRCount);
4101 
4102   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4103       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4104       ReserveFlatScr, ReserveXNACK);
4105   return false;
4106 }
4107 
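// Parses, e.g., ".hsa_code_object_version 2,1" (major and minor version; the
// numbers here are illustrative only).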
4108 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4109   uint32_t Major;
4110   uint32_t Minor;
4111 
4112   if (ParseDirectiveMajorMinor(Major, Minor))
4113     return true;
4114 
4115   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4116   return false;
4117 }
4118 
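// Parses, e.g., ".hsa_code_object_isa 8,0,3,"AMD","AMDGPU"" (major, minor,
// stepping, vendor, arch; the concrete values are illustrative). With no
// arguments, the targeted GPU's own ISA version is emitted.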
4119 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4120   uint32_t Major;
4121   uint32_t Minor;
4122   uint32_t Stepping;
4123   StringRef VendorName;
4124   StringRef ArchName;
4125 
4126   // If this directive has no arguments, then use the ISA version for the
4127   // targeted GPU.
4128   if (getLexer().is(AsmToken::EndOfStatement)) {
4129     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4130     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4131                                                       ISA.Stepping,
4132                                                       "AMD", "AMDGPU");
4133     return false;
4134   }
4135 
4136   if (ParseDirectiveMajorMinor(Major, Minor))
4137     return true;
4138 
4139   if (getLexer().isNot(AsmToken::Comma))
4140     return TokError("stepping version number required, comma expected");
4141   Lex();
4142 
4143   if (ParseAsAbsoluteExpression(Stepping))
4144     return TokError("invalid stepping version");
4145 
4146   if (getLexer().isNot(AsmToken::Comma))
4147     return TokError("vendor name required, comma expected");
4148   Lex();
4149 
4150   if (getLexer().isNot(AsmToken::String))
4151     return TokError("invalid vendor name");
4152 
4153   VendorName = getLexer().getTok().getStringContents();
4154   Lex();
4155 
4156   if (getLexer().isNot(AsmToken::Comma))
4157     return TokError("arch name required, comma expected");
4158   Lex();
4159 
4160   if (getLexer().isNot(AsmToken::String))
4161     return TokError("invalid arch name");
4162 
4163   ArchName = getLexer().getTok().getStringContents();
4164   Lex();
4165 
4166   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4167                                                     VendorName, ArchName);
4168   return false;
4169 }
4170 
4171 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4172                                                amd_kernel_code_t &Header) {
4173   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4174   // assembly for backwards compatibility.
4175   if (ID == "max_scratch_backing_memory_byte_size") {
4176     Parser.eatToEndOfStatement();
4177     return false;
4178   }
4179 
4180   SmallString<40> ErrStr;
4181   raw_svector_ostream Err(ErrStr);
4182   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4183     return TokError(Err.str());
4184   }
4185   Lex();
4186 
4187   if (ID == "enable_wavefront_size32") {
4188     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4189       if (!isGFX10())
4190         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4191       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4192         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4193     } else {
4194       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4195         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4196     }
4197   }
4198 
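  // Note: in amd_kernel_code_t, wavefront_size holds the log2 of the wave
  // size, so 5 means wave32 and 6 means wave64.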
4199   if (ID == "wavefront_size") {
4200     if (Header.wavefront_size == 5) {
4201       if (!isGFX10())
4202         return TokError("wavefront_size=5 is only allowed on GFX10+");
4203       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4204         return TokError("wavefront_size=5 requires +WavefrontSize32");
4205     } else if (Header.wavefront_size == 6) {
4206       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4207         return TokError("wavefront_size=6 requires +WavefrontSize64");
4208     }
4209   }
4210 
4211   if (ID == "enable_wgp_mode") {
4212     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4213       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4214   }
4215 
4216   if (ID == "enable_mem_ordered") {
4217     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4218       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4219   }
4220 
4221   if (ID == "enable_fwd_progress") {
4222     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4223       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4224   }
4225 
4226   return false;
4227 }
4228 
4229 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4230   amd_kernel_code_t Header;
4231   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4232 
4233   while (true) {
4234     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4235     // will set the current token to EndOfStatement.
4236     while(getLexer().is(AsmToken::EndOfStatement))
4237       Lex();
4238 
4239     if (getLexer().isNot(AsmToken::Identifier))
4240       return TokError("expected value identifier or .end_amd_kernel_code_t");
4241 
4242     StringRef ID = getLexer().getTok().getIdentifier();
4243     Lex();
4244 
4245     if (ID == ".end_amd_kernel_code_t")
4246       break;
4247 
4248     if (ParseAMDKernelCodeTValue(ID, Header))
4249       return true;
4250   }
4251 
4252   getTargetStreamer().EmitAMDKernelCodeT(Header);
4253 
4254   return false;
4255 }
4256 
4257 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4258   if (getLexer().isNot(AsmToken::Identifier))
4259     return TokError("expected symbol name");
4260 
4261   StringRef KernelName = Parser.getTok().getString();
4262 
4263   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4264                                            ELF::STT_AMDGPU_HSA_KERNEL);
4265   Lex();
4266   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4267     KernelScope.initialize(getContext());
4268   return false;
4269 }
4270 
4271 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4272   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4273     return Error(getParser().getTok().getLoc(),
4274                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4275                  "architectures");
4276   }
4277 
4278   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4279 
4280   std::string ISAVersionStringFromSTI;
4281   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4282   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4283 
4284   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4285     return Error(getParser().getTok().getLoc(),
4286                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4287                  "arguments specified through the command line");
4288   }
4289 
4290   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4291   Lex();
4292 
4293   return false;
4294 }
4295 
4296 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4297   const char *AssemblerDirectiveBegin;
4298   const char *AssemblerDirectiveEnd;
4299   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4300       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4301           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4302                             HSAMD::V3::AssemblerDirectiveEnd)
4303           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4304                             HSAMD::AssemblerDirectiveEnd);
4305 
4306   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4307     return Error(getParser().getTok().getLoc(),
4308                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4309                  "not available on non-amdhsa OSes")).str());
4310   }
4311 
4312   std::string HSAMetadataString;
4313   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4314                           HSAMetadataString))
4315     return true;
4316 
4317   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4318     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4319       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4320   } else {
4321     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4322       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4323   }
4324 
4325   return false;
4326 }
4327 
4328 /// Common code to parse out a block of text (typically YAML) between start and
4329 /// end directives.
4330 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4331                                           const char *AssemblerDirectiveEnd,
4332                                           std::string &CollectString) {
4333 
4334   raw_string_ostream CollectStream(CollectString);
4335 
4336   getLexer().setSkipSpace(false);
4337 
4338   bool FoundEnd = false;
4339   while (!getLexer().is(AsmToken::Eof)) {
4340     while (getLexer().is(AsmToken::Space)) {
4341       CollectStream << getLexer().getTok().getString();
4342       Lex();
4343     }
4344 
4345     if (getLexer().is(AsmToken::Identifier)) {
4346       StringRef ID = getLexer().getTok().getIdentifier();
4347       if (ID == AssemblerDirectiveEnd) {
4348         Lex();
4349         FoundEnd = true;
4350         break;
4351       }
4352     }
4353 
4354     CollectStream << Parser.parseStringToEndOfStatement()
4355                   << getContext().getAsmInfo()->getSeparatorString();
4356 
4357     Parser.eatToEndOfStatement();
4358   }
4359 
4360   getLexer().setSkipSpace(true);
4361 
4362   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4363     return TokError(Twine("expected directive ") +
4364                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4365   }
4366 
4367   CollectStream.flush();
4368   return false;
4369 }
4370 
4371 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4372 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4373   std::string String;
4374   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4375                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4376     return true;
4377 
4378   auto PALMetadata = getTargetStreamer().getPALMetadata();
4379   if (!PALMetadata->setFromString(String))
4380     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4381   return false;
4382 }
4383 
4384 /// Parse the assembler directive for old linear-format PAL metadata.
4385 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4386   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4387     return Error(getParser().getTok().getLoc(),
4388                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4389                  "not available on non-amdpal OSes")).str());
4390   }
4391 
4392   auto PALMetadata = getTargetStreamer().getPALMetadata();
4393   PALMetadata->setLegacy();
4394   for (;;) {
4395     uint32_t Key, Value;
4396     if (ParseAsAbsoluteExpression(Key)) {
4397       return TokError(Twine("invalid value in ") +
4398                       Twine(PALMD::AssemblerDirective));
4399     }
4400     if (getLexer().isNot(AsmToken::Comma)) {
4401       return TokError(Twine("expected an even number of values in ") +
4402                       Twine(PALMD::AssemblerDirective));
4403     }
4404     Lex();
4405     if (ParseAsAbsoluteExpression(Value)) {
4406       return TokError(Twine("invalid value in ") +
4407                       Twine(PALMD::AssemblerDirective));
4408     }
4409     PALMetadata->setRegister(Key, Value);
4410     if (getLexer().isNot(AsmToken::Comma))
4411       break;
4412     Lex();
4413   }
4414   return false;
4415 }
4416 
4417 /// ParseDirectiveAMDGPULDS
4418 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
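/// e.g. ".amdgpu_lds my_lds_var, 512, 16" reserves 512 bytes of LDS with
/// 16-byte alignment; the alignment defaults to 4 when omitted. (The symbol
/// name and sizes here are illustrative.)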
4419 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4420   if (getParser().checkForValidSection())
4421     return true;
4422 
4423   StringRef Name;
4424   SMLoc NameLoc = getLexer().getLoc();
4425   if (getParser().parseIdentifier(Name))
4426     return TokError("expected identifier in directive");
4427 
4428   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4429   if (parseToken(AsmToken::Comma, "expected ','"))
4430     return true;
4431 
4432   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4433 
4434   int64_t Size;
4435   SMLoc SizeLoc = getLexer().getLoc();
4436   if (getParser().parseAbsoluteExpression(Size))
4437     return true;
4438   if (Size < 0)
4439     return Error(SizeLoc, "size must be non-negative");
4440   if (Size > LocalMemorySize)
4441     return Error(SizeLoc, "size is too large");
4442 
4443   int64_t Align = 4;
4444   if (getLexer().is(AsmToken::Comma)) {
4445     Lex();
4446     SMLoc AlignLoc = getLexer().getLoc();
4447     if (getParser().parseAbsoluteExpression(Align))
4448       return true;
4449     if (Align < 0 || !isPowerOf2_64(Align))
4450       return Error(AlignLoc, "alignment must be a power of two");
4451 
4452     // Alignment larger than the size of LDS is possible in theory, as long
4453     // as the linker manages to place the symbol at address 0, but we do want
4454     // to make sure the alignment fits nicely into a 32-bit integer.
4455     if (Align >= 1u << 31)
4456       return Error(AlignLoc, "alignment is too large");
4457   }
4458 
4459   if (parseToken(AsmToken::EndOfStatement,
4460                  "unexpected token in '.amdgpu_lds' directive"))
4461     return true;
4462 
4463   Symbol->redefineIfPossible();
4464   if (!Symbol->isUndefined())
4465     return Error(NameLoc, "invalid symbol redefinition");
4466 
4467   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4468   return false;
4469 }
4470 
4471 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4472   StringRef IDVal = DirectiveID.getString();
4473 
4474   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4475     if (IDVal == ".amdgcn_target")
4476       return ParseDirectiveAMDGCNTarget();
4477 
4478     if (IDVal == ".amdhsa_kernel")
4479       return ParseDirectiveAMDHSAKernel();
4480 
4481     // TODO: Restructure/combine with PAL metadata directive.
4482     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4483       return ParseDirectiveHSAMetadata();
4484   } else {
4485     if (IDVal == ".hsa_code_object_version")
4486       return ParseDirectiveHSACodeObjectVersion();
4487 
4488     if (IDVal == ".hsa_code_object_isa")
4489       return ParseDirectiveHSACodeObjectISA();
4490 
4491     if (IDVal == ".amd_kernel_code_t")
4492       return ParseDirectiveAMDKernelCodeT();
4493 
4494     if (IDVal == ".amdgpu_hsa_kernel")
4495       return ParseDirectiveAMDGPUHsaKernel();
4496 
4497     if (IDVal == ".amd_amdgpu_isa")
4498       return ParseDirectiveISAVersion();
4499 
4500     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4501       return ParseDirectiveHSAMetadata();
4502   }
4503 
4504   if (IDVal == ".amdgpu_lds")
4505     return ParseDirectiveAMDGPULDS();
4506 
4507   if (IDVal == PALMD::AssemblerDirectiveBegin)
4508     return ParseDirectivePALMetadataBegin();
4509 
4510   if (IDVal == PALMD::AssemblerDirective)
4511     return ParseDirectivePALMetadata();
4512 
4513   return true;
4514 }
4515 
4516 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4517                                            unsigned RegNo) const {
4518 
4519   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4520        R.isValid(); ++R) {
4521     if (*R == RegNo)
4522       return isGFX9() || isGFX10();
4523   }
4524 
4525   // GFX10 has 2 more SGPRs 104 and 105.
4526   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4527        R.isValid(); ++R) {
4528     if (*R == RegNo)
4529       return hasSGPR104_SGPR105();
4530   }
4531 
4532   switch (RegNo) {
4533   case AMDGPU::SRC_SHARED_BASE:
4534   case AMDGPU::SRC_SHARED_LIMIT:
4535   case AMDGPU::SRC_PRIVATE_BASE:
4536   case AMDGPU::SRC_PRIVATE_LIMIT:
4537   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4538     return !isCI() && !isSI() && !isVI();
4539   case AMDGPU::TBA:
4540   case AMDGPU::TBA_LO:
4541   case AMDGPU::TBA_HI:
4542   case AMDGPU::TMA:
4543   case AMDGPU::TMA_LO:
4544   case AMDGPU::TMA_HI:
4545     return !isGFX9() && !isGFX10();
4546   case AMDGPU::XNACK_MASK:
4547   case AMDGPU::XNACK_MASK_LO:
4548   case AMDGPU::XNACK_MASK_HI:
4549     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4550   case AMDGPU::SGPR_NULL:
4551     return isGFX10();
4552   default:
4553     break;
4554   }
4555 
4556   if (isCI())
4557     return true;
4558 
4559   if (isSI() || isGFX10()) {
4560     // No flat_scr on SI.
4561     // On GFX10 flat scratch is not a valid register operand and can only be
4562     // accessed with s_setreg/s_getreg.
4563     switch (RegNo) {
4564     case AMDGPU::FLAT_SCR:
4565     case AMDGPU::FLAT_SCR_LO:
4566     case AMDGPU::FLAT_SCR_HI:
4567       return false;
4568     default:
4569       return true;
4570     }
4571   }
4572 
4573   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4574   // SI/CI have.
4575   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4576        R.isValid(); ++R) {
4577     if (*R == RegNo)
4578       return hasSGPR102_SGPR103();
4579   }
4580 
4581   return true;
4582 }
4583 
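// Parse one instruction operand. The TableGen-generated custom parsers are
// tried first; in NSA mode (GFX10 MIMG) a bracketed list of registers such as
// "[v4, v5, v6]" is also accepted. Otherwise fall back to generic
// register/immediate parsing.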
4584 OperandMatchResultTy
4585 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4586                               OperandMode Mode) {
4587   // Try to parse with a custom parser
4588   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4589 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parsers, so we shouldn't continue on to the generic parsing.
4596   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4597       getLexer().is(AsmToken::EndOfStatement))
4598     return ResTy;
4599 
4600   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4601     unsigned Prefix = Operands.size();
4602     SMLoc LBraceLoc = getTok().getLoc();
4603     Parser.Lex(); // eat the '['
4604 
4605     for (;;) {
4606       ResTy = parseReg(Operands);
4607       if (ResTy != MatchOperand_Success)
4608         return ResTy;
4609 
4610       if (getLexer().is(AsmToken::RBrac))
4611         break;
4612 
4613       if (getLexer().isNot(AsmToken::Comma))
4614         return MatchOperand_ParseFail;
4615       Parser.Lex();
4616     }
4617 
4618     if (Operands.size() - Prefix > 1) {
4619       Operands.insert(Operands.begin() + Prefix,
4620                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4621       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4622                                                     getTok().getLoc()));
4623     }
4624 
4625     Parser.Lex(); // eat the ']'
4626     return MatchOperand_Success;
4627   }
4628 
4629   return parseRegOrImm(Operands);
4630 }
4631 
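// Strip an explicit encoding suffix (_e32, _e64, _dpp or _sdwa) from the
// mnemonic and record it as a forced encoding for instruction matching.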
4632 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4633   // Clear any forced encodings from the previous instruction.
4634   setForcedEncodingSize(0);
4635   setForcedDPP(false);
4636   setForcedSDWA(false);
4637 
4638   if (Name.endswith("_e64")) {
4639     setForcedEncodingSize(64);
4640     return Name.substr(0, Name.size() - 4);
4641   } else if (Name.endswith("_e32")) {
4642     setForcedEncodingSize(32);
4643     return Name.substr(0, Name.size() - 4);
4644   } else if (Name.endswith("_dpp")) {
4645     setForcedDPP(true);
4646     return Name.substr(0, Name.size() - 4);
4647   } else if (Name.endswith("_sdwa")) {
4648     setForcedSDWA(true);
4649     return Name.substr(0, Name.size() - 5);
4650   }
4651   return Name;
4652 }
4653 
4654 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4655                                        StringRef Name,
4656                                        SMLoc NameLoc, OperandVector &Operands) {
4657   // Add the instruction mnemonic
4658   Name = parseMnemonicSuffix(Name);
4659   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4660 
4661   bool IsMIMG = Name.startswith("image_");
4662 
4663   while (!getLexer().is(AsmToken::EndOfStatement)) {
4664     OperandMode Mode = OperandMode_Default;
4665     if (IsMIMG && isGFX10() && Operands.size() == 2)
4666       Mode = OperandMode_NSA;
4667     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4668 
4669     // Eat the comma or space if there is one.
4670     if (getLexer().is(AsmToken::Comma))
4671       Parser.Lex();
4672 
4673     switch (Res) {
4674       case MatchOperand_Success: break;
4675       case MatchOperand_ParseFail:
4676         // FIXME: use real operand location rather than the current location.
4677         Error(getLexer().getLoc(), "failed parsing operand.");
4678         while (!getLexer().is(AsmToken::EndOfStatement)) {
4679           Parser.Lex();
4680         }
4681         return true;
4682       case MatchOperand_NoMatch:
4683         // FIXME: use real operand location rather than the current location.
4684         Error(getLexer().getLoc(), "not a valid operand.");
4685         while (!getLexer().is(AsmToken::EndOfStatement)) {
4686           Parser.Lex();
4687         }
4688         return true;
4689     }
4690   }
4691 
4692   return false;
4693 }
4694 
4695 //===----------------------------------------------------------------------===//
4696 // Utility functions
4697 //===----------------------------------------------------------------------===//
4698 
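// Parse an integer that follows the given prefix and a colon, e.g. the value
// in "dfmt:<n>".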
4699 OperandMatchResultTy
4700 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4701 
4702   if (!trySkipId(Prefix, AsmToken::Colon))
4703     return MatchOperand_NoMatch;
4704 
4705   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4706 }
4707 
4708 OperandMatchResultTy
4709 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4710                                     AMDGPUOperand::ImmTy ImmTy,
4711                                     bool (*ConvertResult)(int64_t&)) {
4712   SMLoc S = getLoc();
4713   int64_t Value = 0;
4714 
4715   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4716   if (Res != MatchOperand_Success)
4717     return Res;
4718 
4719   if (ConvertResult && !ConvertResult(Value)) {
4720     Error(S, "invalid " + StringRef(Prefix) + " value.");
4721   }
4722 
4723   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4724   return MatchOperand_Success;
4725 }
4726 
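// Parse an operand written as "<Prefix>:[a,b,...]" where every element must
// be 0 or 1 and at most 4 elements are accepted (e.g. "op_sel:[0,1]").
// Element i is packed into bit i of the resulting immediate.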
4727 OperandMatchResultTy
4728 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4729                                              OperandVector &Operands,
4730                                              AMDGPUOperand::ImmTy ImmTy,
4731                                              bool (*ConvertResult)(int64_t&)) {
4732   SMLoc S = getLoc();
4733   if (!trySkipId(Prefix, AsmToken::Colon))
4734     return MatchOperand_NoMatch;
4735 
4736   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4737     return MatchOperand_ParseFail;
4738 
4739   unsigned Val = 0;
4740   const unsigned MaxSize = 4;
4741 
4742   // FIXME: How to verify the number of elements matches the number of src
4743   // operands?
4744   for (int I = 0; ; ++I) {
4745     int64_t Op;
4746     SMLoc Loc = getLoc();
4747     if (!parseExpr(Op))
4748       return MatchOperand_ParseFail;
4749 
4750     if (Op != 0 && Op != 1) {
4751       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4752       return MatchOperand_ParseFail;
4753     }
4754 
4755     Val |= (Op << I);
4756 
4757     if (trySkipToken(AsmToken::RBrac))
4758       break;
4759 
4760     if (I + 1 == MaxSize) {
4761       Error(getLoc(), "expected a closing square bracket");
4762       return MatchOperand_ParseFail;
4763     }
4764 
4765     if (!skipToken(AsmToken::Comma, "expected a comma"))
4766       return MatchOperand_ParseFail;
4767   }
4768 
4769   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4770   return MatchOperand_Success;
4771 }
4772 
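// Parse a named single-bit modifier: the bare name (e.g. "gds") sets the bit,
// the "no"-prefixed form (e.g. "nogds") clears it, and an omitted modifier at
// the end of the statement defaults to 0.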
4773 OperandMatchResultTy
4774 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4775                                AMDGPUOperand::ImmTy ImmTy) {
4776   int64_t Bit = 0;
4777   SMLoc S = Parser.getTok().getLoc();
4778 
  // If we are at the end of the statement, this is a default argument, so
  // use the default value.
4781   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4782     switch(getLexer().getKind()) {
4783       case AsmToken::Identifier: {
4784         StringRef Tok = Parser.getTok().getString();
4785         if (Tok == Name) {
4786           if (Tok == "r128" && !hasMIMG_R128())
4787             Error(S, "r128 modifier is not supported on this GPU");
4788           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4789             Error(S, "a16 modifier is not supported on this GPU");
4790           Bit = 1;
4791           Parser.Lex();
4792         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4793           Bit = 0;
4794           Parser.Lex();
4795         } else {
4796           return MatchOperand_NoMatch;
4797         }
4798         break;
4799       }
4800       default:
4801         return MatchOperand_NoMatch;
4802     }
4803   }
4804 
4805   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4806     return MatchOperand_ParseFail;
4807 
4808   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4809     ImmTy = AMDGPUOperand::ImmTyR128A16;
4810 
4811   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4812   return MatchOperand_Success;
4813 }
4814 
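// Add an optional immediate operand to the MCInst: use the operand recorded
// in OptionalIdx if the user supplied one, otherwise emit the given default.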
4815 static void addOptionalImmOperand(
4816   MCInst& Inst, const OperandVector& Operands,
4817   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4818   AMDGPUOperand::ImmTy ImmT,
4819   int64_t Default = 0) {
4820   auto i = OptionalIdx.find(ImmT);
4821   if (i != OptionalIdx.end()) {
4822     unsigned Idx = i->second;
4823     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4824   } else {
4825     Inst.addOperand(MCOperand::createImm(Default));
4826   }
4827 }
4828 
4829 OperandMatchResultTy
4830 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4831   if (getLexer().isNot(AsmToken::Identifier)) {
4832     return MatchOperand_NoMatch;
4833   }
4834   StringRef Tok = Parser.getTok().getString();
4835   if (Tok != Prefix) {
4836     return MatchOperand_NoMatch;
4837   }
4838 
4839   Parser.Lex();
4840   if (getLexer().isNot(AsmToken::Colon)) {
4841     return MatchOperand_ParseFail;
4842   }
4843 
4844   Parser.Lex();
4845   if (getLexer().isNot(AsmToken::Identifier)) {
4846     return MatchOperand_ParseFail;
4847   }
4848 
4849   Value = Parser.getTok().getString();
4850   return MatchOperand_Success;
4851 }
4852 
4853 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4854 // values to live in a joint format operand in the MCInst encoding.
4855 OperandMatchResultTy
4856 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4857   SMLoc S = Parser.getTok().getLoc();
4858   int64_t Dfmt = 0, Nfmt = 0;
4859   // dfmt and nfmt can appear in either order, and each is optional.
4860   bool GotDfmt = false, GotNfmt = false;
4861   while (!GotDfmt || !GotNfmt) {
4862     if (!GotDfmt) {
4863       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4864       if (Res != MatchOperand_NoMatch) {
4865         if (Res != MatchOperand_Success)
4866           return Res;
4867         if (Dfmt >= 16) {
4868           Error(Parser.getTok().getLoc(), "out of range dfmt");
4869           return MatchOperand_ParseFail;
4870         }
4871         GotDfmt = true;
4872         Parser.Lex();
4873         continue;
4874       }
4875     }
4876     if (!GotNfmt) {
4877       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4878       if (Res != MatchOperand_NoMatch) {
4879         if (Res != MatchOperand_Success)
4880           return Res;
4881         if (Nfmt >= 8) {
4882           Error(Parser.getTok().getLoc(), "out of range nfmt");
4883           return MatchOperand_ParseFail;
4884         }
4885         GotNfmt = true;
4886         Parser.Lex();
4887         continue;
4888       }
4889     }
4890     break;
4891   }
4892   if (!GotDfmt && !GotNfmt)
4893     return MatchOperand_NoMatch;
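  // Pack the two values into the combined format operand: dfmt occupies the
  // low 4 bits and nfmt the 3 bits above it.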
4894   auto Format = Dfmt | Nfmt << 4;
4895   Operands.push_back(
4896       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4897   return MatchOperand_Success;
4898 }
4899 
4900 //===----------------------------------------------------------------------===//
4901 // ds
4902 //===----------------------------------------------------------------------===//
4903 
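// Convert parsed DS operands for instructions using the split offset0/offset1
// form: append the optional offsets and the gds bit, then the implicit m0
// register.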
4904 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4905                                     const OperandVector &Operands) {
4906   OptionalImmIndexMap OptionalIdx;
4907 
4908   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4909     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4910 
4911     // Add the register arguments
4912     if (Op.isReg()) {
4913       Op.addRegOperands(Inst, 1);
4914       continue;
4915     }
4916 
4917     // Handle optional arguments
4918     OptionalIdx[Op.getImmTy()] = i;
4919   }
4920 
4921   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4922   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4923   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4924 
4925   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4926 }
4927 
4928 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4929                                 bool IsGdsHardcoded) {
4930   OptionalImmIndexMap OptionalIdx;
4931 
4932   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4933     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4934 
4935     // Add the register arguments
4936     if (Op.isReg()) {
4937       Op.addRegOperands(Inst, 1);
4938       continue;
4939     }
4940 
4941     if (Op.isToken() && Op.getToken() == "gds") {
4942       IsGdsHardcoded = true;
4943       continue;
4944     }
4945 
4946     // Handle optional arguments
4947     OptionalIdx[Op.getImmTy()] = i;
4948   }
4949 
4950   AMDGPUOperand::ImmTy OffsetType =
4951     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4952      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4953      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4954                                                       AMDGPUOperand::ImmTyOffset;
4955 
4956   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4957 
4958   if (!IsGdsHardcoded) {
4959     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4960   }
4961   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4962 }
4963 
4964 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4965   OptionalImmIndexMap OptionalIdx;
4966 
4967   unsigned OperandIdx[4];
4968   unsigned EnMask = 0;
4969   int SrcIdx = 0;
4970 
4971   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4972     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4973 
4974     // Add the register arguments
4975     if (Op.isReg()) {
4976       assert(SrcIdx < 4);
4977       OperandIdx[SrcIdx] = Inst.size();
4978       Op.addRegOperands(Inst, 1);
4979       ++SrcIdx;
4980       continue;
4981     }
4982 
4983     if (Op.isOff()) {
4984       assert(SrcIdx < 4);
4985       OperandIdx[SrcIdx] = Inst.size();
4986       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4987       ++SrcIdx;
4988       continue;
4989     }
4990 
4991     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4992       Op.addImmOperands(Inst, 1);
4993       continue;
4994     }
4995 
4996     if (Op.isToken() && Op.getToken() == "done")
4997       continue;
4998 
4999     // Handle optional arguments
5000     OptionalIdx[Op.getImmTy()] = i;
5001   }
5002 
5003   assert(SrcIdx == 4);
5004 
5005   bool Compr = false;
5006   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5007     Compr = true;
5008     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5009     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5010     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5011   }
5012 
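  // Build the enable mask: set one bit (or a pair of bits in compressed mode)
  // for every source operand that is not 'off'.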
5013   for (auto i = 0; i < SrcIdx; ++i) {
5014     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5015       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5016     }
5017   }
5018 
5019   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5020   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5021 
5022   Inst.addOperand(MCOperand::createImm(EnMask));
5023 }
5024 
5025 //===----------------------------------------------------------------------===//
5026 // s_waitcnt
5027 //===----------------------------------------------------------------------===//
5028 
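// Encode one counter value into its field of the s_waitcnt immediate.
// If the value does not survive an encode/decode round trip it does not fit
// in the field; saturate it when requested, otherwise report failure by
// returning true.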
5029 static bool
5030 encodeCnt(
5031   const AMDGPU::IsaVersion ISA,
5032   int64_t &IntVal,
5033   int64_t CntVal,
5034   bool Saturate,
5035   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5036   unsigned (*decode)(const IsaVersion &Version, unsigned))
5037 {
5038   bool Failed = false;
5039 
5040   IntVal = encode(ISA, IntVal, CntVal);
5041   if (CntVal != decode(ISA, IntVal)) {
5042     if (Saturate) {
5043       IntVal = encode(ISA, IntVal, -1);
5044     } else {
5045       Failed = true;
5046     }
5047   }
5048   return Failed;
5049 }
5050 
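// Parse a single counter clause of the form "<name>(<value>)", e.g.
// "vmcnt(0)", and fold it into the s_waitcnt bitmask accumulated in IntVal.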
5051 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5052 
5053   SMLoc CntLoc = getLoc();
5054   StringRef CntName = getTokenStr();
5055 
5056   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5057       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5058     return false;
5059 
5060   int64_t CntVal;
5061   SMLoc ValLoc = getLoc();
5062   if (!parseExpr(CntVal))
5063     return false;
5064 
5065   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5066 
5067   bool Failed = true;
5068   bool Sat = CntName.endswith("_sat");
5069 
5070   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5071     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5072   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5073     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5074   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5075     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5076   } else {
5077     Error(CntLoc, "invalid counter name " + CntName);
5078     return false;
5079   }
5080 
5081   if (Failed) {
5082     Error(ValLoc, "too large value for " + CntName);
5083     return false;
5084   }
5085 
5086   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5087     return false;
5088 
5089   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5090     if (isToken(AsmToken::EndOfStatement)) {
5091       Error(getLoc(), "expected a counter name");
5092       return false;
5093     }
5094   }
5095 
5096   return true;
5097 }
5098 
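// An s_waitcnt operand is either a sequence of counter clauses separated by
// '&' or ',' (e.g. "vmcnt(0) & lgkmcnt(0)") or a plain integer expression.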
5099 OperandMatchResultTy
5100 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5101   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5102   int64_t Waitcnt = getWaitcntBitMask(ISA);
5103   SMLoc S = getLoc();
5104 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
5107   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5108     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5109   } else {
5110     parseExpr(Waitcnt);
5111   }
5112 
5113   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5114   return MatchOperand_Success;
5115 }
5116 
5117 bool
5118 AMDGPUOperand::isSWaitCnt() const {
5119   return isImm();
5120 }
5121 
5122 //===----------------------------------------------------------------------===//
5123 // hwreg
5124 //===----------------------------------------------------------------------===//
5125 
5126 bool
5127 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5128                                 int64_t &Offset,
5129                                 int64_t &Width) {
5130   using namespace llvm::AMDGPU::Hwreg;
5131 
5132   // The register may be specified by name or using a numeric code
5133   if (isToken(AsmToken::Identifier) &&
5134       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5135     HwReg.IsSymbolic = true;
    lex(); // skip register name
5137   } else if (!parseExpr(HwReg.Id)) {
5138     return false;
5139   }
5140 
5141   if (trySkipToken(AsmToken::RParen))
5142     return true;
5143 
5144   // parse optional params
5145   return
5146     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5147     parseExpr(Offset) &&
5148     skipToken(AsmToken::Comma, "expected a comma") &&
5149     parseExpr(Width) &&
5150     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5151 }
5152 
5153 bool
5154 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5155                                const int64_t Offset,
5156                                const int64_t Width,
5157                                const SMLoc Loc) {
5158 
5159   using namespace llvm::AMDGPU::Hwreg;
5160 
5161   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5162     Error(Loc, "specified hardware register is not supported on this GPU");
5163     return false;
5164   } else if (!isValidHwreg(HwReg.Id)) {
5165     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5166     return false;
5167   } else if (!isValidHwregOffset(Offset)) {
5168     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5169     return false;
5170   } else if (!isValidHwregWidth(Width)) {
5171     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5172     return false;
5173   }
5174   return true;
5175 }
5176 
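// Parse a hwreg operand: either "hwreg(<name or id>[, <offset>, <width>])"
// or a raw immediate, which must fit in 16 bits.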
5177 OperandMatchResultTy
5178 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5179   using namespace llvm::AMDGPU::Hwreg;
5180 
5181   int64_t ImmVal = 0;
5182   SMLoc Loc = getLoc();
5183 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
5186   if (trySkipId("hwreg", AsmToken::LParen)) {
5187     OperandInfoTy HwReg(ID_UNKNOWN_);
5188     int64_t Offset = OFFSET_DEFAULT_;
5189     int64_t Width = WIDTH_DEFAULT_;
5190     if (parseHwregBody(HwReg, Offset, Width) &&
5191         validateHwreg(HwReg, Offset, Width, Loc)) {
5192       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5193     }
5194   } else if (parseExpr(ImmVal)) {
5195     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5196       Error(Loc, "invalid immediate: only 16-bit values are legal");
5197   }
5198 
5199   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5200   return MatchOperand_Success;
5201 }
5202 
5203 bool AMDGPUOperand::isHwreg() const {
5204   return isImmTy(ImmTyHwreg);
5205 }
5206 
5207 //===----------------------------------------------------------------------===//
5208 // sendmsg
5209 //===----------------------------------------------------------------------===//
5210 
5211 bool
5212 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5213                                   OperandInfoTy &Op,
5214                                   OperandInfoTy &Stream) {
5215   using namespace llvm::AMDGPU::SendMsg;
5216 
5217   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5218     Msg.IsSymbolic = true;
5219     lex(); // skip message name
5220   } else if (!parseExpr(Msg.Id)) {
5221     return false;
5222   }
5223 
5224   if (trySkipToken(AsmToken::Comma)) {
5225     Op.IsDefined = true;
5226     if (isToken(AsmToken::Identifier) &&
5227         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5228       lex(); // skip operation name
5229     } else if (!parseExpr(Op.Id)) {
5230       return false;
5231     }
5232 
5233     if (trySkipToken(AsmToken::Comma)) {
5234       Stream.IsDefined = true;
5235       if (!parseExpr(Stream.Id))
5236         return false;
5237     }
5238   }
5239 
5240   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5241 }
5242 
5243 bool
5244 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5245                                  const OperandInfoTy &Op,
5246                                  const OperandInfoTy &Stream,
5247                                  const SMLoc S) {
5248   using namespace llvm::AMDGPU::SendMsg;
5249 
  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
5253   bool Strict = Msg.IsSymbolic;
5254 
5255   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5256     Error(S, "invalid message id");
5257     return false;
5258   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5259     Error(S, Op.IsDefined ?
5260              "message does not support operations" :
5261              "missing message operation");
5262     return false;
5263   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5264     Error(S, "invalid operation id");
5265     return false;
5266   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5267     Error(S, "message operation does not support streams");
5268     return false;
5269   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5270     Error(S, "invalid message stream id");
5271     return false;
5272   }
5273   return true;
5274 }
5275 
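// Parse a sendmsg operand: either "sendmsg(<msg>[, <operation>[, <stream>]])"
// or a raw immediate, which must fit in 16 bits.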
5276 OperandMatchResultTy
5277 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5278   using namespace llvm::AMDGPU::SendMsg;
5279 
5280   int64_t ImmVal = 0;
5281   SMLoc Loc = getLoc();
5282 
  // If parsing failed, do not return an error code
  // to avoid excessive error messages.
5285   if (trySkipId("sendmsg", AsmToken::LParen)) {
5286     OperandInfoTy Msg(ID_UNKNOWN_);
5287     OperandInfoTy Op(OP_NONE_);
5288     OperandInfoTy Stream(STREAM_ID_NONE_);
5289     if (parseSendMsgBody(Msg, Op, Stream) &&
5290         validateSendMsg(Msg, Op, Stream, Loc)) {
5291       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5292     }
5293   } else if (parseExpr(ImmVal)) {
5294     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5295       Error(Loc, "invalid immediate: only 16-bit values are legal");
5296   }
5297 
5298   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5299   return MatchOperand_Success;
5300 }
5301 
5302 bool AMDGPUOperand::isSendMsg() const {
5303   return isImmTy(ImmTySendMsg);
5304 }
5305 
5306 //===----------------------------------------------------------------------===//
5307 // v_interp
5308 //===----------------------------------------------------------------------===//
5309 
5310 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5311   if (getLexer().getKind() != AsmToken::Identifier)
5312     return MatchOperand_NoMatch;
5313 
5314   StringRef Str = Parser.getTok().getString();
5315   int Slot = StringSwitch<int>(Str)
5316     .Case("p10", 0)
5317     .Case("p20", 1)
5318     .Case("p0", 2)
5319     .Default(-1);
5320 
5321   SMLoc S = Parser.getTok().getLoc();
5322   if (Slot == -1)
5323     return MatchOperand_ParseFail;
5324 
5325   Parser.Lex();
5326   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5327                                               AMDGPUOperand::ImmTyInterpSlot));
5328   return MatchOperand_Success;
5329 }
5330 
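// Parse an interpolation attribute written as "attr<N>.<chan>", e.g.
// "attr0.x", where <chan> is one of x, y, z or w and <N> is at most 63.
// Two immediate operands are produced: the attribute index and the channel.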
5331 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5332   if (getLexer().getKind() != AsmToken::Identifier)
5333     return MatchOperand_NoMatch;
5334 
5335   StringRef Str = Parser.getTok().getString();
5336   if (!Str.startswith("attr"))
5337     return MatchOperand_NoMatch;
5338 
5339   StringRef Chan = Str.take_back(2);
5340   int AttrChan = StringSwitch<int>(Chan)
5341     .Case(".x", 0)
5342     .Case(".y", 1)
5343     .Case(".z", 2)
5344     .Case(".w", 3)
5345     .Default(-1);
5346   if (AttrChan == -1)
5347     return MatchOperand_ParseFail;
5348 
5349   Str = Str.drop_back(2).drop_front(4);
5350 
5351   uint8_t Attr;
5352   if (Str.getAsInteger(10, Attr))
5353     return MatchOperand_ParseFail;
5354 
5355   SMLoc S = Parser.getTok().getLoc();
5356   Parser.Lex();
5357   if (Attr > 63) {
5358     Error(S, "out of bounds attr");
5359     return MatchOperand_Success;
5360   }
5361 
5362   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5363 
5364   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5365                                               AMDGPUOperand::ImmTyInterpAttr));
5366   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5367                                               AMDGPUOperand::ImmTyAttrChan));
5368   return MatchOperand_Success;
5369 }
5370 
5371 //===----------------------------------------------------------------------===//
5372 // exp
5373 //===----------------------------------------------------------------------===//
5374 
5375 void AMDGPUAsmParser::errorExpTgt() {
5376   Error(Parser.getTok().getLoc(), "invalid exp target");
5377 }
5378 
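// Map a symbolic exp target name (null, mrt0..mrt7, mrtz, pos0.., prim,
// param0..param31 or invalid_target_<n>) to its numeric encoding, reporting
// an error for out-of-range values.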
5379 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5380                                                       uint8_t &Val) {
5381   if (Str == "null") {
5382     Val = 9;
5383     return MatchOperand_Success;
5384   }
5385 
5386   if (Str.startswith("mrt")) {
5387     Str = Str.drop_front(3);
5388     if (Str == "z") { // == mrtz
5389       Val = 8;
5390       return MatchOperand_Success;
5391     }
5392 
5393     if (Str.getAsInteger(10, Val))
5394       return MatchOperand_ParseFail;
5395 
5396     if (Val > 7)
5397       errorExpTgt();
5398 
5399     return MatchOperand_Success;
5400   }
5401 
5402   if (Str.startswith("pos")) {
5403     Str = Str.drop_front(3);
5404     if (Str.getAsInteger(10, Val))
5405       return MatchOperand_ParseFail;
5406 
5407     if (Val > 4 || (Val == 4 && !isGFX10()))
5408       errorExpTgt();
5409 
5410     Val += 12;
5411     return MatchOperand_Success;
5412   }
5413 
5414   if (isGFX10() && Str == "prim") {
5415     Val = 20;
5416     return MatchOperand_Success;
5417   }
5418 
5419   if (Str.startswith("param")) {
5420     Str = Str.drop_front(5);
5421     if (Str.getAsInteger(10, Val))
5422       return MatchOperand_ParseFail;
5423 
5424     if (Val >= 32)
5425       errorExpTgt();
5426 
5427     Val += 32;
5428     return MatchOperand_Success;
5429   }
5430 
5431   if (Str.startswith("invalid_target_")) {
5432     Str = Str.drop_front(15);
5433     if (Str.getAsInteger(10, Val))
5434       return MatchOperand_ParseFail;
5435 
5436     errorExpTgt();
5437     return MatchOperand_Success;
5438   }
5439 
5440   return MatchOperand_NoMatch;
5441 }
5442 
5443 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5444   uint8_t Val;
5445   StringRef Str = Parser.getTok().getString();
5446 
5447   auto Res = parseExpTgtImpl(Str, Val);
5448   if (Res != MatchOperand_Success)
5449     return Res;
5450 
5451   SMLoc S = Parser.getTok().getLoc();
5452   Parser.Lex();
5453 
5454   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5455                                               AMDGPUOperand::ImmTyExpTgt));
5456   return MatchOperand_Success;
5457 }
5458 
5459 //===----------------------------------------------------------------------===//
5460 // parser helpers
5461 //===----------------------------------------------------------------------===//
5462 
5463 bool
5464 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5465   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5466 }
5467 
5468 bool
5469 AMDGPUAsmParser::isId(const StringRef Id) const {
5470   return isId(getToken(), Id);
5471 }
5472 
5473 bool
5474 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5475   return getTokenKind() == Kind;
5476 }
5477 
5478 bool
5479 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5480   if (isId(Id)) {
5481     lex();
5482     return true;
5483   }
5484   return false;
5485 }
5486 
5487 bool
5488 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5489   if (isId(Id) && peekToken().is(Kind)) {
5490     lex();
5491     lex();
5492     return true;
5493   }
5494   return false;
5495 }
5496 
5497 bool
5498 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5499   if (isToken(Kind)) {
5500     lex();
5501     return true;
5502   }
5503   return false;
5504 }
5505 
5506 bool
5507 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5508                            const StringRef ErrMsg) {
5509   if (!trySkipToken(Kind)) {
5510     Error(getLoc(), ErrMsg);
5511     return false;
5512   }
5513   return true;
5514 }
5515 
5516 bool
5517 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5518   return !getParser().parseAbsoluteExpression(Imm);
5519 }
5520 
5521 bool
5522 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5523   SMLoc S = getLoc();
5524 
5525   const MCExpr *Expr;
5526   if (Parser.parseExpression(Expr))
5527     return false;
5528 
5529   int64_t IntVal;
5530   if (Expr->evaluateAsAbsolute(IntVal)) {
5531     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5532   } else {
5533     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5534   }
5535   return true;
5536 }
5537 
5538 bool
5539 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5540   if (isToken(AsmToken::String)) {
5541     Val = getToken().getStringContents();
5542     lex();
5543     return true;
5544   } else {
5545     Error(getLoc(), ErrMsg);
5546     return false;
5547   }
5548 }
5549 
5550 AsmToken
5551 AMDGPUAsmParser::getToken() const {
5552   return Parser.getTok();
5553 }
5554 
5555 AsmToken
5556 AMDGPUAsmParser::peekToken() {
5557   return getLexer().peekTok();
5558 }
5559 
5560 void
5561 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5562   auto TokCount = getLexer().peekTokens(Tokens);
5563 
5564   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5565     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5566 }
5567 
5568 AsmToken::TokenKind
5569 AMDGPUAsmParser::getTokenKind() const {
5570   return getLexer().getKind();
5571 }
5572 
5573 SMLoc
5574 AMDGPUAsmParser::getLoc() const {
5575   return getToken().getLoc();
5576 }
5577 
5578 StringRef
5579 AMDGPUAsmParser::getTokenStr() const {
5580   return getToken().getString();
5581 }
5582 
5583 void
5584 AMDGPUAsmParser::lex() {
5585   Parser.Lex();
5586 }
5587 
5588 //===----------------------------------------------------------------------===//
5589 // swizzle
5590 //===----------------------------------------------------------------------===//
5591 
5592 LLVM_READNONE
5593 static unsigned
5594 encodeBitmaskPerm(const unsigned AndMask,
5595                   const unsigned OrMask,
5596                   const unsigned XorMask) {
5597   using namespace llvm::AMDGPU::Swizzle;
5598 
5599   return BITMASK_PERM_ENC |
5600          (AndMask << BITMASK_AND_SHIFT) |
5601          (OrMask  << BITMASK_OR_SHIFT)  |
5602          (XorMask << BITMASK_XOR_SHIFT);
5603 }
5604 
5605 bool
5606 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5607                                       const unsigned MinVal,
5608                                       const unsigned MaxVal,
5609                                       const StringRef ErrMsg) {
5610   for (unsigned i = 0; i < OpNum; ++i) {
5611     if (!skipToken(AsmToken::Comma, "expected a comma")){
5612       return false;
5613     }
5614     SMLoc ExprLoc = Parser.getTok().getLoc();
5615     if (!parseExpr(Op[i])) {
5616       return false;
5617     }
5618     if (Op[i] < MinVal || Op[i] > MaxVal) {
5619       Error(ExprLoc, ErrMsg);
5620       return false;
5621     }
5622   }
5623 
5624   return true;
5625 }
5626 
5627 bool
5628 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5629   using namespace llvm::AMDGPU::Swizzle;
5630 
5631   int64_t Lane[LANE_NUM];
5632   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5633                            "expected a 2-bit lane id")) {
5634     Imm = QUAD_PERM_ENC;
5635     for (unsigned I = 0; I < LANE_NUM; ++I) {
5636       Imm |= Lane[I] << (LANE_SHIFT * I);
5637     }
5638     return true;
5639   }
5640   return false;
5641 }
5642 
5643 bool
5644 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5645   using namespace llvm::AMDGPU::Swizzle;
5646 
5647   SMLoc S = Parser.getTok().getLoc();
5648   int64_t GroupSize;
5649   int64_t LaneIdx;
5650 
5651   if (!parseSwizzleOperands(1, &GroupSize,
5652                             2, 32,
5653                             "group size must be in the interval [2,32]")) {
5654     return false;
5655   }
5656   if (!isPowerOf2_64(GroupSize)) {
5657     Error(S, "group size must be a power of two");
5658     return false;
5659   }
5660   if (parseSwizzleOperands(1, &LaneIdx,
5661                            0, GroupSize - 1,
5662                            "lane id must be in the interval [0,group size - 1]")) {
5663     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5664     return true;
5665   }
5666   return false;
5667 }
5668 
5669 bool
5670 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5671   using namespace llvm::AMDGPU::Swizzle;
5672 
5673   SMLoc S = Parser.getTok().getLoc();
5674   int64_t GroupSize;
5675 
5676   if (!parseSwizzleOperands(1, &GroupSize,
5677       2, 32, "group size must be in the interval [2,32]")) {
5678     return false;
5679   }
5680   if (!isPowerOf2_64(GroupSize)) {
5681     Error(S, "group size must be a power of two");
5682     return false;
5683   }
5684 
5685   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5686   return true;
5687 }
5688 
5689 bool
5690 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5691   using namespace llvm::AMDGPU::Swizzle;
5692 
5693   SMLoc S = Parser.getTok().getLoc();
5694   int64_t GroupSize;
5695 
5696   if (!parseSwizzleOperands(1, &GroupSize,
5697       1, 16, "group size must be in the interval [1,16]")) {
5698     return false;
5699   }
5700   if (!isPowerOf2_64(GroupSize)) {
5701     Error(S, "group size must be a power of two");
5702     return false;
5703   }
5704 
5705   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5706   return true;
5707 }
5708 
5709 bool
5710 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5711   using namespace llvm::AMDGPU::Swizzle;
5712 
5713   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5714     return false;
5715   }
5716 
5717   StringRef Ctl;
5718   SMLoc StrLoc = Parser.getTok().getLoc();
5719   if (!parseString(Ctl)) {
5720     return false;
5721   }
5722   if (Ctl.size() != BITMASK_WIDTH) {
5723     Error(StrLoc, "expected a 5-character mask");
5724     return false;
5725   }
5726 
5727   unsigned AndMask = 0;
5728   unsigned OrMask = 0;
5729   unsigned XorMask = 0;
5730 
5731   for (size_t i = 0; i < Ctl.size(); ++i) {
5732     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5733     switch(Ctl[i]) {
5734     default:
5735       Error(StrLoc, "invalid mask");
5736       return false;
5737     case '0':
5738       break;
5739     case '1':
5740       OrMask |= Mask;
5741       break;
5742     case 'p':
5743       AndMask |= Mask;
5744       break;
5745     case 'i':
5746       AndMask |= Mask;
5747       XorMask |= Mask;
5748       break;
5749     }
5750   }
5751 
5752   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5753   return true;
5754 }
5755 
5756 bool
5757 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5758 
5759   SMLoc OffsetLoc = Parser.getTok().getLoc();
5760 
5761   if (!parseExpr(Imm)) {
5762     return false;
5763   }
5764   if (!isUInt<16>(Imm)) {
5765     Error(OffsetLoc, "expected a 16-bit offset");
5766     return false;
5767   }
5768   return true;
5769 }
5770 
5771 bool
5772 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5773   using namespace llvm::AMDGPU::Swizzle;
5774 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5776 
5777     SMLoc ModeLoc = Parser.getTok().getLoc();
5778     bool Ok = false;
5779 
5780     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5781       Ok = parseSwizzleQuadPerm(Imm);
5782     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5783       Ok = parseSwizzleBitmaskPerm(Imm);
5784     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5785       Ok = parseSwizzleBroadcast(Imm);
5786     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5787       Ok = parseSwizzleSwap(Imm);
5788     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5789       Ok = parseSwizzleReverse(Imm);
5790     } else {
5791       Error(ModeLoc, "expected a swizzle mode");
5792     }
5793 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5795   }
5796 
5797   return false;
5798 }
5799 
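// Parse the ds_swizzle_b32 offset operand: either a raw 16-bit
// "offset:<imm>" or a symbolic macro such as
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)".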
5800 OperandMatchResultTy
5801 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5802   SMLoc S = Parser.getTok().getLoc();
5803   int64_t Imm = 0;
5804 
5805   if (trySkipId("offset")) {
5806 
5807     bool Ok = false;
5808     if (skipToken(AsmToken::Colon, "expected a colon")) {
5809       if (trySkipId("swizzle")) {
5810         Ok = parseSwizzleMacro(Imm);
5811       } else {
5812         Ok = parseSwizzleOffset(Imm);
5813       }
5814     }
5815 
5816     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5817 
5818     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5819   } else {
5820     // Swizzle "offset" operand is optional.
5821     // If it is omitted, try parsing other optional operands.
5822     return parseOptionalOpr(Operands);
5823   }
5824 }
5825 
5826 bool
5827 AMDGPUOperand::isSwizzle() const {
5828   return isImmTy(ImmTySwizzle);
5829 }
5830 
5831 //===----------------------------------------------------------------------===//
5832 // VGPR Index Mode
5833 //===----------------------------------------------------------------------===//
5834 
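// Parse the body of a "gpr_idx(...)" macro: a comma-separated list of VGPR
// index modes with no duplicates, terminated by ')'. An empty list yields
// the OFF encoding.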
5835 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5836 
5837   using namespace llvm::AMDGPU::VGPRIndexMode;
5838 
5839   if (trySkipToken(AsmToken::RParen)) {
5840     return OFF;
5841   }
5842 
5843   int64_t Imm = 0;
5844 
5845   while (true) {
5846     unsigned Mode = 0;
5847     SMLoc S = Parser.getTok().getLoc();
5848 
5849     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5850       if (trySkipId(IdSymbolic[ModeId])) {
5851         Mode = 1 << ModeId;
5852         break;
5853       }
5854     }
5855 
5856     if (Mode == 0) {
5857       Error(S, (Imm == 0)?
5858                "expected a VGPR index mode or a closing parenthesis" :
5859                "expected a VGPR index mode");
5860       break;
5861     }
5862 
5863     if (Imm & Mode) {
5864       Error(S, "duplicate VGPR index mode");
5865       break;
5866     }
5867     Imm |= Mode;
5868 
5869     if (trySkipToken(AsmToken::RParen))
5870       break;
5871     if (!skipToken(AsmToken::Comma,
5872                    "expected a comma or a closing parenthesis"))
5873       break;
5874   }
5875 
5876   return Imm;
5877 }
5878 
5879 OperandMatchResultTy
5880 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5881 
5882   int64_t Imm = 0;
5883   SMLoc S = Parser.getTok().getLoc();
5884 
5885   if (getLexer().getKind() == AsmToken::Identifier &&
5886       Parser.getTok().getString() == "gpr_idx" &&
5887       getLexer().peekTok().is(AsmToken::LParen)) {
5888 
5889     Parser.Lex();
5890     Parser.Lex();
5891 
    // If parsing failed, trigger an error but do not return an error code
    // to avoid excessive error messages.
5894     Imm = parseGPRIdxMacro();
5895 
5896   } else {
5897     if (getParser().parseAbsoluteExpression(Imm))
5898       return MatchOperand_NoMatch;
5899     if (Imm < 0 || !isUInt<4>(Imm)) {
5900       Error(S, "invalid immediate: only 4-bit values are legal");
5901     }
5902   }
5903 
5904   Operands.push_back(
5905       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5906   return MatchOperand_Success;
5907 }
5908 
5909 bool AMDGPUOperand::isGPRIdxMode() const {
5910   return isImmTy(ImmTyGprIdxMode);
5911 }
5912 
5913 //===----------------------------------------------------------------------===//
5914 // sopp branch targets
5915 //===----------------------------------------------------------------------===//
5916 
5917 OperandMatchResultTy
5918 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5919 
5920   // Make sure we are not parsing something
5921   // that looks like a label or an expression but is not.
5922   // This will improve error messages.
5923   if (isRegister() || isModifier())
5924     return MatchOperand_NoMatch;
5925 
5926   if (parseExpr(Operands)) {
5927 
5928     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5929     assert(Opr.isImm() || Opr.isExpr());
5930     SMLoc Loc = Opr.getStartLoc();
5931 
5932     // Currently we do not support arbitrary expressions as branch targets.
5933     // Only labels and absolute expressions are accepted.
5934     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5935       Error(Loc, "expected an absolute expression or a label");
5936     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5937       Error(Loc, "expected a 16-bit signed jump offset");
5938     }
5939   }
5940 
5941   return MatchOperand_Success; // avoid excessive error messages
5942 }
5943 
5944 //===----------------------------------------------------------------------===//
5945 // Boolean holding registers
5946 //===----------------------------------------------------------------------===//
5947 
5948 OperandMatchResultTy
5949 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5950   return parseReg(Operands);
5951 }
5952 
5953 //===----------------------------------------------------------------------===//
5954 // mubuf
5955 //===----------------------------------------------------------------------===//
5956 
5957 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5958   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5959 }
5960 
5961 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5962   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5963 }
5964 
5965 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5966   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5967 }
5968 
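// Convert parsed MUBUF operands into MCInst operands. For atomics with
// return, the destination register is also added as a tied source; optional
// modifiers are then appended in the order expected by the encoding.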
5969 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5970                                const OperandVector &Operands,
5971                                bool IsAtomic,
5972                                bool IsAtomicReturn,
5973                                bool IsLds) {
5974   bool IsLdsOpcode = IsLds;
5975   bool HasLdsModifier = false;
5976   OptionalImmIndexMap OptionalIdx;
5977   assert(IsAtomicReturn ? IsAtomic : true);
5978   unsigned FirstOperandIdx = 1;
5979 
5980   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5981     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5982 
5983     // Add the register arguments
5984     if (Op.isReg()) {
5985       Op.addRegOperands(Inst, 1);
5986       // Insert a tied src for atomic return dst.
5987       // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
5989       if (IsAtomicReturn && i == FirstOperandIdx)
5990         Op.addRegOperands(Inst, 1);
5991       continue;
5992     }
5993 
5994     // Handle the case where soffset is an immediate
5995     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5996       Op.addImmOperands(Inst, 1);
5997       continue;
5998     }
5999 
6000     HasLdsModifier |= Op.isLDS();
6001 
6002     // Handle tokens like 'offen' which are sometimes hard-coded into the
6003     // asm string.  There are no MCInst operands for these.
6004     if (Op.isToken()) {
6005       continue;
6006     }
6007     assert(Op.isImm());
6008 
6009     // Handle optional arguments
6010     OptionalIdx[Op.getImmTy()] = i;
6011   }
6012 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
6020   if (IsLdsOpcode && !HasLdsModifier) {
6021     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6022     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6023       Inst.setOpcode(NoLdsOpcode);
6024       IsLdsOpcode = false;
6025     }
6026   }
6027 
6028   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6029   if (!IsAtomic) { // glc is hard-coded.
6030     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6031   }
6032   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6033 
6034   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6035     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6036   }
6037 
6038   if (isGFX10())
6039     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6040 }
6041 
6042 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6043   OptionalImmIndexMap OptionalIdx;
6044 
6045   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6046     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6047 
6048     // Add the register arguments
6049     if (Op.isReg()) {
6050       Op.addRegOperands(Inst, 1);
6051       continue;
6052     }
6053 
6054     // Handle the case where soffset is an immediate
6055     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6056       Op.addImmOperands(Inst, 1);
6057       continue;
6058     }
6059 
6060     // Handle tokens like 'offen' which are sometimes hard-coded into the
6061     // asm string.  There are no MCInst operands for these.
6062     if (Op.isToken()) {
6063       continue;
6064     }
6065     assert(Op.isImm());
6066 
6067     // Handle optional arguments
6068     OptionalIdx[Op.getImmTy()] = i;
6069   }
6070 
6071   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6072                         AMDGPUOperand::ImmTyOffset);
6073   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6074   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6075   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6076   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6077 
6078   if (isGFX10())
6079     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6080 }
6081 
6082 //===----------------------------------------------------------------------===//
6083 // mimg
6084 //===----------------------------------------------------------------------===//
6085 
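// Convert parsed MIMG operands into MCInst operands. Atomics add the
// destination again as a tied source; optional modifiers follow in encoding
// order, with dim, dlc and a16 emitted only for GFX10 and da only for
// earlier targets.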
6086 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6087                               bool IsAtomic) {
6088   unsigned I = 1;
6089   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6090   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6091     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6092   }
6093 
6094   if (IsAtomic) {
6095     // Add src, same as dst
6096     assert(Desc.getNumDefs() == 1);
6097     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6098   }
6099 
6100   OptionalImmIndexMap OptionalIdx;
6101 
6102   for (unsigned E = Operands.size(); I != E; ++I) {
6103     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6104 
6105     // Add the register arguments
6106     if (Op.isReg()) {
6107       Op.addRegOperands(Inst, 1);
6108     } else if (Op.isImmModifier()) {
6109       OptionalIdx[Op.getImmTy()] = I;
6110     } else if (!Op.isToken()) {
6111       llvm_unreachable("unexpected operand type");
6112     }
6113   }
6114 
6115   bool IsGFX10 = isGFX10();
6116 
6117   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6118   if (IsGFX10)
6119     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6120   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6121   if (IsGFX10)
6122     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6123   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6124   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6125   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6126   if (IsGFX10)
6127     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6128   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6129   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6130   if (!IsGFX10)
6131     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6132   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6133 }
6134 
6135 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6136   cvtMIMG(Inst, Operands, true);
6137 }
6138 
6139 //===----------------------------------------------------------------------===//
6140 // smrd
6141 //===----------------------------------------------------------------------===//
6142 
6143 bool AMDGPUOperand::isSMRDOffset8() const {
6144   return isImm() && isUInt<8>(getImm());
6145 }
6146 
6147 bool AMDGPUOperand::isSMEMOffset() const {
6148   return isImm(); // Offset range is checked later by validator.
6149 }
6150 
6151 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6152   // 32-bit literals are only supported on CI and we only want to use them
6153   // when the offset is > 8-bits.
6154   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6155 }
6156 
6157 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6158   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6159 }
6160 
6161 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6162   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6163 }
6164 
6165 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6166   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6167 }
6168 
6169 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6170   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6171 }
6172 
6173 //===----------------------------------------------------------------------===//
6174 // vop3
6175 //===----------------------------------------------------------------------===//
6176 
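// Converters for the omod output modifier: a multiplier of 1/2/4 is encoded
// as 0/1/2 and a divisor of 1/2 as 0/3.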
6177 static bool ConvertOmodMul(int64_t &Mul) {
6178   if (Mul != 1 && Mul != 2 && Mul != 4)
6179     return false;
6180 
6181   Mul >>= 1;
6182   return true;
6183 }
6184 
6185 static bool ConvertOmodDiv(int64_t &Div) {
6186   if (Div == 1) {
6187     Div = 0;
6188     return true;
6189   }
6190 
6191   if (Div == 2) {
6192     Div = 3;
6193     return true;
6194   }
6195 
6196   return false;
6197 }
6198 
6199 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6200   if (BoundCtrl == 0) {
6201     BoundCtrl = 1;
6202     return true;
6203   }
6204 
6205   if (BoundCtrl == -1) {
6206     BoundCtrl = 0;
6207     return true;
6208   }
6209 
6210   return false;
6211 }
6212 
6213 // Note: the order in this table matches the order of operands in AsmString.
6214 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6215   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6216   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6217   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6218   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6219   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6220   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6221   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6222   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6223   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6224   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6225   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6226   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6227   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6228   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6229   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6230   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6231   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6232   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6233   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6234   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6235   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6236   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6237   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6238   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6239   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6240   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6241   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6242   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6243   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6244   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6245   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6246   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6247   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6248   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6249   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6250   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6251   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6252   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6253   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6254   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6255   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6256   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6257   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6258   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6259 };
6260 
6261 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6262 
6263   OperandMatchResultTy res = parseOptionalOpr(Operands);
6264 
6265   // This is a hack to enable hardcoded mandatory operands that follow
6266   // optional operands.
6267   //
6268   // The current design assumes that all operands after the first optional one
6269   // are also optional. However, some instructions violate this rule (see e.g.
6270   // flat/global atomics, which have hardcoded 'glc' operands).
6271   //
6272   // To alleviate this problem, we have to (implicitly) parse extra operands
6273   // so that the autogenerated parser of custom operands never hits a
6274   // hardcoded mandatory operand.
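  // For example (illustrative syntax), in a returning global atomic such as
  //   global_atomic_add v0, v[1:2], v3, off glc
  // the trailing 'glc' is hardcoded in the AsmString and follows optional
  // operands like 'offset'.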
6275 
6276   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6277     if (res != MatchOperand_Success ||
6278         isToken(AsmToken::EndOfStatement))
6279       break;
6280 
6281     trySkipToken(AsmToken::Comma);
6282     res = parseOptionalOpr(Operands);
6283   }
6284 
6285   return res;
6286 }
6287 
6288 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6289   OperandMatchResultTy res;
6290   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6291     // try to parse any optional operand here
6292     if (Op.IsBit) {
6293       res = parseNamedBit(Op.Name, Operands, Op.Type);
6294     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6295       res = parseOModOperand(Operands);
6296     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6297                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6298                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6299       res = parseSDWASel(Operands, Op.Name, Op.Type);
6300     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6301       res = parseSDWADstUnused(Operands);
6302     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6303                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6304                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6305                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6306       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6307                                         Op.ConvertResult);
6308     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6309       res = parseDim(Operands);
6310     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6311       res = parseDfmtNfmt(Operands);
6312     } else {
6313       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6314     }
6315     if (res != MatchOperand_NoMatch) {
6316       return res;
6317     }
6318   }
6319   return MatchOperand_NoMatch;
6320 }
6321 
6322 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6323   StringRef Name = Parser.getTok().getString();
6324   if (Name == "mul") {
6325     return parseIntWithPrefix("mul", Operands,
6326                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6327   }
6328 
6329   if (Name == "div") {
6330     return parseIntWithPrefix("div", Operands,
6331                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6332   }
6333 
6334   return MatchOperand_NoMatch;
6335 }
6336 
6337 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6338   cvtVOP3P(Inst, Operands);
6339 
6340   int Opc = Inst.getOpcode();
6341 
6342   int SrcNum;
6343   const int Ops[] = { AMDGPU::OpName::src0,
6344                       AMDGPU::OpName::src1,
6345                       AMDGPU::OpName::src2 };
6346   for (SrcNum = 0;
6347        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6348        ++SrcNum);
6349   assert(SrcNum > 0);
6350 
6351   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6352   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6353 
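  // The op_sel bit at index SrcNum (i.e. the bit following the last source)
  // selects the destination half; it is carried in src0_modifiers as
  // DST_OP_SEL.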
6354   if ((OpSel & (1 << SrcNum)) != 0) {
6355     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6356     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6357     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6358   }
6359 }
6360 
6361 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6362       // 1. This operand is an input-modifiers operand
6363   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6364       // 2. This is not the last operand
6365       && Desc.NumOperands > (OpNum + 1)
6366       // 3. The next operand has a register class
6367       && Desc.OpInfo[OpNum + 1].RegClass != -1
6368       // 4. The next register is not tied to any other operand
6369       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6370 }
6371 
6372 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6373 {
6374   OptionalImmIndexMap OptionalIdx;
6375   unsigned Opc = Inst.getOpcode();
6376 
6377   unsigned I = 1;
6378   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6379   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6380     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6381   }
6382 
6383   for (unsigned E = Operands.size(); I != E; ++I) {
6384     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6385     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6386       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6387     } else if (Op.isInterpSlot() ||
6388                Op.isInterpAttr() ||
6389                Op.isAttrChan()) {
6390       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6391     } else if (Op.isImmModifier()) {
6392       OptionalIdx[Op.getImmTy()] = I;
6393     } else {
6394       llvm_unreachable("unhandled operand type");
6395     }
6396   }
6397 
6398   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6399     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6400   }
6401 
6402   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6403     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6404   }
6405 
6406   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6407     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6408   }
6409 }
6410 
6411 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6412                               OptionalImmIndexMap &OptionalIdx) {
6413   unsigned Opc = Inst.getOpcode();
6414 
6415   unsigned I = 1;
6416   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6417   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6418     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6419   }
6420 
6421   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6422     // This instruction has src modifiers
6423     for (unsigned E = Operands.size(); I != E; ++I) {
6424       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6425       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6426         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6427       } else if (Op.isImmModifier()) {
6428         OptionalIdx[Op.getImmTy()] = I;
6429       } else if (Op.isRegOrImm()) {
6430         Op.addRegOrImmOperands(Inst, 1);
6431       } else {
6432         llvm_unreachable("unhandled operand type");
6433       }
6434     }
6435   } else {
6436     // No src modifiers
6437     for (unsigned E = Operands.size(); I != E; ++I) {
6438       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6439       if (Op.isMod()) {
6440         OptionalIdx[Op.getImmTy()] = I;
6441       } else {
6442         Op.addRegOrImmOperands(Inst, 1);
6443       }
6444     }
6445   }
6446 
6447   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6448     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6449   }
6450 
6451   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6452     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6453   }
6454 
6455   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6456   // these have a src2 register operand that is tied to the dst operand.
6457   // We do not allow modifiers for this operand in the assembler, so
6458   // src2_modifiers must be 0.
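  // For example (illustrative syntax), 'v_mac_f32_e64 v0, v1, v2' gets
  // src2_modifiers = 0 and src2 = v0 (a copy of the dst operand).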
6459   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6460       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6461       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6462       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6463       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6464       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6465       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6466     auto it = Inst.begin();
6467     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6468     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6469     ++it;
6470     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6471   }
6472 }
6473 
6474 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6475   OptionalImmIndexMap OptionalIdx;
6476   cvtVOP3(Inst, Operands, OptionalIdx);
6477 }
6478 
6479 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6480                                const OperandVector &Operands) {
6481   OptionalImmIndexMap OptIdx;
6482   const int Opc = Inst.getOpcode();
6483   const MCInstrDesc &Desc = MII.get(Opc);
6484 
6485   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6486 
6487   cvtVOP3(Inst, Operands, OptIdx);
6488 
6489   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6490     assert(!IsPacked);
6491     Inst.addOperand(Inst.getOperand(0));
6492   }
6493 
6494   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
6495   // instruction, and then figure out where to actually put the modifiers.
6496 
6497   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6498 
6499   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6500   if (OpSelHiIdx != -1) {
6501     int DefaultVal = IsPacked ? -1 : 0;
6502     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6503                           DefaultVal);
6504   }
6505 
6506   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6507   if (NegLoIdx != -1) {
6508     assert(IsPacked);
6509     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6510     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6511   }
6512 
6513   const int Ops[] = { AMDGPU::OpName::src0,
6514                       AMDGPU::OpName::src1,
6515                       AMDGPU::OpName::src2 };
6516   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6517                          AMDGPU::OpName::src1_modifiers,
6518                          AMDGPU::OpName::src2_modifiers };
6519 
6520   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6521 
6522   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6523   unsigned OpSelHi = 0;
6524   unsigned NegLo = 0;
6525   unsigned NegHi = 0;
6526 
6527   if (OpSelHiIdx != -1) {
6528     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6529   }
6530 
6531   if (NegLoIdx != -1) {
6532     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6533     NegLo = Inst.getOperand(NegLoIdx).getImm();
6534     NegHi = Inst.getOperand(NegHiIdx).getImm();
6535   }
6536 
6537   for (int J = 0; J < 3; ++J) {
6538     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6539     if (OpIdx == -1)
6540       break;
6541 
6542     uint32_t ModVal = 0;
6543 
6544     if ((OpSel & (1 << J)) != 0)
6545       ModVal |= SISrcMods::OP_SEL_0;
6546 
6547     if ((OpSelHi & (1 << J)) != 0)
6548       ModVal |= SISrcMods::OP_SEL_1;
6549 
6550     if ((NegLo & (1 << J)) != 0)
6551       ModVal |= SISrcMods::NEG;
6552 
6553     if ((NegHi & (1 << J)) != 0)
6554       ModVal |= SISrcMods::NEG_HI;
6555 
6556     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6557 
6558     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6559   }
6560 }
6561 
6562 //===----------------------------------------------------------------------===//
6563 // dpp
6564 //===----------------------------------------------------------------------===//
6565 
6566 bool AMDGPUOperand::isDPP8() const {
6567   return isImmTy(ImmTyDPP8);
6568 }
6569 
6570 bool AMDGPUOperand::isDPPCtrl() const {
6571   using namespace AMDGPU::DPP;
6572 
6573   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6574   if (result) {
6575     int64_t Imm = getImm();
6576     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6577            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6578            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6579            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6580            (Imm == DppCtrl::WAVE_SHL1) ||
6581            (Imm == DppCtrl::WAVE_ROL1) ||
6582            (Imm == DppCtrl::WAVE_SHR1) ||
6583            (Imm == DppCtrl::WAVE_ROR1) ||
6584            (Imm == DppCtrl::ROW_MIRROR) ||
6585            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6586            (Imm == DppCtrl::BCAST15) ||
6587            (Imm == DppCtrl::BCAST31) ||
6588            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6589            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6590   }
6591   return false;
6592 }
6593 
6594 //===----------------------------------------------------------------------===//
6595 // mAI
6596 //===----------------------------------------------------------------------===//
6597 
6598 bool AMDGPUOperand::isBLGP() const {
6599   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6600 }
6601 
6602 bool AMDGPUOperand::isCBSZ() const {
6603   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6604 }
6605 
6606 bool AMDGPUOperand::isABID() const {
6607   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6608 }
6609 
6610 bool AMDGPUOperand::isS16Imm() const {
6611   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6612 }
6613 
6614 bool AMDGPUOperand::isU16Imm() const {
6615   return isImm() && isUInt<16>(getImm());
6616 }
6617 
6618 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6619   if (!isGFX10())
6620     return MatchOperand_NoMatch;
6621 
6622   SMLoc S = Parser.getTok().getLoc();
6623 
6624   if (getLexer().isNot(AsmToken::Identifier))
6625     return MatchOperand_NoMatch;
6626   if (getLexer().getTok().getString() != "dim")
6627     return MatchOperand_NoMatch;
6628 
6629   Parser.Lex();
6630   if (getLexer().isNot(AsmToken::Colon))
6631     return MatchOperand_ParseFail;
6632 
6633   Parser.Lex();
6634 
6635   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6636   // integer.
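  // For example, 'dim:1D' is lexed as the integer '1' followed by the
  // identifier 'D'; the two pieces are re-joined below.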
6637   std::string Token;
6638   if (getLexer().is(AsmToken::Integer)) {
6639     SMLoc Loc = getLexer().getTok().getEndLoc();
6640     Token = std::string(getLexer().getTok().getString());
6641     Parser.Lex();
6642     if (getLexer().getTok().getLoc() != Loc)
6643       return MatchOperand_ParseFail;
6644   }
6645   if (getLexer().isNot(AsmToken::Identifier))
6646     return MatchOperand_ParseFail;
6647   Token += getLexer().getTok().getString();
6648 
6649   StringRef DimId = Token;
6650   if (DimId.startswith("SQ_RSRC_IMG_"))
6651     DimId = DimId.substr(12);
6652 
6653   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6654   if (!DimInfo)
6655     return MatchOperand_ParseFail;
6656 
6657   Parser.Lex();
6658 
6659   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6660                                               AMDGPUOperand::ImmTyDim));
6661   return MatchOperand_Success;
6662 }
6663 
6664 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6665   SMLoc S = Parser.getTok().getLoc();
6666   StringRef Prefix;
6667 
6668   if (getLexer().getKind() == AsmToken::Identifier) {
6669     Prefix = Parser.getTok().getString();
6670   } else {
6671     return MatchOperand_NoMatch;
6672   }
6673 
6674   if (Prefix != "dpp8")
6675     return parseDPPCtrl(Operands);
6676   if (!isGFX10())
6677     return MatchOperand_NoMatch;
6678 
6679   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
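  // Each of the eight lane selectors is in the range 0..7 and occupies 3 bits
  // of the dpp8 field; e.g. dpp8:[0,1,2,3,4,5,6,7] is the identity mapping.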
6680 
6681   int64_t Sels[8];
6682 
6683   Parser.Lex();
6684   if (getLexer().isNot(AsmToken::Colon))
6685     return MatchOperand_ParseFail;
6686 
6687   Parser.Lex();
6688   if (getLexer().isNot(AsmToken::LBrac))
6689     return MatchOperand_ParseFail;
6690 
6691   Parser.Lex();
6692   if (getParser().parseAbsoluteExpression(Sels[0]))
6693     return MatchOperand_ParseFail;
6694   if (0 > Sels[0] || 7 < Sels[0])
6695     return MatchOperand_ParseFail;
6696 
6697   for (size_t i = 1; i < 8; ++i) {
6698     if (getLexer().isNot(AsmToken::Comma))
6699       return MatchOperand_ParseFail;
6700 
6701     Parser.Lex();
6702     if (getParser().parseAbsoluteExpression(Sels[i]))
6703       return MatchOperand_ParseFail;
6704     if (0 > Sels[i] || 7 < Sels[i])
6705       return MatchOperand_ParseFail;
6706   }
6707 
6708   if (getLexer().isNot(AsmToken::RBrac))
6709     return MatchOperand_ParseFail;
6710   Parser.Lex();
6711 
6712   unsigned DPP8 = 0;
6713   for (size_t i = 0; i < 8; ++i)
6714     DPP8 |= (Sels[i] << (i * 3));
6715 
6716   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6717   return MatchOperand_Success;
6718 }
6719 
6720 OperandMatchResultTy
6721 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6722   using namespace AMDGPU::DPP;
6723 
6724   SMLoc S = Parser.getTok().getLoc();
6725   StringRef Prefix;
6726   int64_t Int;
6727 
6728   if (getLexer().getKind() == AsmToken::Identifier) {
6729     Prefix = Parser.getTok().getString();
6730   } else {
6731     return MatchOperand_NoMatch;
6732   }
6733 
6734   if (Prefix == "row_mirror") {
6735     Int = DppCtrl::ROW_MIRROR;
6736     Parser.Lex();
6737   } else if (Prefix == "row_half_mirror") {
6738     Int = DppCtrl::ROW_HALF_MIRROR;
6739     Parser.Lex();
6740   } else {
6741     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6742     if (Prefix != "quad_perm"
6743         && Prefix != "row_shl"
6744         && Prefix != "row_shr"
6745         && Prefix != "row_ror"
6746         && Prefix != "wave_shl"
6747         && Prefix != "wave_rol"
6748         && Prefix != "wave_shr"
6749         && Prefix != "wave_ror"
6750         && Prefix != "row_bcast"
6751         && Prefix != "row_share"
6752         && Prefix != "row_xmask") {
6753       return MatchOperand_NoMatch;
6754     }
6755 
6756     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6757       return MatchOperand_NoMatch;
6758 
6759     if (!isVI() && !isGFX9() &&
6760         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6761          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6762          Prefix == "row_bcast"))
6763       return MatchOperand_NoMatch;
6764 
6765     Parser.Lex();
6766     if (getLexer().isNot(AsmToken::Colon))
6767       return MatchOperand_ParseFail;
6768 
6769     if (Prefix == "quad_perm") {
6770       // quad_perm:[%d,%d,%d,%d]
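      // Each selector is 0..3 and occupies 2 bits, with selector i at bits
      // [2*i+1 : 2*i]; e.g. quad_perm:[0,1,2,3] encodes as 0xE4 (the identity).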
6771       Parser.Lex();
6772       if (getLexer().isNot(AsmToken::LBrac))
6773         return MatchOperand_ParseFail;
6774       Parser.Lex();
6775 
6776       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6777         return MatchOperand_ParseFail;
6778 
6779       for (int i = 0; i < 3; ++i) {
6780         if (getLexer().isNot(AsmToken::Comma))
6781           return MatchOperand_ParseFail;
6782         Parser.Lex();
6783 
6784         int64_t Temp;
6785         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6786           return MatchOperand_ParseFail;
6787         const int shift = i*2 + 2;
6788         Int += (Temp << shift);
6789       }
6790 
6791       if (getLexer().isNot(AsmToken::RBrac))
6792         return MatchOperand_ParseFail;
6793       Parser.Lex();
6794     } else {
6795       // sel:%d
6796       Parser.Lex();
6797       if (getParser().parseAbsoluteExpression(Int))
6798         return MatchOperand_ParseFail;
6799 
6800       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6801         Int |= DppCtrl::ROW_SHL0;
6802       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6803         Int |= DppCtrl::ROW_SHR0;
6804       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6805         Int |= DppCtrl::ROW_ROR0;
6806       } else if (Prefix == "wave_shl" && 1 == Int) {
6807         Int = DppCtrl::WAVE_SHL1;
6808       } else if (Prefix == "wave_rol" && 1 == Int) {
6809         Int = DppCtrl::WAVE_ROL1;
6810       } else if (Prefix == "wave_shr" && 1 == Int) {
6811         Int = DppCtrl::WAVE_SHR1;
6812       } else if (Prefix == "wave_ror" && 1 == Int) {
6813         Int = DppCtrl::WAVE_ROR1;
6814       } else if (Prefix == "row_bcast") {
6815         if (Int == 15) {
6816           Int = DppCtrl::BCAST15;
6817         } else if (Int == 31) {
6818           Int = DppCtrl::BCAST31;
6819         } else {
6820           return MatchOperand_ParseFail;
6821         }
6822       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6823         Int |= DppCtrl::ROW_SHARE_FIRST;
6824       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6825         Int |= DppCtrl::ROW_XMASK_FIRST;
6826       } else {
6827         return MatchOperand_ParseFail;
6828       }
6829     }
6830   }
6831 
6832   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6833   return MatchOperand_Success;
6834 }
6835 
6836 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6837   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6838 }
6839 
6840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6841   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6842 }
6843 
6844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6845   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6846 }
6847 
6848 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6849   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6850 }
6851 
6852 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6853   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6854 }
6855 
6856 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6857   OptionalImmIndexMap OptionalIdx;
6858 
6859   unsigned I = 1;
6860   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6861   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6862     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6863   }
6864 
6865   int Fi = 0;
6866   for (unsigned E = Operands.size(); I != E; ++I) {
6867     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6868                                             MCOI::TIED_TO);
6869     if (TiedTo != -1) {
6870       assert((unsigned)TiedTo < Inst.getNumOperands());
6871       // Handle tied 'old' or src2 operands for MAC instructions.
6872       Inst.addOperand(Inst.getOperand(TiedTo));
6873     }
6874     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6875     // Add the register arguments
6876     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6877       // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
6878       // Skip it.
6879       continue;
6880     }
6881 
6882     if (IsDPP8) {
6883       if (Op.isDPP8()) {
6884         Op.addImmOperands(Inst, 1);
6885       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6886         Op.addRegWithFPInputModsOperands(Inst, 2);
6887       } else if (Op.isFI()) {
6888         Fi = Op.getImm();
6889       } else if (Op.isReg()) {
6890         Op.addRegOperands(Inst, 1);
6891       } else {
6892         llvm_unreachable("Invalid operand type");
6893       }
6894     } else {
6895       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6896         Op.addRegWithFPInputModsOperands(Inst, 2);
6897       } else if (Op.isDPPCtrl()) {
6898         Op.addImmOperands(Inst, 1);
6899       } else if (Op.isImm()) {
6900         // Handle optional arguments
6901         OptionalIdx[Op.getImmTy()] = I;
6902       } else {
6903         llvm_unreachable("Invalid operand type");
6904       }
6905     }
6906   }
6907 
6908   if (IsDPP8) {
6909     using namespace llvm::AMDGPU::DPP;
6910     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6911   } else {
6912     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6913     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6914     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6915     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6916       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6917     }
6918   }
6919 }
6920 
6921 //===----------------------------------------------------------------------===//
6922 // sdwa
6923 //===----------------------------------------------------------------------===//
6924 
6925 OperandMatchResultTy
6926 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6927                               AMDGPUOperand::ImmTy Type) {
6928   using namespace llvm::AMDGPU::SDWA;
6929 
6930   SMLoc S = Parser.getTok().getLoc();
6931   StringRef Value;
6932   OperandMatchResultTy res;
6933 
6934   res = parseStringWithPrefix(Prefix, Value);
6935   if (res != MatchOperand_Success) {
6936     return res;
6937   }
6938 
6939   int64_t Int;
6940   Int = StringSwitch<int64_t>(Value)
6941         .Case("BYTE_0", SdwaSel::BYTE_0)
6942         .Case("BYTE_1", SdwaSel::BYTE_1)
6943         .Case("BYTE_2", SdwaSel::BYTE_2)
6944         .Case("BYTE_3", SdwaSel::BYTE_3)
6945         .Case("WORD_0", SdwaSel::WORD_0)
6946         .Case("WORD_1", SdwaSel::WORD_1)
6947         .Case("DWORD", SdwaSel::DWORD)
6948         .Default(0xffffffff);
6949   Parser.Lex(); // eat last token
6950 
6951   if (Int == 0xffffffff) {
6952     return MatchOperand_ParseFail;
6953   }
6954 
6955   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6956   return MatchOperand_Success;
6957 }
6958 
6959 OperandMatchResultTy
6960 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6961   using namespace llvm::AMDGPU::SDWA;
6962 
6963   SMLoc S = Parser.getTok().getLoc();
6964   StringRef Value;
6965   OperandMatchResultTy res;
6966 
6967   res = parseStringWithPrefix("dst_unused", Value);
6968   if (res != MatchOperand_Success) {
6969     return res;
6970   }
6971 
6972   int64_t Int;
6973   Int = StringSwitch<int64_t>(Value)
6974         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6975         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6976         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6977         .Default(0xffffffff);
6978   Parser.Lex(); // eat last token
6979 
6980   if (Int == 0xffffffff) {
6981     return MatchOperand_ParseFail;
6982   }
6983 
6984   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6985   return MatchOperand_Success;
6986 }
6987 
6988 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6989   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6990 }
6991 
6992 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6993   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6994 }
6995 
6996 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6997   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6998 }
6999 
7000 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7001   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7002 }
7003 
7004 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7005   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7006 }
7007 
7008 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7009                               uint64_t BasicInstType,
7010                               bool SkipDstVcc,
7011                               bool SkipSrcVcc) {
7012   using namespace llvm::AMDGPU::SDWA;
7013 
7014   OptionalImmIndexMap OptionalIdx;
7015   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7016   bool SkippedVcc = false;
7017 
7018   unsigned I = 1;
7019   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7020   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7021     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7022   }
7023 
7024   for (unsigned E = Operands.size(); I != E; ++I) {
7025     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7026     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7027         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7028       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
7029       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7030       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
7031       // Skip VCC only if we did not skip it on the previous iteration.
7032       // Note that src0 and src1 each occupy two slots because of modifiers.
7033       if (BasicInstType == SIInstrFlags::VOP2 &&
7034           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7035            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7036         SkippedVcc = true;
7037         continue;
7038       } else if (BasicInstType == SIInstrFlags::VOPC &&
7039                  Inst.getNumOperands() == 0) {
7040         SkippedVcc = true;
7041         continue;
7042       }
7043     }
7044     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7045       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7046     } else if (Op.isImm()) {
7047       // Handle optional arguments
7048       OptionalIdx[Op.getImmTy()] = I;
7049     } else {
7050       llvm_unreachable("Invalid operand type");
7051     }
7052     SkippedVcc = false;
7053   }
7054 
7055   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7056       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7057       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7058     // v_nop_sdwa_vi/gfx9/gfx10 has no optional SDWA arguments.
7059     switch (BasicInstType) {
7060     case SIInstrFlags::VOP1:
7061       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7062       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7063         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7064       }
7065       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7066       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7067       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7068       break;
7069 
7070     case SIInstrFlags::VOP2:
7071       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7072       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7073         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7074       }
7075       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7076       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7077       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7078       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7079       break;
7080 
7081     case SIInstrFlags::VOPC:
7082       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7083         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7084       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7085       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7086       break;
7087 
7088     default:
7089       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7090     }
7091   }
7092 
7093   // Special case v_mac_{f16, f32}:
7094   // these have a src2 register operand that is tied to the dst operand.
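  // For example (illustrative syntax), in 'v_mac_f32_sdwa v0, v1, v2 ...'
  // src2 is inserted below as a copy of the dst operand (v0).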
7095   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7096       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7097     auto it = Inst.begin();
7098     std::advance(
7099       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7100     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7101   }
7102 }
7103 
7104 //===----------------------------------------------------------------------===//
7105 // mAI
7106 //===----------------------------------------------------------------------===//
7107 
7108 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7109   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7110 }
7111 
7112 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7113   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7114 }
7115 
7116 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7117   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7118 }
7119 
7120 /// Force static initialization.
7121 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7122   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7123   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7124 }
7125 
7126 #define GET_REGISTER_MATCHER
7127 #define GET_MATCHER_IMPLEMENTATION
7128 #define GET_MNEMONIC_SPELL_CHECKER
7129 #include "AMDGPUGenAsmMatcher.inc"
7130 
7131 // This function should be defined after the auto-generated include so that
7132 // the MatchClassKind enum is defined.
7133 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7134                                                      unsigned Kind) {
7135   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7136   // But MatchInstructionImpl() expects to see a token and fails to validate
7137   // the operand. This method checks whether we were given an immediate
7138   // operand but are expected to produce the corresponding token.
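  // For example (illustrative syntax), in 'buffer_load_dword v0, off, s[0:3], 0 glc'
  // the 'glc' operand arrives here as an immediate, while the matcher expects
  // the MCK_glc token.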
7139   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7140   switch (Kind) {
7141   case MCK_addr64:
7142     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7143   case MCK_gds:
7144     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7145   case MCK_lds:
7146     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7147   case MCK_glc:
7148     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7149   case MCK_idxen:
7150     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7151   case MCK_offen:
7152     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7153   case MCK_SSrcB32:
7154     // When operands have expression values, they will return true for isToken,
7155     // because it is not possible to distinguish between a token and an
7156     // expression at parse time. When isToken returns true, MatchInstructionImpl()
7157     // will always try to match the operand as a token; if the name of the
7158     // expression is not a valid token, the match fails, so we need to
7159     // handle it here.
7160     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7161   case MCK_SSrcF32:
7162     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7163   case MCK_SoppBrTarget:
7164     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7165   case MCK_VReg32OrOff:
7166     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7167   case MCK_InterpSlot:
7168     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7169   case MCK_Attr:
7170     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7171   case MCK_AttrChan:
7172     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7173   case MCK_ImmSMEMOffset:
7174     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7175   case MCK_SReg_64:
7176   case MCK_SReg_64_XEXEC:
7177     // Null is defined as a 32-bit register but
7178     // it should also be enabled with 64-bit operands.
7179     // The following code enables it for SReg_64 operands
7180     // used as source and destination. Remaining source
7181     // operands are handled in isInlinableImm.
7182     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7183   default:
7184     return Match_InvalidOperand;
7185   }
7186 }
7187 
7188 //===----------------------------------------------------------------------===//
7189 // endpgm
7190 //===----------------------------------------------------------------------===//
7191 
7192 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7193   SMLoc S = Parser.getTok().getLoc();
7194   int64_t Imm = 0;
7195 
7196   if (!parseExpr(Imm)) {
7197     // The operand is optional; if not present, default to 0.
7198     Imm = 0;
7199   }
7200 
7201   if (!isUInt<16>(Imm)) {
7202     Error(S, "expected a 16-bit value");
7203     return MatchOperand_ParseFail;
7204   }
7205 
7206   Operands.push_back(
7207       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7208   return MatchOperand_Success;
7209 }
7210 
7211 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7212