1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcF16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   ImmTy getImmTy() const {
693     assert(isImm());
694     return Imm.Type;
695   }
696 
697   unsigned getReg() const override {
698     assert(isRegKind());
699     return Reg.RegNo;
700   }
701 
702   SMLoc getStartLoc() const override {
703     return StartLoc;
704   }
705 
706   SMLoc getEndLoc() const override {
707     return EndLoc;
708   }
709 
710   SMRange getLocRange() const {
711     return SMRange(StartLoc, EndLoc);
712   }
713 
714   Modifiers getModifiers() const {
715     assert(isRegKind() || isImmTy(ImmTyNone));
716     return isRegKind() ? Reg.Mods : Imm.Mods;
717   }
718 
719   void setModifiers(Modifiers Mods) {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     if (isRegKind())
722       Reg.Mods = Mods;
723     else
724       Imm.Mods = Mods;
725   }
726 
727   bool hasModifiers() const {
728     return getModifiers().hasModifiers();
729   }
730 
731   bool hasFPModifiers() const {
732     return getModifiers().hasFPModifiers();
733   }
734 
735   bool hasIntModifiers() const {
736     return getModifiers().hasIntModifiers();
737   }
738 
739   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
740 
741   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
742 
743   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
744 
745   template <unsigned Bitwidth>
746   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
747 
748   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
749     addKImmFPOperands<16>(Inst, N);
750   }
751 
752   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
753     addKImmFPOperands<32>(Inst, N);
754   }
755 
756   void addRegOperands(MCInst &Inst, unsigned N) const;
757 
758   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
759     addRegOperands(Inst, N);
760   }
761 
762   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
763     if (isRegKind())
764       addRegOperands(Inst, N);
765     else if (isExpr())
766       Inst.addOperand(MCOperand::createExpr(Expr));
767     else
768       addImmOperands(Inst, N);
769   }
770 
771   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
772     Modifiers Mods = getModifiers();
773     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
774     if (isRegKind()) {
775       addRegOperands(Inst, N);
776     } else {
777       addImmOperands(Inst, N, false);
778     }
779   }
780 
781   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
782     assert(!hasIntModifiers());
783     addRegOrImmWithInputModsOperands(Inst, N);
784   }
785 
786   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasFPModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
792     Modifiers Mods = getModifiers();
793     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
794     assert(isRegKind());
795     addRegOperands(Inst, N);
796   }
797 
798   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
799     assert(!hasIntModifiers());
800     addRegWithInputModsOperands(Inst, N);
801   }
802 
803   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasFPModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
809     if (isImm())
810       addImmOperands(Inst, N);
811     else {
812       assert(isExpr());
813       Inst.addOperand(MCOperand::createExpr(Expr));
814     }
815   }
816 
817   static void printImmTy(raw_ostream& OS, ImmTy Type) {
818     switch (Type) {
819     case ImmTyNone: OS << "None"; break;
820     case ImmTyGDS: OS << "GDS"; break;
821     case ImmTyLDS: OS << "LDS"; break;
822     case ImmTyOffen: OS << "Offen"; break;
823     case ImmTyIdxen: OS << "Idxen"; break;
824     case ImmTyAddr64: OS << "Addr64"; break;
825     case ImmTyOffset: OS << "Offset"; break;
826     case ImmTyInstOffset: OS << "InstOffset"; break;
827     case ImmTyOffset0: OS << "Offset0"; break;
828     case ImmTyOffset1: OS << "Offset1"; break;
829     case ImmTyDLC: OS << "DLC"; break;
830     case ImmTyGLC: OS << "GLC"; break;
831     case ImmTySLC: OS << "SLC"; break;
832     case ImmTySWZ: OS << "SWZ"; break;
833     case ImmTyTFE: OS << "TFE"; break;
834     case ImmTyD16: OS << "D16"; break;
835     case ImmTyFORMAT: OS << "FORMAT"; break;
836     case ImmTyClampSI: OS << "ClampSI"; break;
837     case ImmTyOModSI: OS << "OModSI"; break;
838     case ImmTyDPP8: OS << "DPP8"; break;
839     case ImmTyDppCtrl: OS << "DppCtrl"; break;
840     case ImmTyDppRowMask: OS << "DppRowMask"; break;
841     case ImmTyDppBankMask: OS << "DppBankMask"; break;
842     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
843     case ImmTyDppFi: OS << "FI"; break;
844     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
845     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
846     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
847     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
848     case ImmTyDMask: OS << "DMask"; break;
849     case ImmTyDim: OS << "Dim"; break;
850     case ImmTyUNorm: OS << "UNorm"; break;
851     case ImmTyDA: OS << "DA"; break;
852     case ImmTyR128A16: OS << "R128A16"; break;
853     case ImmTyA16: OS << "A16"; break;
854     case ImmTyLWE: OS << "LWE"; break;
855     case ImmTyOff: OS << "Off"; break;
856     case ImmTyExpTgt: OS << "ExpTgt"; break;
857     case ImmTyExpCompr: OS << "ExpCompr"; break;
858     case ImmTyExpVM: OS << "ExpVM"; break;
859     case ImmTyHwreg: OS << "Hwreg"; break;
860     case ImmTySendMsg: OS << "SendMsg"; break;
861     case ImmTyInterpSlot: OS << "InterpSlot"; break;
862     case ImmTyInterpAttr: OS << "InterpAttr"; break;
863     case ImmTyAttrChan: OS << "AttrChan"; break;
864     case ImmTyOpSel: OS << "OpSel"; break;
865     case ImmTyOpSelHi: OS << "OpSelHi"; break;
866     case ImmTyNegLo: OS << "NegLo"; break;
867     case ImmTyNegHi: OS << "NegHi"; break;
868     case ImmTySwizzle: OS << "Swizzle"; break;
869     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
870     case ImmTyHigh: OS << "High"; break;
871     case ImmTyBLGP: OS << "BLGP"; break;
872     case ImmTyCBSZ: OS << "CBSZ"; break;
873     case ImmTyABID: OS << "ABID"; break;
874     case ImmTyEndpgm: OS << "Endpgm"; break;
875     }
876   }
877 
878   void print(raw_ostream &OS) const override {
879     switch (Kind) {
880     case Register:
881       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
882       break;
883     case Immediate:
884       OS << '<' << getImm();
885       if (getImmTy() != ImmTyNone) {
886         OS << " type: "; printImmTy(OS, getImmTy());
887       }
888       OS << " mods: " << Imm.Mods << '>';
889       break;
890     case Token:
891       OS << '\'' << getToken() << '\'';
892       break;
893     case Expression:
894       OS << "<expr " << *Expr << '>';
895       break;
896     }
897   }
898 
899   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
900                                       int64_t Val, SMLoc Loc,
901                                       ImmTy Type = ImmTyNone,
902                                       bool IsFPImm = false) {
903     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
904     Op->Imm.Val = Val;
905     Op->Imm.IsFPImm = IsFPImm;
906     Op->Imm.Type = Type;
907     Op->Imm.Mods = Modifiers();
908     Op->StartLoc = Loc;
909     Op->EndLoc = Loc;
910     return Op;
911   }
912 
913   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
914                                         StringRef Str, SMLoc Loc,
915                                         bool HasExplicitEncodingSize = true) {
916     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
917     Res->Tok.Data = Str.data();
918     Res->Tok.Length = Str.size();
919     Res->StartLoc = Loc;
920     Res->EndLoc = Loc;
921     return Res;
922   }
923 
924   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
925                                       unsigned RegNo, SMLoc S,
926                                       SMLoc E) {
927     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
928     Op->Reg.RegNo = RegNo;
929     Op->Reg.Mods = Modifiers();
930     Op->StartLoc = S;
931     Op->EndLoc = E;
932     return Op;
933   }
934 
935   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
936                                        const class MCExpr *Expr, SMLoc S) {
937     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
938     Op->Expr = Expr;
939     Op->StartLoc = S;
940     Op->EndLoc = S;
941     return Op;
942   }
943 };
944 
945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
946   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
947   return OS;
948 }
949 
950 //===----------------------------------------------------------------------===//
951 // AsmParser
952 //===----------------------------------------------------------------------===//
953 
954 // Holds info related to the current kernel, e.g. count of SGPRs used.
955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
956 // .amdgpu_hsa_kernel or at EOF.
957 class KernelScopeInfo {
958   int SgprIndexUnusedMin = -1;
959   int VgprIndexUnusedMin = -1;
960   MCContext *Ctx = nullptr;
961 
962   void usesSgprAt(int i) {
963     if (i >= SgprIndexUnusedMin) {
964       SgprIndexUnusedMin = ++i;
965       if (Ctx) {
966         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
967         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
968       }
969     }
970   }
971 
972   void usesVgprAt(int i) {
973     if (i >= VgprIndexUnusedMin) {
974       VgprIndexUnusedMin = ++i;
975       if (Ctx) {
976         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
977         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
978       }
979     }
980   }
981 
982 public:
983   KernelScopeInfo() = default;
984 
985   void initialize(MCContext &Context) {
986     Ctx = &Context;
987     usesSgprAt(SgprIndexUnusedMin = -1);
988     usesVgprAt(VgprIndexUnusedMin = -1);
989   }
990 
991   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
992     switch (RegKind) {
993       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
994       case IS_AGPR: // fall through
995       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
996       default: break;
997     }
998   }
999 };
1000 
1001 class AMDGPUAsmParser : public MCTargetAsmParser {
1002   MCAsmParser &Parser;
1003 
1004   // Number of extra operands parsed after the first optional operand.
1005   // This may be necessary to skip hardcoded mandatory operands.
1006   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1007 
1008   unsigned ForcedEncodingSize = 0;
1009   bool ForcedDPP = false;
1010   bool ForcedSDWA = false;
1011   KernelScopeInfo KernelScope;
1012 
1013   /// @name Auto-generated Match Functions
1014   /// {
1015 
1016 #define GET_ASSEMBLER_HEADER
1017 #include "AMDGPUGenAsmMatcher.inc"
1018 
1019   /// }
1020 
1021 private:
1022   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1023   bool OutOfRangeError(SMRange Range);
1024   /// Calculate VGPR/SGPR blocks required for given target, reserved
1025   /// registers, and user-specified NextFreeXGPR values.
1026   ///
1027   /// \param Features [in] Target features, used for bug corrections.
1028   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1029   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1030   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1031   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1032   /// descriptor field, if valid.
1033   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1034   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1035   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1036   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1037   /// \param VGPRBlocks [out] Result VGPR block count.
1038   /// \param SGPRBlocks [out] Result SGPR block count.
1039   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1040                           bool FlatScrUsed, bool XNACKUsed,
1041                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1042                           SMRange VGPRRange, unsigned NextFreeSGPR,
1043                           SMRange SGPRRange, unsigned &VGPRBlocks,
1044                           unsigned &SGPRBlocks);
1045   bool ParseDirectiveAMDGCNTarget();
1046   bool ParseDirectiveAMDHSAKernel();
1047   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1048   bool ParseDirectiveHSACodeObjectVersion();
1049   bool ParseDirectiveHSACodeObjectISA();
1050   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1051   bool ParseDirectiveAMDKernelCodeT();
1052   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1053   bool ParseDirectiveAMDGPUHsaKernel();
1054 
1055   bool ParseDirectiveISAVersion();
1056   bool ParseDirectiveHSAMetadata();
1057   bool ParseDirectivePALMetadataBegin();
1058   bool ParseDirectivePALMetadata();
1059   bool ParseDirectiveAMDGPULDS();
1060 
1061   /// Common code to parse out a block of text (typically YAML) between start and
1062   /// end directives.
1063   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1064                            const char *AssemblerDirectiveEnd,
1065                            std::string &CollectString);
1066 
1067   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1068                              RegisterKind RegKind, unsigned Reg1);
1069   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1070                            unsigned &RegNum, unsigned &RegWidth,
1071                            bool RestoreOnFailure = false);
1072   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1073                            unsigned &RegNum, unsigned &RegWidth,
1074                            SmallVectorImpl<AsmToken> &Tokens);
1075   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1076                            unsigned &RegWidth,
1077                            SmallVectorImpl<AsmToken> &Tokens);
1078   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1079                            unsigned &RegWidth,
1080                            SmallVectorImpl<AsmToken> &Tokens);
1081   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1082                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1083   bool ParseRegRange(unsigned& Num, unsigned& Width);
1084   unsigned getRegularReg(RegisterKind RegKind,
1085                          unsigned RegNum,
1086                          unsigned RegWidth);
1087 
1088   bool isRegister();
1089   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1090   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1091   void initializeGprCountSymbol(RegisterKind RegKind);
1092   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1093                              unsigned RegWidth);
1094   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1095                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1096   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1097                  bool IsGdsHardcoded);
1098 
1099 public:
1100   enum AMDGPUMatchResultTy {
1101     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1102   };
1103   enum OperandMode {
1104     OperandMode_Default,
1105     OperandMode_NSA,
1106   };
1107 
1108   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1109 
1110   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1111                const MCInstrInfo &MII,
1112                const MCTargetOptions &Options)
1113       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1114     MCAsmParserExtension::Initialize(Parser);
1115 
1116     if (getFeatureBits().none()) {
1117       // Set default features.
1118       copySTI().ToggleFeature("southern-islands");
1119     }
1120 
1121     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1122 
1123     {
1124       // TODO: make those pre-defined variables read-only.
1125       // Currently there is none suitable machinery in the core llvm-mc for this.
1126       // MCSymbol::isRedefinable is intended for another purpose, and
1127       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1128       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1129       MCContext &Ctx = getContext();
1130       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1131         MCSymbol *Sym =
1132             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1133         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1134         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1135         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1136         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1137         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1138       } else {
1139         MCSymbol *Sym =
1140             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1144         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1145         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1146       }
1147       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1148         initializeGprCountSymbol(IS_VGPR);
1149         initializeGprCountSymbol(IS_SGPR);
1150       } else
1151         KernelScope.initialize(getContext());
1152     }
1153   }
1154 
1155   bool hasXNACK() const {
1156     return AMDGPU::hasXNACK(getSTI());
1157   }
1158 
1159   bool hasMIMG_R128() const {
1160     return AMDGPU::hasMIMG_R128(getSTI());
1161   }
1162 
1163   bool hasPackedD16() const {
1164     return AMDGPU::hasPackedD16(getSTI());
1165   }
1166 
1167   bool hasGFX10A16() const {
1168     return AMDGPU::hasGFX10A16(getSTI());
1169   }
1170 
1171   bool isSI() const {
1172     return AMDGPU::isSI(getSTI());
1173   }
1174 
1175   bool isCI() const {
1176     return AMDGPU::isCI(getSTI());
1177   }
1178 
1179   bool isVI() const {
1180     return AMDGPU::isVI(getSTI());
1181   }
1182 
1183   bool isGFX9() const {
1184     return AMDGPU::isGFX9(getSTI());
1185   }
1186 
1187   bool isGFX10() const {
1188     return AMDGPU::isGFX10(getSTI());
1189   }
1190 
1191   bool hasInv2PiInlineImm() const {
1192     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1193   }
1194 
1195   bool hasFlatOffsets() const {
1196     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1197   }
1198 
1199   bool hasSGPR102_SGPR103() const {
1200     return !isVI() && !isGFX9();
1201   }
1202 
1203   bool hasSGPR104_SGPR105() const {
1204     return isGFX10();
1205   }
1206 
1207   bool hasIntClamp() const {
1208     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1209   }
1210 
1211   AMDGPUTargetStreamer &getTargetStreamer() {
1212     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1213     return static_cast<AMDGPUTargetStreamer &>(TS);
1214   }
1215 
1216   const MCRegisterInfo *getMRI() const {
1217     // We need this const_cast because for some reason getContext() is not const
1218     // in MCAsmParser.
1219     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1220   }
1221 
1222   const MCInstrInfo *getMII() const {
1223     return &MII;
1224   }
1225 
1226   const FeatureBitset &getFeatureBits() const {
1227     return getSTI().getFeatureBits();
1228   }
1229 
1230   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1231   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1232   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1233 
1234   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1235   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1236   bool isForcedDPP() const { return ForcedDPP; }
1237   bool isForcedSDWA() const { return ForcedSDWA; }
1238   ArrayRef<unsigned> getMatchedVariants() const;
1239 
1240   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1241   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1242                      bool RestoreOnFailure);
1243   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1244   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1245                                         SMLoc &EndLoc) override;
1246   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1247   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1248                                       unsigned Kind) override;
1249   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1250                                OperandVector &Operands, MCStreamer &Out,
1251                                uint64_t &ErrorInfo,
1252                                bool MatchingInlineAsm) override;
1253   bool ParseDirective(AsmToken DirectiveID) override;
1254   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1255                                     OperandMode Mode = OperandMode_Default);
1256   StringRef parseMnemonicSuffix(StringRef Name);
1257   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1258                         SMLoc NameLoc, OperandVector &Operands) override;
1259   //bool ProcessInstruction(MCInst &Inst);
1260 
1261   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1262 
1263   OperandMatchResultTy
1264   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1265                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1266                      bool (*ConvertResult)(int64_t &) = nullptr);
1267 
1268   OperandMatchResultTy
1269   parseOperandArrayWithPrefix(const char *Prefix,
1270                               OperandVector &Operands,
1271                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1272                               bool (*ConvertResult)(int64_t&) = nullptr);
1273 
1274   OperandMatchResultTy
1275   parseNamedBit(const char *Name, OperandVector &Operands,
1276                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1277   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1278                                              StringRef &Value);
1279 
1280   bool isModifier();
1281   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1282   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1283   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1284   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1285   bool parseSP3NegModifier();
1286   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1287   OperandMatchResultTy parseReg(OperandVector &Operands);
1288   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1289   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1290   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1291   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1292   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1293   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1294   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1295 
1296   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1297   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1298   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1299   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1300 
1301   bool parseCnt(int64_t &IntVal);
1302   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1303   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1304 
1305 private:
1306   struct OperandInfoTy {
1307     int64_t Id;
1308     bool IsSymbolic = false;
1309     bool IsDefined = false;
1310 
1311     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1312   };
1313 
1314   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1315   bool validateSendMsg(const OperandInfoTy &Msg,
1316                        const OperandInfoTy &Op,
1317                        const OperandInfoTy &Stream,
1318                        const SMLoc Loc);
1319 
1320   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1321   bool validateHwreg(const OperandInfoTy &HwReg,
1322                      const int64_t Offset,
1323                      const int64_t Width,
1324                      const SMLoc Loc);
1325 
1326   void errorExpTgt();
1327   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1328   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1329   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1330 
1331   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1332   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1333   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1334   bool validateSOPLiteral(const MCInst &Inst) const;
1335   bool validateConstantBusLimitations(const MCInst &Inst);
1336   bool validateEarlyClobberLimitations(const MCInst &Inst);
1337   bool validateIntClampSupported(const MCInst &Inst);
1338   bool validateMIMGAtomicDMask(const MCInst &Inst);
1339   bool validateMIMGGatherDMask(const MCInst &Inst);
1340   bool validateMovrels(const MCInst &Inst);
1341   bool validateMIMGDataSize(const MCInst &Inst);
1342   bool validateMIMGAddrSize(const MCInst &Inst);
1343   bool validateMIMGD16(const MCInst &Inst);
1344   bool validateMIMGDim(const MCInst &Inst);
1345   bool validateLdsDirect(const MCInst &Inst);
1346   bool validateOpSel(const MCInst &Inst);
1347   bool validateVccOperand(unsigned Reg) const;
1348   bool validateVOP3Literal(const MCInst &Inst) const;
1349   unsigned getConstantBusLimit(unsigned Opcode) const;
1350   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1351   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1352   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1353 
1354   bool isId(const StringRef Id) const;
1355   bool isId(const AsmToken &Token, const StringRef Id) const;
1356   bool isToken(const AsmToken::TokenKind Kind) const;
1357   bool trySkipId(const StringRef Id);
1358   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1359   bool trySkipToken(const AsmToken::TokenKind Kind);
1360   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1361   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1362   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1363   AsmToken::TokenKind getTokenKind() const;
1364   bool parseExpr(int64_t &Imm);
1365   bool parseExpr(OperandVector &Operands);
1366   StringRef getTokenStr() const;
1367   AsmToken peekToken();
1368   AsmToken getToken() const;
1369   SMLoc getLoc() const;
1370   void lex();
1371 
1372 public:
1373   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1374   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1375 
1376   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1377   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1378   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1379   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1380   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1381   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1382 
1383   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1384                             const unsigned MinVal,
1385                             const unsigned MaxVal,
1386                             const StringRef ErrMsg);
1387   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1388   bool parseSwizzleOffset(int64_t &Imm);
1389   bool parseSwizzleMacro(int64_t &Imm);
1390   bool parseSwizzleQuadPerm(int64_t &Imm);
1391   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1392   bool parseSwizzleBroadcast(int64_t &Imm);
1393   bool parseSwizzleSwap(int64_t &Imm);
1394   bool parseSwizzleReverse(int64_t &Imm);
1395 
1396   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1397   int64_t parseGPRIdxMacro();
1398 
1399   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1400   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1401   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1402   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1403   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1404 
1405   AMDGPUOperand::Ptr defaultDLC() const;
1406   AMDGPUOperand::Ptr defaultGLC() const;
1407   AMDGPUOperand::Ptr defaultSLC() const;
1408 
1409   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1410   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1411   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1412   AMDGPUOperand::Ptr defaultFlatOffset() const;
1413 
1414   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1415 
1416   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1417                OptionalImmIndexMap &OptionalIdx);
1418   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1419   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1420   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1421 
1422   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1423 
1424   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1425                bool IsAtomic = false);
1426   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1427 
1428   OperandMatchResultTy parseDim(OperandVector &Operands);
1429   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1430   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1431   AMDGPUOperand::Ptr defaultRowMask() const;
1432   AMDGPUOperand::Ptr defaultBankMask() const;
1433   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1434   AMDGPUOperand::Ptr defaultFI() const;
1435   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1436   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1437 
1438   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1439                                     AMDGPUOperand::ImmTy Type);
1440   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1441   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1442   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1443   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1444   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1445   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1446   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1447                uint64_t BasicInstType,
1448                bool SkipDstVcc = false,
1449                bool SkipSrcVcc = false);
1450 
1451   AMDGPUOperand::Ptr defaultBLGP() const;
1452   AMDGPUOperand::Ptr defaultCBSZ() const;
1453   AMDGPUOperand::Ptr defaultABID() const;
1454 
1455   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1456   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1457 };
1458 
1459 struct OptionalOperand {
1460   const char *Name;
1461   AMDGPUOperand::ImmTy Type;
1462   bool IsBit;
1463   bool (*ConvertResult)(int64_t&);
1464 };
1465 
1466 } // end anonymous namespace
1467 
1468 // May be called with integer type with equivalent bitwidth.
1469 static const fltSemantics *getFltSemantics(unsigned Size) {
1470   switch (Size) {
1471   case 4:
1472     return &APFloat::IEEEsingle();
1473   case 8:
1474     return &APFloat::IEEEdouble();
1475   case 2:
1476     return &APFloat::IEEEhalf();
1477   default:
1478     llvm_unreachable("unsupported fp type");
1479   }
1480 }
1481 
1482 static const fltSemantics *getFltSemantics(MVT VT) {
1483   return getFltSemantics(VT.getSizeInBits() / 8);
1484 }
1485 
1486 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1487   switch (OperandType) {
1488   case AMDGPU::OPERAND_REG_IMM_INT32:
1489   case AMDGPU::OPERAND_REG_IMM_FP32:
1490   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1491   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1492   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1493   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1494     return &APFloat::IEEEsingle();
1495   case AMDGPU::OPERAND_REG_IMM_INT64:
1496   case AMDGPU::OPERAND_REG_IMM_FP64:
1497   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1498   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1499     return &APFloat::IEEEdouble();
1500   case AMDGPU::OPERAND_REG_IMM_INT16:
1501   case AMDGPU::OPERAND_REG_IMM_FP16:
1502   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1503   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1504   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1505   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1506   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1507   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1508   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1509   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1510   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1511   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1512     return &APFloat::IEEEhalf();
1513   default:
1514     llvm_unreachable("unsupported fp type");
1515   }
1516 }
1517 
1518 //===----------------------------------------------------------------------===//
1519 // Operand
1520 //===----------------------------------------------------------------------===//
1521 
1522 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1523   bool Lost;
1524 
1525   // Convert literal to single precision
1526   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1527                                                APFloat::rmNearestTiesToEven,
1528                                                &Lost);
1529   // We allow precision lost but not overflow or underflow
1530   if (Status != APFloat::opOK &&
1531       Lost &&
1532       ((Status & APFloat::opOverflow)  != 0 ||
1533        (Status & APFloat::opUnderflow) != 0)) {
1534     return false;
1535   }
1536 
1537   return true;
1538 }
1539 
1540 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1541   return isUIntN(Size, Val) || isIntN(Size, Val);
1542 }
1543 
1544 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1545 
1546   // This is a hack to enable named inline values like
1547   // shared_base with both 32-bit and 64-bit operands.
1548   // Note that these values are defined as
1549   // 32-bit operands only.
1550   if (isInlineValue()) {
1551     return true;
1552   }
1553 
1554   if (!isImmTy(ImmTyNone)) {
1555     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1556     return false;
1557   }
1558   // TODO: We should avoid using host float here. It would be better to
1559   // check the float bit values which is what a few other places do.
1560   // We've had bot failures before due to weird NaN support on mips hosts.
1561 
1562   APInt Literal(64, Imm.Val);
1563 
1564   if (Imm.IsFPImm) { // We got fp literal token
1565     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1566       return AMDGPU::isInlinableLiteral64(Imm.Val,
1567                                           AsmParser->hasInv2PiInlineImm());
1568     }
1569 
1570     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1571     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1572       return false;
1573 
1574     if (type.getScalarSizeInBits() == 16) {
1575       return AMDGPU::isInlinableLiteral16(
1576         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1577         AsmParser->hasInv2PiInlineImm());
1578     }
1579 
1580     // Check if single precision literal is inlinable
1581     return AMDGPU::isInlinableLiteral32(
1582       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1583       AsmParser->hasInv2PiInlineImm());
1584   }
1585 
1586   // We got int literal token.
1587   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1588     return AMDGPU::isInlinableLiteral64(Imm.Val,
1589                                         AsmParser->hasInv2PiInlineImm());
1590   }
1591 
1592   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1593     return false;
1594   }
1595 
1596   if (type.getScalarSizeInBits() == 16) {
1597     return AMDGPU::isInlinableLiteral16(
1598       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1599       AsmParser->hasInv2PiInlineImm());
1600   }
1601 
1602   return AMDGPU::isInlinableLiteral32(
1603     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1604     AsmParser->hasInv2PiInlineImm());
1605 }
1606 
1607 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1608   // Check that this immediate can be added as literal
1609   if (!isImmTy(ImmTyNone)) {
1610     return false;
1611   }
1612 
1613   if (!Imm.IsFPImm) {
1614     // We got int literal token.
1615 
1616     if (type == MVT::f64 && hasFPModifiers()) {
1617       // Cannot apply fp modifiers to int literals preserving the same semantics
1618       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1619       // disable these cases.
1620       return false;
1621     }
1622 
1623     unsigned Size = type.getSizeInBits();
1624     if (Size == 64)
1625       Size = 32;
1626 
1627     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1628     // types.
1629     return isSafeTruncation(Imm.Val, Size);
1630   }
1631 
1632   // We got fp literal token
1633   if (type == MVT::f64) { // Expected 64-bit fp operand
1634     // We would set low 64-bits of literal to zeroes but we accept this literals
1635     return true;
1636   }
1637 
1638   if (type == MVT::i64) { // Expected 64-bit int operand
1639     // We don't allow fp literals in 64-bit integer instructions. It is
1640     // unclear how we should encode them.
1641     return false;
1642   }
1643 
1644   // We allow fp literals with f16x2 operands assuming that the specified
1645   // literal goes into the lower half and the upper half is zero. We also
1646   // require that the literal may be losslesly converted to f16.
1647   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1648                      (type == MVT::v2i16)? MVT::i16 : type;
1649 
1650   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1651   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1652 }
1653 
1654 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1655   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1656 }
1657 
1658 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1659   if (AsmParser->isVI())
1660     return isVReg32();
1661   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1662     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1663   else
1664     return false;
1665 }
1666 
1667 bool AMDGPUOperand::isSDWAFP16Operand() const {
1668   return isSDWAOperand(MVT::f16);
1669 }
1670 
1671 bool AMDGPUOperand::isSDWAFP32Operand() const {
1672   return isSDWAOperand(MVT::f32);
1673 }
1674 
1675 bool AMDGPUOperand::isSDWAInt16Operand() const {
1676   return isSDWAOperand(MVT::i16);
1677 }
1678 
1679 bool AMDGPUOperand::isSDWAInt32Operand() const {
1680   return isSDWAOperand(MVT::i32);
1681 }
1682 
1683 bool AMDGPUOperand::isBoolReg() const {
1684   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1685          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1686 }
1687 
1688 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1689 {
1690   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1691   assert(Size == 2 || Size == 4 || Size == 8);
1692 
1693   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1694 
1695   if (Imm.Mods.Abs) {
1696     Val &= ~FpSignMask;
1697   }
1698   if (Imm.Mods.Neg) {
1699     Val ^= FpSignMask;
1700   }
1701 
1702   return Val;
1703 }
1704 
1705 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1706   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1707                              Inst.getNumOperands())) {
1708     addLiteralImmOperand(Inst, Imm.Val,
1709                          ApplyModifiers &
1710                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1711   } else {
1712     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1713     Inst.addOperand(MCOperand::createImm(Imm.Val));
1714   }
1715 }
1716 
1717 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1718   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1719   auto OpNum = Inst.getNumOperands();
1720   // Check that this operand accepts literals
1721   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1722 
1723   if (ApplyModifiers) {
1724     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1725     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1726     Val = applyInputFPModifiers(Val, Size);
1727   }
1728 
1729   APInt Literal(64, Val);
1730   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1731 
1732   if (Imm.IsFPImm) { // We got fp literal token
1733     switch (OpTy) {
1734     case AMDGPU::OPERAND_REG_IMM_INT64:
1735     case AMDGPU::OPERAND_REG_IMM_FP64:
1736     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1737     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1738       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1739                                        AsmParser->hasInv2PiInlineImm())) {
1740         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1741         return;
1742       }
1743 
1744       // Non-inlineable
1745       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1746         // For fp operands we check if low 32 bits are zeros
1747         if (Literal.getLoBits(32) != 0) {
1748           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1749           "Can't encode literal as exact 64-bit floating-point operand. "
1750           "Low 32-bits will be set to zero");
1751         }
1752 
1753         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1754         return;
1755       }
1756 
1757       // We don't allow fp literals in 64-bit integer instructions. It is
1758       // unclear how we should encode them. This case should be checked earlier
1759       // in predicate methods (isLiteralImm())
1760       llvm_unreachable("fp literal in 64-bit integer instruction.");
1761 
1762     case AMDGPU::OPERAND_REG_IMM_INT32:
1763     case AMDGPU::OPERAND_REG_IMM_FP32:
1764     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768     case AMDGPU::OPERAND_REG_IMM_INT16:
1769     case AMDGPU::OPERAND_REG_IMM_FP16:
1770     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1771     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1772     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1773     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1774     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1775     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1776     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1777     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1778     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1779     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1780       bool lost;
1781       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1782       // Convert literal to single precision
1783       FPLiteral.convert(*getOpFltSemantics(OpTy),
1784                         APFloat::rmNearestTiesToEven, &lost);
1785       // We allow precision lost but not overflow or underflow. This should be
1786       // checked earlier in isLiteralImm()
1787 
1788       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1789       Inst.addOperand(MCOperand::createImm(ImmVal));
1790       return;
1791     }
1792     default:
1793       llvm_unreachable("invalid operand size");
1794     }
1795 
1796     return;
1797   }
1798 
1799   // We got int literal token.
1800   // Only sign extend inline immediates.
1801   switch (OpTy) {
1802   case AMDGPU::OPERAND_REG_IMM_INT32:
1803   case AMDGPU::OPERAND_REG_IMM_FP32:
1804   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1805   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1808   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1810     if (isSafeTruncation(Val, 32) &&
1811         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1812                                      AsmParser->hasInv2PiInlineImm())) {
1813       Inst.addOperand(MCOperand::createImm(Val));
1814       return;
1815     }
1816 
1817     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1818     return;
1819 
1820   case AMDGPU::OPERAND_REG_IMM_INT64:
1821   case AMDGPU::OPERAND_REG_IMM_FP64:
1822   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1823   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1824     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1825       Inst.addOperand(MCOperand::createImm(Val));
1826       return;
1827     }
1828 
1829     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1830     return;
1831 
1832   case AMDGPU::OPERAND_REG_IMM_INT16:
1833   case AMDGPU::OPERAND_REG_IMM_FP16:
1834   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1835   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1836   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1838     if (isSafeTruncation(Val, 16) &&
1839         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1840                                      AsmParser->hasInv2PiInlineImm())) {
1841       Inst.addOperand(MCOperand::createImm(Val));
1842       return;
1843     }
1844 
1845     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1846     return;
1847 
1848   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1849   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1850   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1851   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1852     assert(isSafeTruncation(Val, 16));
1853     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1854                                         AsmParser->hasInv2PiInlineImm()));
1855 
1856     Inst.addOperand(MCOperand::createImm(Val));
1857     return;
1858   }
1859   default:
1860     llvm_unreachable("invalid operand size");
1861   }
1862 }
1863 
1864 template <unsigned Bitwidth>
1865 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1866   APInt Literal(64, Imm.Val);
1867 
1868   if (!Imm.IsFPImm) {
1869     // We got int literal token.
1870     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1871     return;
1872   }
1873 
1874   bool Lost;
1875   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1876   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1877                     APFloat::rmNearestTiesToEven, &Lost);
1878   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1879 }
1880 
1881 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1882   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1883 }
1884 
1885 static bool isInlineValue(unsigned Reg) {
1886   switch (Reg) {
1887   case AMDGPU::SRC_SHARED_BASE:
1888   case AMDGPU::SRC_SHARED_LIMIT:
1889   case AMDGPU::SRC_PRIVATE_BASE:
1890   case AMDGPU::SRC_PRIVATE_LIMIT:
1891   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1892     return true;
1893   case AMDGPU::SRC_VCCZ:
1894   case AMDGPU::SRC_EXECZ:
1895   case AMDGPU::SRC_SCC:
1896     return true;
1897   case AMDGPU::SGPR_NULL:
1898     return true;
1899   default:
1900     return false;
1901   }
1902 }
1903 
1904 bool AMDGPUOperand::isInlineValue() const {
1905   return isRegKind() && ::isInlineValue(getReg());
1906 }
1907 
1908 //===----------------------------------------------------------------------===//
1909 // AsmParser
1910 //===----------------------------------------------------------------------===//
1911 
1912 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1913   if (Is == IS_VGPR) {
1914     switch (RegWidth) {
1915       default: return -1;
1916       case 1: return AMDGPU::VGPR_32RegClassID;
1917       case 2: return AMDGPU::VReg_64RegClassID;
1918       case 3: return AMDGPU::VReg_96RegClassID;
1919       case 4: return AMDGPU::VReg_128RegClassID;
1920       case 5: return AMDGPU::VReg_160RegClassID;
1921       case 6: return AMDGPU::VReg_192RegClassID;
1922       case 8: return AMDGPU::VReg_256RegClassID;
1923       case 16: return AMDGPU::VReg_512RegClassID;
1924       case 32: return AMDGPU::VReg_1024RegClassID;
1925     }
1926   } else if (Is == IS_TTMP) {
1927     switch (RegWidth) {
1928       default: return -1;
1929       case 1: return AMDGPU::TTMP_32RegClassID;
1930       case 2: return AMDGPU::TTMP_64RegClassID;
1931       case 4: return AMDGPU::TTMP_128RegClassID;
1932       case 8: return AMDGPU::TTMP_256RegClassID;
1933       case 16: return AMDGPU::TTMP_512RegClassID;
1934     }
1935   } else if (Is == IS_SGPR) {
1936     switch (RegWidth) {
1937       default: return -1;
1938       case 1: return AMDGPU::SGPR_32RegClassID;
1939       case 2: return AMDGPU::SGPR_64RegClassID;
1940       case 3: return AMDGPU::SGPR_96RegClassID;
1941       case 4: return AMDGPU::SGPR_128RegClassID;
1942       case 5: return AMDGPU::SGPR_160RegClassID;
1943       case 6: return AMDGPU::SGPR_192RegClassID;
1944       case 8: return AMDGPU::SGPR_256RegClassID;
1945       case 16: return AMDGPU::SGPR_512RegClassID;
1946     }
1947   } else if (Is == IS_AGPR) {
1948     switch (RegWidth) {
1949       default: return -1;
1950       case 1: return AMDGPU::AGPR_32RegClassID;
1951       case 2: return AMDGPU::AReg_64RegClassID;
1952       case 3: return AMDGPU::AReg_96RegClassID;
1953       case 4: return AMDGPU::AReg_128RegClassID;
1954       case 5: return AMDGPU::AReg_160RegClassID;
1955       case 6: return AMDGPU::AReg_192RegClassID;
1956       case 8: return AMDGPU::AReg_256RegClassID;
1957       case 16: return AMDGPU::AReg_512RegClassID;
1958       case 32: return AMDGPU::AReg_1024RegClassID;
1959     }
1960   }
1961   return -1;
1962 }
1963 
1964 static unsigned getSpecialRegForName(StringRef RegName) {
1965   return StringSwitch<unsigned>(RegName)
1966     .Case("exec", AMDGPU::EXEC)
1967     .Case("vcc", AMDGPU::VCC)
1968     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1969     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1970     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1971     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1972     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1973     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1974     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1975     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1976     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1977     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1978     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1979     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1980     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1981     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1982     .Case("m0", AMDGPU::M0)
1983     .Case("vccz", AMDGPU::SRC_VCCZ)
1984     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1985     .Case("execz", AMDGPU::SRC_EXECZ)
1986     .Case("src_execz", AMDGPU::SRC_EXECZ)
1987     .Case("scc", AMDGPU::SRC_SCC)
1988     .Case("src_scc", AMDGPU::SRC_SCC)
1989     .Case("tba", AMDGPU::TBA)
1990     .Case("tma", AMDGPU::TMA)
1991     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1992     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1993     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1994     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1995     .Case("vcc_lo", AMDGPU::VCC_LO)
1996     .Case("vcc_hi", AMDGPU::VCC_HI)
1997     .Case("exec_lo", AMDGPU::EXEC_LO)
1998     .Case("exec_hi", AMDGPU::EXEC_HI)
1999     .Case("tma_lo", AMDGPU::TMA_LO)
2000     .Case("tma_hi", AMDGPU::TMA_HI)
2001     .Case("tba_lo", AMDGPU::TBA_LO)
2002     .Case("tba_hi", AMDGPU::TBA_HI)
2003     .Case("pc", AMDGPU::PC_REG)
2004     .Case("null", AMDGPU::SGPR_NULL)
2005     .Default(AMDGPU::NoRegister);
2006 }
2007 
2008 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2009                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2010   auto R = parseRegister();
2011   if (!R) return true;
2012   assert(R->isReg());
2013   RegNo = R->getReg();
2014   StartLoc = R->getStartLoc();
2015   EndLoc = R->getEndLoc();
2016   return false;
2017 }
2018 
2019 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2020                                     SMLoc &EndLoc) {
2021   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2022 }
2023 
2024 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2025                                                        SMLoc &StartLoc,
2026                                                        SMLoc &EndLoc) {
2027   bool Result =
2028       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2029   bool PendingErrors = getParser().hasPendingError();
2030   getParser().clearPendingErrors();
2031   if (PendingErrors)
2032     return MatchOperand_ParseFail;
2033   if (Result)
2034     return MatchOperand_NoMatch;
2035   return MatchOperand_Success;
2036 }
2037 
2038 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2039                                             RegisterKind RegKind, unsigned Reg1) {
2040   switch (RegKind) {
2041   case IS_SPECIAL:
2042     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2043       Reg = AMDGPU::EXEC;
2044       RegWidth = 2;
2045       return true;
2046     }
2047     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2048       Reg = AMDGPU::FLAT_SCR;
2049       RegWidth = 2;
2050       return true;
2051     }
2052     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2053       Reg = AMDGPU::XNACK_MASK;
2054       RegWidth = 2;
2055       return true;
2056     }
2057     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2058       Reg = AMDGPU::VCC;
2059       RegWidth = 2;
2060       return true;
2061     }
2062     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2063       Reg = AMDGPU::TBA;
2064       RegWidth = 2;
2065       return true;
2066     }
2067     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2068       Reg = AMDGPU::TMA;
2069       RegWidth = 2;
2070       return true;
2071     }
2072     return false;
2073   case IS_VGPR:
2074   case IS_SGPR:
2075   case IS_AGPR:
2076   case IS_TTMP:
2077     if (Reg1 != Reg + RegWidth) {
2078       return false;
2079     }
2080     RegWidth++;
2081     return true;
2082   default:
2083     llvm_unreachable("unexpected register kind");
2084   }
2085 }
2086 
2087 struct RegInfo {
2088   StringLiteral Name;
2089   RegisterKind Kind;
2090 };
2091 
2092 static constexpr RegInfo RegularRegisters[] = {
2093   {{"v"},    IS_VGPR},
2094   {{"s"},    IS_SGPR},
2095   {{"ttmp"}, IS_TTMP},
2096   {{"acc"},  IS_AGPR},
2097   {{"a"},    IS_AGPR},
2098 };
2099 
2100 static bool isRegularReg(RegisterKind Kind) {
2101   return Kind == IS_VGPR ||
2102          Kind == IS_SGPR ||
2103          Kind == IS_TTMP ||
2104          Kind == IS_AGPR;
2105 }
2106 
2107 static const RegInfo* getRegularRegInfo(StringRef Str) {
2108   for (const RegInfo &Reg : RegularRegisters)
2109     if (Str.startswith(Reg.Name))
2110       return &Reg;
2111   return nullptr;
2112 }
2113 
2114 static bool getRegNum(StringRef Str, unsigned& Num) {
2115   return !Str.getAsInteger(10, Num);
2116 }
2117 
2118 bool
2119 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2120                             const AsmToken &NextToken) const {
2121 
2122   // A list of consecutive registers: [s0,s1,s2,s3]
2123   if (Token.is(AsmToken::LBrac))
2124     return true;
2125 
2126   if (!Token.is(AsmToken::Identifier))
2127     return false;
2128 
2129   // A single register like s0 or a range of registers like s[0:1]
2130 
2131   StringRef Str = Token.getString();
2132   const RegInfo *Reg = getRegularRegInfo(Str);
2133   if (Reg) {
2134     StringRef RegName = Reg->Name;
2135     StringRef RegSuffix = Str.substr(RegName.size());
2136     if (!RegSuffix.empty()) {
2137       unsigned Num;
2138       // A single register with an index: rXX
2139       if (getRegNum(RegSuffix, Num))
2140         return true;
2141     } else {
2142       // A range of registers: r[XX:YY].
2143       if (NextToken.is(AsmToken::LBrac))
2144         return true;
2145     }
2146   }
2147 
2148   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2149 }
2150 
2151 bool
2152 AMDGPUAsmParser::isRegister()
2153 {
2154   return isRegister(getToken(), peekToken());
2155 }
2156 
2157 unsigned
2158 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2159                                unsigned RegNum,
2160                                unsigned RegWidth) {
2161 
2162   assert(isRegularReg(RegKind));
2163 
2164   unsigned AlignSize = 1;
2165   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2166     // SGPR and TTMP registers must be aligned.
2167     // Max required alignment is 4 dwords.
2168     AlignSize = std::min(RegWidth, 4u);
2169   }
2170 
2171   if (RegNum % AlignSize != 0)
2172     return AMDGPU::NoRegister;
2173 
2174   unsigned RegIdx = RegNum / AlignSize;
2175   int RCID = getRegClass(RegKind, RegWidth);
2176   if (RCID == -1)
2177     return AMDGPU::NoRegister;
2178 
2179   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2180   const MCRegisterClass RC = TRI->getRegClass(RCID);
2181   if (RegIdx >= RC.getNumRegs())
2182     return AMDGPU::NoRegister;
2183 
2184   return RC.getRegister(RegIdx);
2185 }
2186 
2187 bool
2188 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2189   int64_t RegLo, RegHi;
2190   if (!trySkipToken(AsmToken::LBrac))
2191     return false;
2192 
2193   if (!parseExpr(RegLo))
2194     return false;
2195 
2196   if (trySkipToken(AsmToken::Colon)) {
2197     if (!parseExpr(RegHi))
2198       return false;
2199   } else {
2200     RegHi = RegLo;
2201   }
2202 
2203   if (!trySkipToken(AsmToken::RBrac))
2204     return false;
2205 
2206   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2207     return false;
2208 
2209   Num = static_cast<unsigned>(RegLo);
2210   Width = (RegHi - RegLo) + 1;
2211   return true;
2212 }
2213 
2214 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2215                                           unsigned &RegNum, unsigned &RegWidth,
2216                                           SmallVectorImpl<AsmToken> &Tokens) {
2217   assert(isToken(AsmToken::Identifier));
2218   unsigned Reg = getSpecialRegForName(getTokenStr());
2219   if (Reg) {
2220     RegNum = 0;
2221     RegWidth = 1;
2222     RegKind = IS_SPECIAL;
2223     Tokens.push_back(getToken());
2224     lex(); // skip register name
2225   }
2226   return Reg;
2227 }
2228 
2229 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2230                                           unsigned &RegNum, unsigned &RegWidth,
2231                                           SmallVectorImpl<AsmToken> &Tokens) {
2232   assert(isToken(AsmToken::Identifier));
2233   StringRef RegName = getTokenStr();
2234 
2235   const RegInfo *RI = getRegularRegInfo(RegName);
2236   if (!RI)
2237     return AMDGPU::NoRegister;
2238   Tokens.push_back(getToken());
2239   lex(); // skip register name
2240 
2241   RegKind = RI->Kind;
2242   StringRef RegSuffix = RegName.substr(RI->Name.size());
2243   if (!RegSuffix.empty()) {
2244     // Single 32-bit register: vXX.
2245     if (!getRegNum(RegSuffix, RegNum))
2246       return AMDGPU::NoRegister;
2247     RegWidth = 1;
2248   } else {
2249     // Range of registers: v[XX:YY]. ":YY" is optional.
2250     if (!ParseRegRange(RegNum, RegWidth))
2251       return AMDGPU::NoRegister;
2252   }
2253 
2254   return getRegularReg(RegKind, RegNum, RegWidth);
2255 }
2256 
2257 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2258                                        unsigned &RegWidth,
2259                                        SmallVectorImpl<AsmToken> &Tokens) {
2260   unsigned Reg = AMDGPU::NoRegister;
2261 
2262   if (!trySkipToken(AsmToken::LBrac))
2263     return AMDGPU::NoRegister;
2264 
2265   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2266 
2267   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2268     return AMDGPU::NoRegister;
2269   if (RegWidth != 1)
2270     return AMDGPU::NoRegister;
2271 
2272   for (; trySkipToken(AsmToken::Comma); ) {
2273     RegisterKind NextRegKind;
2274     unsigned NextReg, NextRegNum, NextRegWidth;
2275 
2276     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2277                              Tokens))
2278       return AMDGPU::NoRegister;
2279     if (NextRegWidth != 1)
2280       return AMDGPU::NoRegister;
2281     if (NextRegKind != RegKind)
2282       return AMDGPU::NoRegister;
2283     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2284       return AMDGPU::NoRegister;
2285   }
2286 
2287   if (!trySkipToken(AsmToken::RBrac))
2288     return AMDGPU::NoRegister;
2289 
2290   if (isRegularReg(RegKind))
2291     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2292 
2293   return Reg;
2294 }
2295 
2296 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2297                                           unsigned &RegNum, unsigned &RegWidth,
2298                                           SmallVectorImpl<AsmToken> &Tokens) {
2299   Reg = AMDGPU::NoRegister;
2300 
2301   if (isToken(AsmToken::Identifier)) {
2302     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2303     if (Reg == AMDGPU::NoRegister)
2304       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2305   } else {
2306     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2307   }
2308 
2309   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2310   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2311 }
2312 
2313 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2314                                           unsigned &RegNum, unsigned &RegWidth,
2315                                           bool RestoreOnFailure) {
2316   Reg = AMDGPU::NoRegister;
2317 
2318   SmallVector<AsmToken, 1> Tokens;
2319   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2320     if (RestoreOnFailure) {
2321       while (!Tokens.empty()) {
2322         getLexer().UnLex(Tokens.pop_back_val());
2323       }
2324     }
2325     return true;
2326   }
2327   return false;
2328 }
2329 
2330 Optional<StringRef>
2331 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2332   switch (RegKind) {
2333   case IS_VGPR:
2334     return StringRef(".amdgcn.next_free_vgpr");
2335   case IS_SGPR:
2336     return StringRef(".amdgcn.next_free_sgpr");
2337   default:
2338     return None;
2339   }
2340 }
2341 
2342 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2343   auto SymbolName = getGprCountSymbolName(RegKind);
2344   assert(SymbolName && "initializing invalid register kind");
2345   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2346   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2347 }
2348 
2349 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2350                                             unsigned DwordRegIndex,
2351                                             unsigned RegWidth) {
2352   // Symbols are only defined for GCN targets
2353   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2354     return true;
2355 
2356   auto SymbolName = getGprCountSymbolName(RegKind);
2357   if (!SymbolName)
2358     return true;
2359   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2360 
2361   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2362   int64_t OldCount;
2363 
2364   if (!Sym->isVariable())
2365     return !Error(getParser().getTok().getLoc(),
2366                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2367   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2368     return !Error(
2369         getParser().getTok().getLoc(),
2370         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2371 
2372   if (OldCount <= NewMax)
2373     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2374 
2375   return true;
2376 }
2377 
2378 std::unique_ptr<AMDGPUOperand>
2379 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2380   const auto &Tok = Parser.getTok();
2381   SMLoc StartLoc = Tok.getLoc();
2382   SMLoc EndLoc = Tok.getEndLoc();
2383   RegisterKind RegKind;
2384   unsigned Reg, RegNum, RegWidth;
2385 
2386   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2387     //FIXME: improve error messages (bug 41303).
2388     Error(StartLoc, "not a valid operand.");
2389     return nullptr;
2390   }
2391   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2392     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2393       return nullptr;
2394   } else
2395     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2396   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2397 }
2398 
2399 OperandMatchResultTy
2400 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2401   // TODO: add syntactic sugar for 1/(2*PI)
2402 
2403   assert(!isRegister());
2404   assert(!isModifier());
2405 
2406   const auto& Tok = getToken();
2407   const auto& NextTok = peekToken();
2408   bool IsReal = Tok.is(AsmToken::Real);
2409   SMLoc S = getLoc();
2410   bool Negate = false;
2411 
2412   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2413     lex();
2414     IsReal = true;
2415     Negate = true;
2416   }
2417 
2418   if (IsReal) {
2419     // Floating-point expressions are not supported.
2420     // Can only allow floating-point literals with an
2421     // optional sign.
2422 
2423     StringRef Num = getTokenStr();
2424     lex();
2425 
2426     APFloat RealVal(APFloat::IEEEdouble());
2427     auto roundMode = APFloat::rmNearestTiesToEven;
2428     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2429       return MatchOperand_ParseFail;
2430     }
2431     if (Negate)
2432       RealVal.changeSign();
2433 
2434     Operands.push_back(
2435       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2436                                AMDGPUOperand::ImmTyNone, true));
2437 
2438     return MatchOperand_Success;
2439 
2440   } else {
2441     int64_t IntVal;
2442     const MCExpr *Expr;
2443     SMLoc S = getLoc();
2444 
2445     if (HasSP3AbsModifier) {
2446       // This is a workaround for handling expressions
2447       // as arguments of SP3 'abs' modifier, for example:
2448       //     |1.0|
2449       //     |-1|
2450       //     |1+x|
2451       // This syntax is not compatible with syntax of standard
2452       // MC expressions (due to the trailing '|').
2453       SMLoc EndLoc;
2454       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2455         return MatchOperand_ParseFail;
2456     } else {
2457       if (Parser.parseExpression(Expr))
2458         return MatchOperand_ParseFail;
2459     }
2460 
2461     if (Expr->evaluateAsAbsolute(IntVal)) {
2462       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2463     } else {
2464       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2465     }
2466 
2467     return MatchOperand_Success;
2468   }
2469 
2470   return MatchOperand_NoMatch;
2471 }
2472 
2473 OperandMatchResultTy
2474 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2475   if (!isRegister())
2476     return MatchOperand_NoMatch;
2477 
2478   if (auto R = parseRegister()) {
2479     assert(R->isReg());
2480     Operands.push_back(std::move(R));
2481     return MatchOperand_Success;
2482   }
2483   return MatchOperand_ParseFail;
2484 }
2485 
2486 OperandMatchResultTy
2487 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2488   auto res = parseReg(Operands);
2489   if (res != MatchOperand_NoMatch) {
2490     return res;
2491   } else if (isModifier()) {
2492     return MatchOperand_NoMatch;
2493   } else {
2494     return parseImm(Operands, HasSP3AbsMod);
2495   }
2496 }
2497 
2498 bool
2499 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2500   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2501     const auto &str = Token.getString();
2502     return str == "abs" || str == "neg" || str == "sext";
2503   }
2504   return false;
2505 }
2506 
2507 bool
2508 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2509   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2510 }
2511 
2512 bool
2513 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2514   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2515 }
2516 
2517 bool
2518 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2519   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2520 }
2521 
2522 // Check if this is an operand modifier or an opcode modifier
2523 // which may look like an expression but it is not. We should
2524 // avoid parsing these modifiers as expressions. Currently
2525 // recognized sequences are:
2526 //   |...|
2527 //   abs(...)
2528 //   neg(...)
2529 //   sext(...)
2530 //   -reg
2531 //   -|...|
2532 //   -abs(...)
2533 //   name:...
2534 // Note that simple opcode modifiers like 'gds' may be parsed as
2535 // expressions; this is a special case. See getExpressionAsToken.
2536 //
2537 bool
2538 AMDGPUAsmParser::isModifier() {
2539 
2540   AsmToken Tok = getToken();
2541   AsmToken NextToken[2];
2542   peekTokens(NextToken);
2543 
2544   return isOperandModifier(Tok, NextToken[0]) ||
2545          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2546          isOpcodeModifierWithVal(Tok, NextToken[0]);
2547 }
2548 
2549 // Check if the current token is an SP3 'neg' modifier.
2550 // Currently this modifier is allowed in the following context:
2551 //
2552 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2553 // 2. Before an 'abs' modifier: -abs(...)
2554 // 3. Before an SP3 'abs' modifier: -|...|
2555 //
2556 // In all other cases "-" is handled as a part
2557 // of an expression that follows the sign.
2558 //
2559 // Note: When "-" is followed by an integer literal,
2560 // this is interpreted as integer negation rather
2561 // than a floating-point NEG modifier applied to N.
2562 // Beside being contr-intuitive, such use of floating-point
2563 // NEG modifier would have resulted in different meaning
2564 // of integer literals used with VOP1/2/C and VOP3,
2565 // for example:
2566 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2567 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2568 // Negative fp literals with preceding "-" are
2569 // handled likewise for unifomtity
2570 //
2571 bool
2572 AMDGPUAsmParser::parseSP3NegModifier() {
2573 
2574   AsmToken NextToken[2];
2575   peekTokens(NextToken);
2576 
2577   if (isToken(AsmToken::Minus) &&
2578       (isRegister(NextToken[0], NextToken[1]) ||
2579        NextToken[0].is(AsmToken::Pipe) ||
2580        isId(NextToken[0], "abs"))) {
2581     lex();
2582     return true;
2583   }
2584 
2585   return false;
2586 }
2587 
2588 OperandMatchResultTy
2589 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2590                                               bool AllowImm) {
2591   bool Neg, SP3Neg;
2592   bool Abs, SP3Abs;
2593   SMLoc Loc;
2594 
2595   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2596   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2597     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2598     return MatchOperand_ParseFail;
2599   }
2600 
2601   SP3Neg = parseSP3NegModifier();
2602 
2603   Loc = getLoc();
2604   Neg = trySkipId("neg");
2605   if (Neg && SP3Neg) {
2606     Error(Loc, "expected register or immediate");
2607     return MatchOperand_ParseFail;
2608   }
2609   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2610     return MatchOperand_ParseFail;
2611 
2612   Abs = trySkipId("abs");
2613   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2614     return MatchOperand_ParseFail;
2615 
2616   Loc = getLoc();
2617   SP3Abs = trySkipToken(AsmToken::Pipe);
2618   if (Abs && SP3Abs) {
2619     Error(Loc, "expected register or immediate");
2620     return MatchOperand_ParseFail;
2621   }
2622 
2623   OperandMatchResultTy Res;
2624   if (AllowImm) {
2625     Res = parseRegOrImm(Operands, SP3Abs);
2626   } else {
2627     Res = parseReg(Operands);
2628   }
2629   if (Res != MatchOperand_Success) {
2630     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2631   }
2632 
2633   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2634     return MatchOperand_ParseFail;
2635   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2636     return MatchOperand_ParseFail;
2637   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2638     return MatchOperand_ParseFail;
2639 
2640   AMDGPUOperand::Modifiers Mods;
2641   Mods.Abs = Abs || SP3Abs;
2642   Mods.Neg = Neg || SP3Neg;
2643 
2644   if (Mods.hasFPModifiers()) {
2645     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2646     if (Op.isExpr()) {
2647       Error(Op.getStartLoc(), "expected an absolute expression");
2648       return MatchOperand_ParseFail;
2649     }
2650     Op.setModifiers(Mods);
2651   }
2652   return MatchOperand_Success;
2653 }
2654 
2655 OperandMatchResultTy
2656 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2657                                                bool AllowImm) {
2658   bool Sext = trySkipId("sext");
2659   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2660     return MatchOperand_ParseFail;
2661 
2662   OperandMatchResultTy Res;
2663   if (AllowImm) {
2664     Res = parseRegOrImm(Operands);
2665   } else {
2666     Res = parseReg(Operands);
2667   }
2668   if (Res != MatchOperand_Success) {
2669     return Sext? MatchOperand_ParseFail : Res;
2670   }
2671 
2672   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2673     return MatchOperand_ParseFail;
2674 
2675   AMDGPUOperand::Modifiers Mods;
2676   Mods.Sext = Sext;
2677 
2678   if (Mods.hasIntModifiers()) {
2679     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2680     if (Op.isExpr()) {
2681       Error(Op.getStartLoc(), "expected an absolute expression");
2682       return MatchOperand_ParseFail;
2683     }
2684     Op.setModifiers(Mods);
2685   }
2686 
2687   return MatchOperand_Success;
2688 }
2689 
2690 OperandMatchResultTy
2691 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2692   return parseRegOrImmWithFPInputMods(Operands, false);
2693 }
2694 
2695 OperandMatchResultTy
2696 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2697   return parseRegOrImmWithIntInputMods(Operands, false);
2698 }
2699 
2700 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2701   auto Loc = getLoc();
2702   if (trySkipId("off")) {
2703     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2704                                                 AMDGPUOperand::ImmTyOff, false));
2705     return MatchOperand_Success;
2706   }
2707 
2708   if (!isRegister())
2709     return MatchOperand_NoMatch;
2710 
2711   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2712   if (Reg) {
2713     Operands.push_back(std::move(Reg));
2714     return MatchOperand_Success;
2715   }
2716 
2717   return MatchOperand_ParseFail;
2718 
2719 }
2720 
2721 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2722   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2723 
2724   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2725       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2726       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2727       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2728     return Match_InvalidOperand;
2729 
2730   if ((TSFlags & SIInstrFlags::VOP3) &&
2731       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2732       getForcedEncodingSize() != 64)
2733     return Match_PreferE32;
2734 
2735   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2736       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2737     // v_mac_f32/16 allow only dst_sel == DWORD;
2738     auto OpNum =
2739         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2740     const auto &Op = Inst.getOperand(OpNum);
2741     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2742       return Match_InvalidOperand;
2743     }
2744   }
2745 
2746   return Match_Success;
2747 }
2748 
2749 // What asm variants we should check
2750 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2751   if (getForcedEncodingSize() == 32) {
2752     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2753     return makeArrayRef(Variants);
2754   }
2755 
2756   if (isForcedVOP3()) {
2757     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2758     return makeArrayRef(Variants);
2759   }
2760 
2761   if (isForcedSDWA()) {
2762     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2763                                         AMDGPUAsmVariants::SDWA9};
2764     return makeArrayRef(Variants);
2765   }
2766 
2767   if (isForcedDPP()) {
2768     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2769     return makeArrayRef(Variants);
2770   }
2771 
2772   static const unsigned Variants[] = {
2773     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2774     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2775   };
2776 
2777   return makeArrayRef(Variants);
2778 }
2779 
2780 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2781   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2782   const unsigned Num = Desc.getNumImplicitUses();
2783   for (unsigned i = 0; i < Num; ++i) {
2784     unsigned Reg = Desc.ImplicitUses[i];
2785     switch (Reg) {
2786     case AMDGPU::FLAT_SCR:
2787     case AMDGPU::VCC:
2788     case AMDGPU::VCC_LO:
2789     case AMDGPU::VCC_HI:
2790     case AMDGPU::M0:
2791       return Reg;
2792     default:
2793       break;
2794     }
2795   }
2796   return AMDGPU::NoRegister;
2797 }
2798 
2799 // NB: This code is correct only when used to check constant
2800 // bus limitations because GFX7 support no f16 inline constants.
2801 // Note that there are no cases when a GFX7 opcode violates
2802 // constant bus limitations due to the use of an f16 constant.
2803 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2804                                        unsigned OpIdx) const {
2805   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2806 
2807   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2808     return false;
2809   }
2810 
2811   const MCOperand &MO = Inst.getOperand(OpIdx);
2812 
2813   int64_t Val = MO.getImm();
2814   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2815 
2816   switch (OpSize) { // expected operand size
2817   case 8:
2818     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2819   case 4:
2820     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2821   case 2: {
2822     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2823     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2824         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2825         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2826         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2827         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2828         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2829       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2830     } else {
2831       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2832     }
2833   }
2834   default:
2835     llvm_unreachable("invalid operand size");
2836   }
2837 }
2838 
2839 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2840   if (!isGFX10())
2841     return 1;
2842 
2843   switch (Opcode) {
2844   // 64-bit shift instructions can use only one scalar value input
2845   case AMDGPU::V_LSHLREV_B64:
2846   case AMDGPU::V_LSHLREV_B64_gfx10:
2847   case AMDGPU::V_LSHL_B64:
2848   case AMDGPU::V_LSHRREV_B64:
2849   case AMDGPU::V_LSHRREV_B64_gfx10:
2850   case AMDGPU::V_LSHR_B64:
2851   case AMDGPU::V_ASHRREV_I64:
2852   case AMDGPU::V_ASHRREV_I64_gfx10:
2853   case AMDGPU::V_ASHR_I64:
2854     return 1;
2855   default:
2856     return 2;
2857   }
2858 }
2859 
2860 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2861   const MCOperand &MO = Inst.getOperand(OpIdx);
2862   if (MO.isImm()) {
2863     return !isInlineConstant(Inst, OpIdx);
2864   } else if (MO.isReg()) {
2865     auto Reg = MO.getReg();
2866     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2867     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2868   } else {
2869     return true;
2870   }
2871 }
2872 
2873 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2874   const unsigned Opcode = Inst.getOpcode();
2875   const MCInstrDesc &Desc = MII.get(Opcode);
2876   unsigned ConstantBusUseCount = 0;
2877   unsigned NumLiterals = 0;
2878   unsigned LiteralSize;
2879 
2880   if (Desc.TSFlags &
2881       (SIInstrFlags::VOPC |
2882        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2883        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2884        SIInstrFlags::SDWA)) {
2885     // Check special imm operands (used by madmk, etc)
2886     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2887       ++ConstantBusUseCount;
2888     }
2889 
2890     SmallDenseSet<unsigned> SGPRsUsed;
2891     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2892     if (SGPRUsed != AMDGPU::NoRegister) {
2893       SGPRsUsed.insert(SGPRUsed);
2894       ++ConstantBusUseCount;
2895     }
2896 
2897     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2898     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2899     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2900 
2901     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2902 
2903     for (int OpIdx : OpIndices) {
2904       if (OpIdx == -1) break;
2905 
2906       const MCOperand &MO = Inst.getOperand(OpIdx);
2907       if (usesConstantBus(Inst, OpIdx)) {
2908         if (MO.isReg()) {
2909           const unsigned Reg = mc2PseudoReg(MO.getReg());
2910           // Pairs of registers with a partial intersections like these
2911           //   s0, s[0:1]
2912           //   flat_scratch_lo, flat_scratch
2913           //   flat_scratch_lo, flat_scratch_hi
2914           // are theoretically valid but they are disabled anyway.
2915           // Note that this code mimics SIInstrInfo::verifyInstruction
2916           if (!SGPRsUsed.count(Reg)) {
2917             SGPRsUsed.insert(Reg);
2918             ++ConstantBusUseCount;
2919           }
2920         } else { // Expression or a literal
2921 
2922           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2923             continue; // special operand like VINTERP attr_chan
2924 
2925           // An instruction may use only one literal.
2926           // This has been validated on the previous step.
2927           // See validateVOP3Literal.
2928           // This literal may be used as more than one operand.
2929           // If all these operands are of the same size,
2930           // this literal counts as one scalar value.
2931           // Otherwise it counts as 2 scalar values.
2932           // See "GFX10 Shader Programming", section 3.6.2.3.
2933 
2934           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2935           if (Size < 4) Size = 4;
2936 
2937           if (NumLiterals == 0) {
2938             NumLiterals = 1;
2939             LiteralSize = Size;
2940           } else if (LiteralSize != Size) {
2941             NumLiterals = 2;
2942           }
2943         }
2944       }
2945     }
2946   }
2947   ConstantBusUseCount += NumLiterals;
2948 
2949   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2950 }
2951 
2952 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2953   const unsigned Opcode = Inst.getOpcode();
2954   const MCInstrDesc &Desc = MII.get(Opcode);
2955 
2956   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2957   if (DstIdx == -1 ||
2958       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2959     return true;
2960   }
2961 
2962   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2963 
2964   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2965   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2966   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2967 
2968   assert(DstIdx != -1);
2969   const MCOperand &Dst = Inst.getOperand(DstIdx);
2970   assert(Dst.isReg());
2971   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2972 
2973   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2974 
2975   for (int SrcIdx : SrcIndices) {
2976     if (SrcIdx == -1) break;
2977     const MCOperand &Src = Inst.getOperand(SrcIdx);
2978     if (Src.isReg()) {
2979       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2980       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2981         return false;
2982       }
2983     }
2984   }
2985 
2986   return true;
2987 }
2988 
2989 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2990 
2991   const unsigned Opc = Inst.getOpcode();
2992   const MCInstrDesc &Desc = MII.get(Opc);
2993 
2994   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2995     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2996     assert(ClampIdx != -1);
2997     return Inst.getOperand(ClampIdx).getImm() == 0;
2998   }
2999 
3000   return true;
3001 }
3002 
3003 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3004 
3005   const unsigned Opc = Inst.getOpcode();
3006   const MCInstrDesc &Desc = MII.get(Opc);
3007 
3008   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3009     return true;
3010 
3011   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3012   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3013   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3014 
3015   assert(VDataIdx != -1);
3016   assert(DMaskIdx != -1);
3017   assert(TFEIdx != -1);
3018 
3019   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3020   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3021   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3022   if (DMask == 0)
3023     DMask = 1;
3024 
3025   unsigned DataSize =
3026     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3027   if (hasPackedD16()) {
3028     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3029     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3030       DataSize = (DataSize + 1) / 2;
3031   }
3032 
3033   return (VDataSize / 4) == DataSize + TFESize;
3034 }
3035 
3036 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3037   const unsigned Opc = Inst.getOpcode();
3038   const MCInstrDesc &Desc = MII.get(Opc);
3039 
3040   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3041     return true;
3042 
3043   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3044   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3045       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3046   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3047   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3048   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3049 
3050   assert(VAddr0Idx != -1);
3051   assert(SrsrcIdx != -1);
3052   assert(DimIdx != -1);
3053   assert(SrsrcIdx > VAddr0Idx);
3054 
3055   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3056   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3057   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3058   unsigned VAddrSize =
3059       IsNSA ? SrsrcIdx - VAddr0Idx
3060             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3061 
3062   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3063                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3064                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3065                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3066   if (!IsNSA) {
3067     if (AddrSize > 8)
3068       AddrSize = 16;
3069     else if (AddrSize > 4)
3070       AddrSize = 8;
3071   }
3072 
3073   return VAddrSize == AddrSize;
3074 }
3075 
3076 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3077 
3078   const unsigned Opc = Inst.getOpcode();
3079   const MCInstrDesc &Desc = MII.get(Opc);
3080 
3081   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3082     return true;
3083   if (!Desc.mayLoad() || !Desc.mayStore())
3084     return true; // Not atomic
3085 
3086   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3087   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3088 
3089   // This is an incomplete check because image_atomic_cmpswap
3090   // may only use 0x3 and 0xf while other atomic operations
3091   // may use 0x1 and 0x3. However these limitations are
3092   // verified when we check that dmask matches dst size.
3093   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3094 }
3095 
3096 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3097 
3098   const unsigned Opc = Inst.getOpcode();
3099   const MCInstrDesc &Desc = MII.get(Opc);
3100 
3101   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3102     return true;
3103 
3104   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3105   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3106 
3107   // GATHER4 instructions use dmask in a different fashion compared to
3108   // other MIMG instructions. The only useful DMASK values are
3109   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3110   // (red,red,red,red) etc.) The ISA document doesn't mention
3111   // this.
3112   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3113 }
3114 
3115 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3116 {
3117   switch (Opcode) {
3118   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3119   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3120   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3121     return true;
3122   default:
3123     return false;
3124   }
3125 }
3126 
3127 // movrels* opcodes should only allow VGPRS as src0.
3128 // This is specified in .td description for vop1/vop3,
3129 // but sdwa is handled differently. See isSDWAOperand.
3130 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3131 
3132   const unsigned Opc = Inst.getOpcode();
3133   const MCInstrDesc &Desc = MII.get(Opc);
3134 
3135   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3136     return true;
3137 
3138   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3139   assert(Src0Idx != -1);
3140 
3141   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3142   if (!Src0.isReg())
3143     return false;
3144 
3145   auto Reg = Src0.getReg();
3146   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3147   return !isSGPR(mc2PseudoReg(Reg), TRI);
3148 }
3149 
3150 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3151 
3152   const unsigned Opc = Inst.getOpcode();
3153   const MCInstrDesc &Desc = MII.get(Opc);
3154 
3155   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3156     return true;
3157 
3158   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3159   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3160     if (isCI() || isSI())
3161       return false;
3162   }
3163 
3164   return true;
3165 }
3166 
3167 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3168   const unsigned Opc = Inst.getOpcode();
3169   const MCInstrDesc &Desc = MII.get(Opc);
3170 
3171   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3172     return true;
3173 
3174   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3175   if (DimIdx < 0)
3176     return true;
3177 
3178   long Imm = Inst.getOperand(DimIdx).getImm();
3179   if (Imm < 0 || Imm >= 8)
3180     return false;
3181 
3182   return true;
3183 }
3184 
3185 static bool IsRevOpcode(const unsigned Opcode)
3186 {
3187   switch (Opcode) {
3188   case AMDGPU::V_SUBREV_F32_e32:
3189   case AMDGPU::V_SUBREV_F32_e64:
3190   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3191   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3192   case AMDGPU::V_SUBREV_F32_e32_vi:
3193   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3194   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3195   case AMDGPU::V_SUBREV_F32_e64_vi:
3196 
3197   case AMDGPU::V_SUBREV_I32_e32:
3198   case AMDGPU::V_SUBREV_I32_e64:
3199   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3200   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3201 
3202   case AMDGPU::V_SUBBREV_U32_e32:
3203   case AMDGPU::V_SUBBREV_U32_e64:
3204   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3205   case AMDGPU::V_SUBBREV_U32_e32_vi:
3206   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3207   case AMDGPU::V_SUBBREV_U32_e64_vi:
3208 
3209   case AMDGPU::V_SUBREV_U32_e32:
3210   case AMDGPU::V_SUBREV_U32_e64:
3211   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3212   case AMDGPU::V_SUBREV_U32_e32_vi:
3213   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3214   case AMDGPU::V_SUBREV_U32_e64_vi:
3215 
3216   case AMDGPU::V_SUBREV_F16_e32:
3217   case AMDGPU::V_SUBREV_F16_e64:
3218   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3219   case AMDGPU::V_SUBREV_F16_e32_vi:
3220   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3221   case AMDGPU::V_SUBREV_F16_e64_vi:
3222 
3223   case AMDGPU::V_SUBREV_U16_e32:
3224   case AMDGPU::V_SUBREV_U16_e64:
3225   case AMDGPU::V_SUBREV_U16_e32_vi:
3226   case AMDGPU::V_SUBREV_U16_e64_vi:
3227 
3228   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3229   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3230   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3231 
3232   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3233   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3234 
3235   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3236   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3237 
3238   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3239   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3240 
3241   case AMDGPU::V_LSHRREV_B32_e32:
3242   case AMDGPU::V_LSHRREV_B32_e64:
3243   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3244   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3245   case AMDGPU::V_LSHRREV_B32_e32_vi:
3246   case AMDGPU::V_LSHRREV_B32_e64_vi:
3247   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3248   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3249 
3250   case AMDGPU::V_ASHRREV_I32_e32:
3251   case AMDGPU::V_ASHRREV_I32_e64:
3252   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3253   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3254   case AMDGPU::V_ASHRREV_I32_e32_vi:
3255   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3256   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3257   case AMDGPU::V_ASHRREV_I32_e64_vi:
3258 
3259   case AMDGPU::V_LSHLREV_B32_e32:
3260   case AMDGPU::V_LSHLREV_B32_e64:
3261   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3262   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3263   case AMDGPU::V_LSHLREV_B32_e32_vi:
3264   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3265   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3266   case AMDGPU::V_LSHLREV_B32_e64_vi:
3267 
3268   case AMDGPU::V_LSHLREV_B16_e32:
3269   case AMDGPU::V_LSHLREV_B16_e64:
3270   case AMDGPU::V_LSHLREV_B16_e32_vi:
3271   case AMDGPU::V_LSHLREV_B16_e64_vi:
3272   case AMDGPU::V_LSHLREV_B16_gfx10:
3273 
3274   case AMDGPU::V_LSHRREV_B16_e32:
3275   case AMDGPU::V_LSHRREV_B16_e64:
3276   case AMDGPU::V_LSHRREV_B16_e32_vi:
3277   case AMDGPU::V_LSHRREV_B16_e64_vi:
3278   case AMDGPU::V_LSHRREV_B16_gfx10:
3279 
3280   case AMDGPU::V_ASHRREV_I16_e32:
3281   case AMDGPU::V_ASHRREV_I16_e64:
3282   case AMDGPU::V_ASHRREV_I16_e32_vi:
3283   case AMDGPU::V_ASHRREV_I16_e64_vi:
3284   case AMDGPU::V_ASHRREV_I16_gfx10:
3285 
3286   case AMDGPU::V_LSHLREV_B64:
3287   case AMDGPU::V_LSHLREV_B64_gfx10:
3288   case AMDGPU::V_LSHLREV_B64_vi:
3289 
3290   case AMDGPU::V_LSHRREV_B64:
3291   case AMDGPU::V_LSHRREV_B64_gfx10:
3292   case AMDGPU::V_LSHRREV_B64_vi:
3293 
3294   case AMDGPU::V_ASHRREV_I64:
3295   case AMDGPU::V_ASHRREV_I64_gfx10:
3296   case AMDGPU::V_ASHRREV_I64_vi:
3297 
3298   case AMDGPU::V_PK_LSHLREV_B16:
3299   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3300   case AMDGPU::V_PK_LSHLREV_B16_vi:
3301 
3302   case AMDGPU::V_PK_LSHRREV_B16:
3303   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3304   case AMDGPU::V_PK_LSHRREV_B16_vi:
3305   case AMDGPU::V_PK_ASHRREV_I16:
3306   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3307   case AMDGPU::V_PK_ASHRREV_I16_vi:
3308     return true;
3309   default:
3310     return false;
3311   }
3312 }
3313 
3314 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3315 
3316   using namespace SIInstrFlags;
3317   const unsigned Opcode = Inst.getOpcode();
3318   const MCInstrDesc &Desc = MII.get(Opcode);
3319 
3320   // lds_direct register is defined so that it can be used
3321   // with 9-bit operands only. Ignore encodings which do not accept these.
3322   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3323     return true;
3324 
3325   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3326   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3327   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3328 
3329   const int SrcIndices[] = { Src1Idx, Src2Idx };
3330 
3331   // lds_direct cannot be specified as either src1 or src2.
3332   for (int SrcIdx : SrcIndices) {
3333     if (SrcIdx == -1) break;
3334     const MCOperand &Src = Inst.getOperand(SrcIdx);
3335     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3336       return false;
3337     }
3338   }
3339 
3340   if (Src0Idx == -1)
3341     return true;
3342 
3343   const MCOperand &Src = Inst.getOperand(Src0Idx);
3344   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3345     return true;
3346 
3347   // lds_direct is specified as src0. Check additional limitations.
3348   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3349 }
3350 
3351 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3352   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3353     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3354     if (Op.isFlatOffset())
3355       return Op.getStartLoc();
3356   }
3357   return getLoc();
3358 }
3359 
3360 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3361                                          const OperandVector &Operands) {
3362   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3363   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3364     return true;
3365 
3366   auto Opcode = Inst.getOpcode();
3367   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3368   assert(OpNum != -1);
3369 
3370   const auto &Op = Inst.getOperand(OpNum);
3371   if (!hasFlatOffsets() && Op.getImm() != 0) {
3372     Error(getFlatOffsetLoc(Operands),
3373           "flat offset modifier is not supported on this GPU");
3374     return false;
3375   }
3376 
3377   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3378   // For FLAT segment the offset must be positive;
3379   // MSB is ignored and forced to zero.
3380   unsigned OffsetSize = isGFX9() ? 13 : 12;
3381   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3382     if (!isIntN(OffsetSize, Op.getImm())) {
3383       Error(getFlatOffsetLoc(Operands),
3384             isGFX9() ? "expected a 13-bit signed offset" :
3385                        "expected a 12-bit signed offset");
3386       return false;
3387     }
3388   } else {
3389     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3390       Error(getFlatOffsetLoc(Operands),
3391             isGFX9() ? "expected a 12-bit unsigned offset" :
3392                        "expected an 11-bit unsigned offset");
3393       return false;
3394     }
3395   }
3396 
3397   return true;
3398 }
3399 
3400 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3401   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3402     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3403     if (Op.isSMEMOffset())
3404       return Op.getStartLoc();
3405   }
3406   return getLoc();
3407 }
3408 
3409 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3410                                          const OperandVector &Operands) {
3411   if (isCI() || isSI())
3412     return true;
3413 
3414   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3415   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3416     return true;
3417 
3418   auto Opcode = Inst.getOpcode();
3419   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3420   if (OpNum == -1)
3421     return true;
3422 
3423   const auto &Op = Inst.getOperand(OpNum);
3424   if (!Op.isImm())
3425     return true;
3426 
3427   uint64_t Offset = Op.getImm();
3428   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3429   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3430       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3431     return true;
3432 
3433   Error(getSMEMOffsetLoc(Operands),
3434         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3435                                "expected a 21-bit signed offset");
3436 
3437   return false;
3438 }
3439 
3440 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3441   unsigned Opcode = Inst.getOpcode();
3442   const MCInstrDesc &Desc = MII.get(Opcode);
3443   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3444     return true;
3445 
3446   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3447   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3448 
3449   const int OpIndices[] = { Src0Idx, Src1Idx };
3450 
3451   unsigned NumExprs = 0;
3452   unsigned NumLiterals = 0;
3453   uint32_t LiteralValue;
3454 
3455   for (int OpIdx : OpIndices) {
3456     if (OpIdx == -1) break;
3457 
3458     const MCOperand &MO = Inst.getOperand(OpIdx);
3459     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3460     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3461       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3462         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3463         if (NumLiterals == 0 || LiteralValue != Value) {
3464           LiteralValue = Value;
3465           ++NumLiterals;
3466         }
3467       } else if (MO.isExpr()) {
3468         ++NumExprs;
3469       }
3470     }
3471   }
3472 
3473   return NumLiterals + NumExprs <= 1;
3474 }
3475 
3476 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3477   const unsigned Opc = Inst.getOpcode();
3478   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3479       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3480     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3481     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3482 
3483     if (OpSel & ~3)
3484       return false;
3485   }
3486   return true;
3487 }
3488 
3489 // Check if VCC register matches wavefront size
3490 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3491   auto FB = getFeatureBits();
3492   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3493     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3494 }
3495 
3496 // VOP3 literal is only allowed in GFX10+ and only one can be used
3497 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3498   unsigned Opcode = Inst.getOpcode();
3499   const MCInstrDesc &Desc = MII.get(Opcode);
3500   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3501     return true;
3502 
3503   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3504   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3505   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3506 
3507   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3508 
3509   unsigned NumExprs = 0;
3510   unsigned NumLiterals = 0;
3511   uint32_t LiteralValue;
3512 
3513   for (int OpIdx : OpIndices) {
3514     if (OpIdx == -1) break;
3515 
3516     const MCOperand &MO = Inst.getOperand(OpIdx);
3517     if (!MO.isImm() && !MO.isExpr())
3518       continue;
3519     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3520       continue;
3521 
3522     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3523         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3524       return false;
3525 
3526     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3527       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3528       if (NumLiterals == 0 || LiteralValue != Value) {
3529         LiteralValue = Value;
3530         ++NumLiterals;
3531       }
3532     } else if (MO.isExpr()) {
3533       ++NumExprs;
3534     }
3535   }
3536   NumLiterals += NumExprs;
3537 
3538   return !NumLiterals ||
3539          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3540 }
3541 
3542 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3543                                           const SMLoc &IDLoc,
3544                                           const OperandVector &Operands) {
3545   if (!validateLdsDirect(Inst)) {
3546     Error(IDLoc,
3547       "invalid use of lds_direct");
3548     return false;
3549   }
3550   if (!validateSOPLiteral(Inst)) {
3551     Error(IDLoc,
3552       "only one literal operand is allowed");
3553     return false;
3554   }
3555   if (!validateVOP3Literal(Inst)) {
3556     Error(IDLoc,
3557       "invalid literal operand");
3558     return false;
3559   }
3560   if (!validateConstantBusLimitations(Inst)) {
3561     Error(IDLoc,
3562       "invalid operand (violates constant bus restrictions)");
3563     return false;
3564   }
3565   if (!validateEarlyClobberLimitations(Inst)) {
3566     Error(IDLoc,
3567       "destination must be different than all sources");
3568     return false;
3569   }
3570   if (!validateIntClampSupported(Inst)) {
3571     Error(IDLoc,
3572       "integer clamping is not supported on this GPU");
3573     return false;
3574   }
3575   if (!validateOpSel(Inst)) {
3576     Error(IDLoc,
3577       "invalid op_sel operand");
3578     return false;
3579   }
3580   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3581   if (!validateMIMGD16(Inst)) {
3582     Error(IDLoc,
3583       "d16 modifier is not supported on this GPU");
3584     return false;
3585   }
3586   if (!validateMIMGDim(Inst)) {
3587     Error(IDLoc, "dim modifier is required on this GPU");
3588     return false;
3589   }
3590   if (!validateMIMGDataSize(Inst)) {
3591     Error(IDLoc,
3592       "image data size does not match dmask and tfe");
3593     return false;
3594   }
3595   if (!validateMIMGAddrSize(Inst)) {
3596     Error(IDLoc,
3597       "image address size does not match dim and a16");
3598     return false;
3599   }
3600   if (!validateMIMGAtomicDMask(Inst)) {
3601     Error(IDLoc,
3602       "invalid atomic image dmask");
3603     return false;
3604   }
3605   if (!validateMIMGGatherDMask(Inst)) {
3606     Error(IDLoc,
3607       "invalid image_gather dmask: only one bit must be set");
3608     return false;
3609   }
3610   if (!validateMovrels(Inst)) {
3611     Error(IDLoc, "source operand must be a VGPR");
3612     return false;
3613   }
3614   if (!validateFlatOffset(Inst, Operands)) {
3615     return false;
3616   }
3617   if (!validateSMEMOffset(Inst, Operands)) {
3618     return false;
3619   }
3620 
3621   return true;
3622 }
3623 
3624 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3625                                             const FeatureBitset &FBS,
3626                                             unsigned VariantID = 0);
3627 
3628 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3629                                               OperandVector &Operands,
3630                                               MCStreamer &Out,
3631                                               uint64_t &ErrorInfo,
3632                                               bool MatchingInlineAsm) {
3633   MCInst Inst;
3634   unsigned Result = Match_Success;
3635   for (auto Variant : getMatchedVariants()) {
3636     uint64_t EI;
3637     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3638                                   Variant);
3639     // We order match statuses from least to most specific. We use most specific
3640     // status as resulting
3641     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3642     if ((R == Match_Success) ||
3643         (R == Match_PreferE32) ||
3644         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3645         (R == Match_InvalidOperand && Result != Match_MissingFeature
3646                                    && Result != Match_PreferE32) ||
3647         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3648                                    && Result != Match_MissingFeature
3649                                    && Result != Match_PreferE32)) {
3650       Result = R;
3651       ErrorInfo = EI;
3652     }
3653     if (R == Match_Success)
3654       break;
3655   }
3656 
3657   switch (Result) {
3658   default: break;
3659   case Match_Success:
3660     if (!validateInstruction(Inst, IDLoc, Operands)) {
3661       return true;
3662     }
3663     Inst.setLoc(IDLoc);
3664     Out.emitInstruction(Inst, getSTI());
3665     return false;
3666 
3667   case Match_MissingFeature:
3668     return Error(IDLoc, "instruction not supported on this GPU");
3669 
3670   case Match_MnemonicFail: {
3671     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3672     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3673         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3674     return Error(IDLoc, "invalid instruction" + Suggestion,
3675                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3676   }
3677 
3678   case Match_InvalidOperand: {
3679     SMLoc ErrorLoc = IDLoc;
3680     if (ErrorInfo != ~0ULL) {
3681       if (ErrorInfo >= Operands.size()) {
3682         return Error(IDLoc, "too few operands for instruction");
3683       }
3684       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3685       if (ErrorLoc == SMLoc())
3686         ErrorLoc = IDLoc;
3687     }
3688     return Error(ErrorLoc, "invalid operand for instruction");
3689   }
3690 
3691   case Match_PreferE32:
3692     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3693                         "should be encoded as e32");
3694   }
3695   llvm_unreachable("Implement any new match types added!");
3696 }
3697 
3698 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3699   int64_t Tmp = -1;
3700   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3701     return true;
3702   }
3703   if (getParser().parseAbsoluteExpression(Tmp)) {
3704     return true;
3705   }
3706   Ret = static_cast<uint32_t>(Tmp);
3707   return false;
3708 }
3709 
3710 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3711                                                uint32_t &Minor) {
3712   if (ParseAsAbsoluteExpression(Major))
3713     return TokError("invalid major version");
3714 
3715   if (getLexer().isNot(AsmToken::Comma))
3716     return TokError("minor version number required, comma expected");
3717   Lex();
3718 
3719   if (ParseAsAbsoluteExpression(Minor))
3720     return TokError("invalid minor version");
3721 
3722   return false;
3723 }
3724 
3725 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3726   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3727     return TokError("directive only supported for amdgcn architecture");
3728 
3729   std::string Target;
3730 
3731   SMLoc TargetStart = getTok().getLoc();
3732   if (getParser().parseEscapedString(Target))
3733     return true;
3734   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3735 
3736   std::string ExpectedTarget;
3737   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3738   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3739 
3740   if (Target != ExpectedTargetOS.str())
3741     return getParser().Error(TargetRange.Start, "target must match options",
3742                              TargetRange);
3743 
3744   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3745   return false;
3746 }
3747 
3748 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3749   return getParser().Error(Range.Start, "value out of range", Range);
3750 }
3751 
3752 bool AMDGPUAsmParser::calculateGPRBlocks(
3753     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3754     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3755     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3756     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3757   // TODO(scott.linder): These calculations are duplicated from
3758   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3759   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3760 
3761   unsigned NumVGPRs = NextFreeVGPR;
3762   unsigned NumSGPRs = NextFreeSGPR;
3763 
3764   if (Version.Major >= 10)
3765     NumSGPRs = 0;
3766   else {
3767     unsigned MaxAddressableNumSGPRs =
3768         IsaInfo::getAddressableNumSGPRs(&getSTI());
3769 
3770     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3771         NumSGPRs > MaxAddressableNumSGPRs)
3772       return OutOfRangeError(SGPRRange);
3773 
3774     NumSGPRs +=
3775         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3776 
3777     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3778         NumSGPRs > MaxAddressableNumSGPRs)
3779       return OutOfRangeError(SGPRRange);
3780 
3781     if (Features.test(FeatureSGPRInitBug))
3782       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3783   }
3784 
3785   VGPRBlocks =
3786       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3787   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3788 
3789   return false;
3790 }
3791 
3792 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3793   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3794     return TokError("directive only supported for amdgcn architecture");
3795 
3796   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3797     return TokError("directive only supported for amdhsa OS");
3798 
3799   StringRef KernelName;
3800   if (getParser().parseIdentifier(KernelName))
3801     return true;
3802 
3803   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3804 
3805   StringSet<> Seen;
3806 
3807   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3808 
3809   SMRange VGPRRange;
3810   uint64_t NextFreeVGPR = 0;
3811   SMRange SGPRRange;
3812   uint64_t NextFreeSGPR = 0;
3813   unsigned UserSGPRCount = 0;
3814   bool ReserveVCC = true;
3815   bool ReserveFlatScr = true;
3816   bool ReserveXNACK = hasXNACK();
3817   Optional<bool> EnableWavefrontSize32;
3818 
3819   while (true) {
3820     while (getLexer().is(AsmToken::EndOfStatement))
3821       Lex();
3822 
3823     if (getLexer().isNot(AsmToken::Identifier))
3824       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3825 
3826     StringRef ID = getTok().getIdentifier();
3827     SMRange IDRange = getTok().getLocRange();
3828     Lex();
3829 
3830     if (ID == ".end_amdhsa_kernel")
3831       break;
3832 
3833     if (Seen.find(ID) != Seen.end())
3834       return TokError(".amdhsa_ directives cannot be repeated");
3835     Seen.insert(ID);
3836 
3837     SMLoc ValStart = getTok().getLoc();
3838     int64_t IVal;
3839     if (getParser().parseAbsoluteExpression(IVal))
3840       return true;
3841     SMLoc ValEnd = getTok().getLoc();
3842     SMRange ValRange = SMRange(ValStart, ValEnd);
3843 
3844     if (IVal < 0)
3845       return OutOfRangeError(ValRange);
3846 
3847     uint64_t Val = IVal;
3848 
3849 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3850   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3851     return OutOfRangeError(RANGE);                                             \
3852   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3853 
3854     if (ID == ".amdhsa_group_segment_fixed_size") {
3855       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3856         return OutOfRangeError(ValRange);
3857       KD.group_segment_fixed_size = Val;
3858     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3859       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3860         return OutOfRangeError(ValRange);
3861       KD.private_segment_fixed_size = Val;
3862     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3863       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3864                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3865                        Val, ValRange);
3866       if (Val)
3867         UserSGPRCount += 4;
3868     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3869       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3870                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3871                        ValRange);
3872       if (Val)
3873         UserSGPRCount += 2;
3874     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3875       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3876                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3877                        ValRange);
3878       if (Val)
3879         UserSGPRCount += 2;
3880     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3881       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3882                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3883                        Val, ValRange);
3884       if (Val)
3885         UserSGPRCount += 2;
3886     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3887       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3888                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3889                        ValRange);
3890       if (Val)
3891         UserSGPRCount += 2;
3892     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3893       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3894                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3895                        ValRange);
3896       if (Val)
3897         UserSGPRCount += 2;
3898     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3899       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3900                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3901                        Val, ValRange);
3902       if (Val)
3903         UserSGPRCount += 1;
3904     } else if (ID == ".amdhsa_wavefront_size32") {
3905       if (IVersion.Major < 10)
3906         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3907                                  IDRange);
3908       EnableWavefrontSize32 = Val;
3909       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3910                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3911                        Val, ValRange);
3912     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3913       PARSE_BITS_ENTRY(
3914           KD.compute_pgm_rsrc2,
3915           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3916           ValRange);
3917     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3918       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3919                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3920                        ValRange);
3921     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3922       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3923                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3924                        ValRange);
3925     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3926       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3927                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3928                        ValRange);
3929     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3930       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3931                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3932                        ValRange);
3933     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3934       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3935                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3936                        ValRange);
3937     } else if (ID == ".amdhsa_next_free_vgpr") {
3938       VGPRRange = ValRange;
3939       NextFreeVGPR = Val;
3940     } else if (ID == ".amdhsa_next_free_sgpr") {
3941       SGPRRange = ValRange;
3942       NextFreeSGPR = Val;
3943     } else if (ID == ".amdhsa_reserve_vcc") {
3944       if (!isUInt<1>(Val))
3945         return OutOfRangeError(ValRange);
3946       ReserveVCC = Val;
3947     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3948       if (IVersion.Major < 7)
3949         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3950                                  IDRange);
3951       if (!isUInt<1>(Val))
3952         return OutOfRangeError(ValRange);
3953       ReserveFlatScr = Val;
3954     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3955       if (IVersion.Major < 8)
3956         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3957                                  IDRange);
3958       if (!isUInt<1>(Val))
3959         return OutOfRangeError(ValRange);
3960       ReserveXNACK = Val;
3961     } else if (ID == ".amdhsa_float_round_mode_32") {
3962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3963                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3964     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3965       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3966                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3967     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3968       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3969                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3970     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3971       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3972                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3973                        ValRange);
3974     } else if (ID == ".amdhsa_dx10_clamp") {
3975       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3976                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3977     } else if (ID == ".amdhsa_ieee_mode") {
3978       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3979                        Val, ValRange);
3980     } else if (ID == ".amdhsa_fp16_overflow") {
3981       if (IVersion.Major < 9)
3982         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3983                                  IDRange);
3984       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3985                        ValRange);
3986     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3987       if (IVersion.Major < 10)
3988         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3989                                  IDRange);
3990       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3991                        ValRange);
3992     } else if (ID == ".amdhsa_memory_ordered") {
3993       if (IVersion.Major < 10)
3994         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3995                                  IDRange);
3996       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3997                        ValRange);
3998     } else if (ID == ".amdhsa_forward_progress") {
3999       if (IVersion.Major < 10)
4000         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4001                                  IDRange);
4002       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4003                        ValRange);
4004     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4005       PARSE_BITS_ENTRY(
4006           KD.compute_pgm_rsrc2,
4007           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4008           ValRange);
4009     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4011                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4012                        Val, ValRange);
4013     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4014       PARSE_BITS_ENTRY(
4015           KD.compute_pgm_rsrc2,
4016           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4017           ValRange);
4018     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4020                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4021                        Val, ValRange);
4022     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4023       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4024                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4025                        Val, ValRange);
4026     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4028                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4029                        Val, ValRange);
4030     } else if (ID == ".amdhsa_exception_int_div_zero") {
4031       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4032                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4033                        Val, ValRange);
4034     } else {
4035       return getParser().Error(IDRange.Start,
4036                                "unknown .amdhsa_kernel directive", IDRange);
4037     }
4038 
4039 #undef PARSE_BITS_ENTRY
4040   }
4041 
4042   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4043     return TokError(".amdhsa_next_free_vgpr directive is required");
4044 
4045   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4046     return TokError(".amdhsa_next_free_sgpr directive is required");
4047 
4048   unsigned VGPRBlocks;
4049   unsigned SGPRBlocks;
4050   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4051                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4052                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4053                          SGPRBlocks))
4054     return true;
4055 
4056   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4057           VGPRBlocks))
4058     return OutOfRangeError(VGPRRange);
4059   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4060                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4061 
4062   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4063           SGPRBlocks))
4064     return OutOfRangeError(SGPRRange);
4065   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4066                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4067                   SGPRBlocks);
4068 
4069   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4070     return TokError("too many user SGPRs enabled");
4071   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4072                   UserSGPRCount);
4073 
4074   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4075       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4076       ReserveFlatScr, ReserveXNACK);
4077   return false;
4078 }
4079 
4080 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4081   uint32_t Major;
4082   uint32_t Minor;
4083 
4084   if (ParseDirectiveMajorMinor(Major, Minor))
4085     return true;
4086 
4087   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4088   return false;
4089 }
4090 
4091 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4092   uint32_t Major;
4093   uint32_t Minor;
4094   uint32_t Stepping;
4095   StringRef VendorName;
4096   StringRef ArchName;
4097 
4098   // If this directive has no arguments, then use the ISA version for the
4099   // targeted GPU.
4100   if (getLexer().is(AsmToken::EndOfStatement)) {
4101     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4102     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4103                                                       ISA.Stepping,
4104                                                       "AMD", "AMDGPU");
4105     return false;
4106   }
4107 
4108   if (ParseDirectiveMajorMinor(Major, Minor))
4109     return true;
4110 
4111   if (getLexer().isNot(AsmToken::Comma))
4112     return TokError("stepping version number required, comma expected");
4113   Lex();
4114 
4115   if (ParseAsAbsoluteExpression(Stepping))
4116     return TokError("invalid stepping version");
4117 
4118   if (getLexer().isNot(AsmToken::Comma))
4119     return TokError("vendor name required, comma expected");
4120   Lex();
4121 
4122   if (getLexer().isNot(AsmToken::String))
4123     return TokError("invalid vendor name");
4124 
4125   VendorName = getLexer().getTok().getStringContents();
4126   Lex();
4127 
4128   if (getLexer().isNot(AsmToken::Comma))
4129     return TokError("arch name required, comma expected");
4130   Lex();
4131 
4132   if (getLexer().isNot(AsmToken::String))
4133     return TokError("invalid arch name");
4134 
4135   ArchName = getLexer().getTok().getStringContents();
4136   Lex();
4137 
4138   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4139                                                     VendorName, ArchName);
4140   return false;
4141 }
4142 
4143 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4144                                                amd_kernel_code_t &Header) {
4145   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4146   // assembly for backwards compatibility.
4147   if (ID == "max_scratch_backing_memory_byte_size") {
4148     Parser.eatToEndOfStatement();
4149     return false;
4150   }
4151 
4152   SmallString<40> ErrStr;
4153   raw_svector_ostream Err(ErrStr);
4154   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4155     return TokError(Err.str());
4156   }
4157   Lex();
4158 
4159   if (ID == "enable_wavefront_size32") {
4160     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4161       if (!isGFX10())
4162         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4163       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4164         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4165     } else {
4166       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4167         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4168     }
4169   }
4170 
4171   if (ID == "wavefront_size") {
4172     if (Header.wavefront_size == 5) {
4173       if (!isGFX10())
4174         return TokError("wavefront_size=5 is only allowed on GFX10+");
4175       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4176         return TokError("wavefront_size=5 requires +WavefrontSize32");
4177     } else if (Header.wavefront_size == 6) {
4178       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4179         return TokError("wavefront_size=6 requires +WavefrontSize64");
4180     }
4181   }
4182 
4183   if (ID == "enable_wgp_mode") {
4184     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4185       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4186   }
4187 
4188   if (ID == "enable_mem_ordered") {
4189     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4190       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4191   }
4192 
4193   if (ID == "enable_fwd_progress") {
4194     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4195       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4196   }
4197 
4198   return false;
4199 }
4200 
4201 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4202   amd_kernel_code_t Header;
4203   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4204 
4205   while (true) {
4206     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4207     // will set the current token to EndOfStatement.
4208     while(getLexer().is(AsmToken::EndOfStatement))
4209       Lex();
4210 
4211     if (getLexer().isNot(AsmToken::Identifier))
4212       return TokError("expected value identifier or .end_amd_kernel_code_t");
4213 
4214     StringRef ID = getLexer().getTok().getIdentifier();
4215     Lex();
4216 
4217     if (ID == ".end_amd_kernel_code_t")
4218       break;
4219 
4220     if (ParseAMDKernelCodeTValue(ID, Header))
4221       return true;
4222   }
4223 
4224   getTargetStreamer().EmitAMDKernelCodeT(Header);
4225 
4226   return false;
4227 }
4228 
4229 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4230   if (getLexer().isNot(AsmToken::Identifier))
4231     return TokError("expected symbol name");
4232 
4233   StringRef KernelName = Parser.getTok().getString();
4234 
4235   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4236                                            ELF::STT_AMDGPU_HSA_KERNEL);
4237   Lex();
4238   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4239     KernelScope.initialize(getContext());
4240   return false;
4241 }
4242 
4243 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4244   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4245     return Error(getParser().getTok().getLoc(),
4246                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4247                  "architectures");
4248   }
4249 
4250   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4251 
4252   std::string ISAVersionStringFromSTI;
4253   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4254   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4255 
4256   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4257     return Error(getParser().getTok().getLoc(),
4258                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4259                  "arguments specified through the command line");
4260   }
4261 
4262   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4263   Lex();
4264 
4265   return false;
4266 }
4267 
4268 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4269   const char *AssemblerDirectiveBegin;
4270   const char *AssemblerDirectiveEnd;
4271   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4272       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4273           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4274                             HSAMD::V3::AssemblerDirectiveEnd)
4275           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4276                             HSAMD::AssemblerDirectiveEnd);
4277 
4278   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4279     return Error(getParser().getTok().getLoc(),
4280                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4281                  "not available on non-amdhsa OSes")).str());
4282   }
4283 
4284   std::string HSAMetadataString;
4285   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4286                           HSAMetadataString))
4287     return true;
4288 
4289   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4290     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4291       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4292   } else {
4293     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4294       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4295   }
4296 
4297   return false;
4298 }
4299 
4300 /// Common code to parse out a block of text (typically YAML) between start and
4301 /// end directives.
4302 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4303                                           const char *AssemblerDirectiveEnd,
4304                                           std::string &CollectString) {
4305 
4306   raw_string_ostream CollectStream(CollectString);
4307 
4308   getLexer().setSkipSpace(false);
4309 
4310   bool FoundEnd = false;
4311   while (!getLexer().is(AsmToken::Eof)) {
4312     while (getLexer().is(AsmToken::Space)) {
4313       CollectStream << getLexer().getTok().getString();
4314       Lex();
4315     }
4316 
4317     if (getLexer().is(AsmToken::Identifier)) {
4318       StringRef ID = getLexer().getTok().getIdentifier();
4319       if (ID == AssemblerDirectiveEnd) {
4320         Lex();
4321         FoundEnd = true;
4322         break;
4323       }
4324     }
4325 
4326     CollectStream << Parser.parseStringToEndOfStatement()
4327                   << getContext().getAsmInfo()->getSeparatorString();
4328 
4329     Parser.eatToEndOfStatement();
4330   }
4331 
4332   getLexer().setSkipSpace(true);
4333 
4334   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4335     return TokError(Twine("expected directive ") +
4336                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4337   }
4338 
4339   CollectStream.flush();
4340   return false;
4341 }
4342 
4343 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4344 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4345   std::string String;
4346   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4347                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4348     return true;
4349 
4350   auto PALMetadata = getTargetStreamer().getPALMetadata();
4351   if (!PALMetadata->setFromString(String))
4352     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4353   return false;
4354 }
4355 
4356 /// Parse the assembler directive for old linear-format PAL metadata.
4357 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4358   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4359     return Error(getParser().getTok().getLoc(),
4360                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4361                  "not available on non-amdpal OSes")).str());
4362   }
4363 
4364   auto PALMetadata = getTargetStreamer().getPALMetadata();
4365   PALMetadata->setLegacy();
4366   for (;;) {
4367     uint32_t Key, Value;
4368     if (ParseAsAbsoluteExpression(Key)) {
4369       return TokError(Twine("invalid value in ") +
4370                       Twine(PALMD::AssemblerDirective));
4371     }
4372     if (getLexer().isNot(AsmToken::Comma)) {
4373       return TokError(Twine("expected an even number of values in ") +
4374                       Twine(PALMD::AssemblerDirective));
4375     }
4376     Lex();
4377     if (ParseAsAbsoluteExpression(Value)) {
4378       return TokError(Twine("invalid value in ") +
4379                       Twine(PALMD::AssemblerDirective));
4380     }
4381     PALMetadata->setRegister(Key, Value);
4382     if (getLexer().isNot(AsmToken::Comma))
4383       break;
4384     Lex();
4385   }
4386   return false;
4387 }
4388 
4389 /// ParseDirectiveAMDGPULDS
4390 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4391 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4392   if (getParser().checkForValidSection())
4393     return true;
4394 
4395   StringRef Name;
4396   SMLoc NameLoc = getLexer().getLoc();
4397   if (getParser().parseIdentifier(Name))
4398     return TokError("expected identifier in directive");
4399 
4400   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4401   if (parseToken(AsmToken::Comma, "expected ','"))
4402     return true;
4403 
4404   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4405 
4406   int64_t Size;
4407   SMLoc SizeLoc = getLexer().getLoc();
4408   if (getParser().parseAbsoluteExpression(Size))
4409     return true;
4410   if (Size < 0)
4411     return Error(SizeLoc, "size must be non-negative");
4412   if (Size > LocalMemorySize)
4413     return Error(SizeLoc, "size is too large");
4414 
4415   int64_t Align = 4;
4416   if (getLexer().is(AsmToken::Comma)) {
4417     Lex();
4418     SMLoc AlignLoc = getLexer().getLoc();
4419     if (getParser().parseAbsoluteExpression(Align))
4420       return true;
4421     if (Align < 0 || !isPowerOf2_64(Align))
4422       return Error(AlignLoc, "alignment must be a power of two");
4423 
4424     // Alignment larger than the size of LDS is possible in theory, as long
4425     // as the linker manages to place to symbol at address 0, but we do want
4426     // to make sure the alignment fits nicely into a 32-bit integer.
4427     if (Align >= 1u << 31)
4428       return Error(AlignLoc, "alignment is too large");
4429   }
4430 
4431   if (parseToken(AsmToken::EndOfStatement,
4432                  "unexpected token in '.amdgpu_lds' directive"))
4433     return true;
4434 
4435   Symbol->redefineIfPossible();
4436   if (!Symbol->isUndefined())
4437     return Error(NameLoc, "invalid symbol redefinition");
4438 
4439   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4440   return false;
4441 }
4442 
4443 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4444   StringRef IDVal = DirectiveID.getString();
4445 
4446   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4447     if (IDVal == ".amdgcn_target")
4448       return ParseDirectiveAMDGCNTarget();
4449 
4450     if (IDVal == ".amdhsa_kernel")
4451       return ParseDirectiveAMDHSAKernel();
4452 
4453     // TODO: Restructure/combine with PAL metadata directive.
4454     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4455       return ParseDirectiveHSAMetadata();
4456   } else {
4457     if (IDVal == ".hsa_code_object_version")
4458       return ParseDirectiveHSACodeObjectVersion();
4459 
4460     if (IDVal == ".hsa_code_object_isa")
4461       return ParseDirectiveHSACodeObjectISA();
4462 
4463     if (IDVal == ".amd_kernel_code_t")
4464       return ParseDirectiveAMDKernelCodeT();
4465 
4466     if (IDVal == ".amdgpu_hsa_kernel")
4467       return ParseDirectiveAMDGPUHsaKernel();
4468 
4469     if (IDVal == ".amd_amdgpu_isa")
4470       return ParseDirectiveISAVersion();
4471 
4472     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4473       return ParseDirectiveHSAMetadata();
4474   }
4475 
4476   if (IDVal == ".amdgpu_lds")
4477     return ParseDirectiveAMDGPULDS();
4478 
4479   if (IDVal == PALMD::AssemblerDirectiveBegin)
4480     return ParseDirectivePALMetadataBegin();
4481 
4482   if (IDVal == PALMD::AssemblerDirective)
4483     return ParseDirectivePALMetadata();
4484 
4485   return true;
4486 }
4487 
4488 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4489                                            unsigned RegNo) const {
4490 
4491   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4492        R.isValid(); ++R) {
4493     if (*R == RegNo)
4494       return isGFX9() || isGFX10();
4495   }
4496 
4497   // GFX10 has 2 more SGPRs 104 and 105.
4498   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4499        R.isValid(); ++R) {
4500     if (*R == RegNo)
4501       return hasSGPR104_SGPR105();
4502   }
4503 
4504   switch (RegNo) {
4505   case AMDGPU::SRC_SHARED_BASE:
4506   case AMDGPU::SRC_SHARED_LIMIT:
4507   case AMDGPU::SRC_PRIVATE_BASE:
4508   case AMDGPU::SRC_PRIVATE_LIMIT:
4509   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4510     return !isCI() && !isSI() && !isVI();
4511   case AMDGPU::TBA:
4512   case AMDGPU::TBA_LO:
4513   case AMDGPU::TBA_HI:
4514   case AMDGPU::TMA:
4515   case AMDGPU::TMA_LO:
4516   case AMDGPU::TMA_HI:
4517     return !isGFX9() && !isGFX10();
4518   case AMDGPU::XNACK_MASK:
4519   case AMDGPU::XNACK_MASK_LO:
4520   case AMDGPU::XNACK_MASK_HI:
4521     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4522   case AMDGPU::SGPR_NULL:
4523     return isGFX10();
4524   default:
4525     break;
4526   }
4527 
4528   if (isCI())
4529     return true;
4530 
4531   if (isSI() || isGFX10()) {
4532     // No flat_scr on SI.
4533     // On GFX10 flat scratch is not a valid register operand and can only be
4534     // accessed with s_setreg/s_getreg.
4535     switch (RegNo) {
4536     case AMDGPU::FLAT_SCR:
4537     case AMDGPU::FLAT_SCR_LO:
4538     case AMDGPU::FLAT_SCR_HI:
4539       return false;
4540     default:
4541       return true;
4542     }
4543   }
4544 
4545   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4546   // SI/CI have.
4547   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4548        R.isValid(); ++R) {
4549     if (*R == RegNo)
4550       return hasSGPR102_SGPR103();
4551   }
4552 
4553   return true;
4554 }
4555 
4556 OperandMatchResultTy
4557 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4558                               OperandMode Mode) {
4559   // Try to parse with a custom parser
4560   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4561 
4562   // If we successfully parsed the operand or if there as an error parsing,
4563   // we are done.
4564   //
4565   // If we are parsing after we reach EndOfStatement then this means we
4566   // are appending default values to the Operands list.  This is only done
4567   // by custom parser, so we shouldn't continue on to the generic parsing.
4568   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4569       getLexer().is(AsmToken::EndOfStatement))
4570     return ResTy;
4571 
4572   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4573     unsigned Prefix = Operands.size();
4574     SMLoc LBraceLoc = getTok().getLoc();
4575     Parser.Lex(); // eat the '['
4576 
4577     for (;;) {
4578       ResTy = parseReg(Operands);
4579       if (ResTy != MatchOperand_Success)
4580         return ResTy;
4581 
4582       if (getLexer().is(AsmToken::RBrac))
4583         break;
4584 
4585       if (getLexer().isNot(AsmToken::Comma))
4586         return MatchOperand_ParseFail;
4587       Parser.Lex();
4588     }
4589 
4590     if (Operands.size() - Prefix > 1) {
4591       Operands.insert(Operands.begin() + Prefix,
4592                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4593       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4594                                                     getTok().getLoc()));
4595     }
4596 
4597     Parser.Lex(); // eat the ']'
4598     return MatchOperand_Success;
4599   }
4600 
4601   return parseRegOrImm(Operands);
4602 }
4603 
4604 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4605   // Clear any forced encodings from the previous instruction.
4606   setForcedEncodingSize(0);
4607   setForcedDPP(false);
4608   setForcedSDWA(false);
4609 
4610   if (Name.endswith("_e64")) {
4611     setForcedEncodingSize(64);
4612     return Name.substr(0, Name.size() - 4);
4613   } else if (Name.endswith("_e32")) {
4614     setForcedEncodingSize(32);
4615     return Name.substr(0, Name.size() - 4);
4616   } else if (Name.endswith("_dpp")) {
4617     setForcedDPP(true);
4618     return Name.substr(0, Name.size() - 4);
4619   } else if (Name.endswith("_sdwa")) {
4620     setForcedSDWA(true);
4621     return Name.substr(0, Name.size() - 5);
4622   }
4623   return Name;
4624 }
4625 
4626 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4627                                        StringRef Name,
4628                                        SMLoc NameLoc, OperandVector &Operands) {
4629   // Add the instruction mnemonic
4630   Name = parseMnemonicSuffix(Name);
4631   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4632 
4633   bool IsMIMG = Name.startswith("image_");
4634 
4635   while (!getLexer().is(AsmToken::EndOfStatement)) {
4636     OperandMode Mode = OperandMode_Default;
4637     if (IsMIMG && isGFX10() && Operands.size() == 2)
4638       Mode = OperandMode_NSA;
4639     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4640 
4641     // Eat the comma or space if there is one.
4642     if (getLexer().is(AsmToken::Comma))
4643       Parser.Lex();
4644 
4645     switch (Res) {
4646       case MatchOperand_Success: break;
4647       case MatchOperand_ParseFail:
4648         // FIXME: use real operand location rather than the current location.
4649         Error(getLexer().getLoc(), "failed parsing operand.");
4650         while (!getLexer().is(AsmToken::EndOfStatement)) {
4651           Parser.Lex();
4652         }
4653         return true;
4654       case MatchOperand_NoMatch:
4655         // FIXME: use real operand location rather than the current location.
4656         Error(getLexer().getLoc(), "not a valid operand.");
4657         while (!getLexer().is(AsmToken::EndOfStatement)) {
4658           Parser.Lex();
4659         }
4660         return true;
4661     }
4662   }
4663 
4664   return false;
4665 }
4666 
4667 //===----------------------------------------------------------------------===//
4668 // Utility functions
4669 //===----------------------------------------------------------------------===//
4670 
4671 OperandMatchResultTy
4672 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4673 
4674   if (!trySkipId(Prefix, AsmToken::Colon))
4675     return MatchOperand_NoMatch;
4676 
4677   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4678 }
4679 
4680 OperandMatchResultTy
4681 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4682                                     AMDGPUOperand::ImmTy ImmTy,
4683                                     bool (*ConvertResult)(int64_t&)) {
4684   SMLoc S = getLoc();
4685   int64_t Value = 0;
4686 
4687   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4688   if (Res != MatchOperand_Success)
4689     return Res;
4690 
4691   if (ConvertResult && !ConvertResult(Value)) {
4692     Error(S, "invalid " + StringRef(Prefix) + " value.");
4693   }
4694 
4695   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4696   return MatchOperand_Success;
4697 }
4698 
4699 OperandMatchResultTy
4700 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4701                                              OperandVector &Operands,
4702                                              AMDGPUOperand::ImmTy ImmTy,
4703                                              bool (*ConvertResult)(int64_t&)) {
4704   SMLoc S = getLoc();
4705   if (!trySkipId(Prefix, AsmToken::Colon))
4706     return MatchOperand_NoMatch;
4707 
4708   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4709     return MatchOperand_ParseFail;
4710 
4711   unsigned Val = 0;
4712   const unsigned MaxSize = 4;
4713 
4714   // FIXME: How to verify the number of elements matches the number of src
4715   // operands?
4716   for (int I = 0; ; ++I) {
4717     int64_t Op;
4718     SMLoc Loc = getLoc();
4719     if (!parseExpr(Op))
4720       return MatchOperand_ParseFail;
4721 
4722     if (Op != 0 && Op != 1) {
4723       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4724       return MatchOperand_ParseFail;
4725     }
4726 
4727     Val |= (Op << I);
4728 
4729     if (trySkipToken(AsmToken::RBrac))
4730       break;
4731 
4732     if (I + 1 == MaxSize) {
4733       Error(getLoc(), "expected a closing square bracket");
4734       return MatchOperand_ParseFail;
4735     }
4736 
4737     if (!skipToken(AsmToken::Comma, "expected a comma"))
4738       return MatchOperand_ParseFail;
4739   }
4740 
4741   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4742   return MatchOperand_Success;
4743 }
4744 
4745 OperandMatchResultTy
4746 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4747                                AMDGPUOperand::ImmTy ImmTy) {
4748   int64_t Bit = 0;
4749   SMLoc S = Parser.getTok().getLoc();
4750 
4751   // We are at the end of the statement, and this is a default argument, so
4752   // use a default value.
4753   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4754     switch(getLexer().getKind()) {
4755       case AsmToken::Identifier: {
4756         StringRef Tok = Parser.getTok().getString();
4757         if (Tok == Name) {
4758           if (Tok == "r128" && !hasMIMG_R128())
4759             Error(S, "r128 modifier is not supported on this GPU");
4760           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4761             Error(S, "a16 modifier is not supported on this GPU");
4762           Bit = 1;
4763           Parser.Lex();
4764         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4765           Bit = 0;
4766           Parser.Lex();
4767         } else {
4768           return MatchOperand_NoMatch;
4769         }
4770         break;
4771       }
4772       default:
4773         return MatchOperand_NoMatch;
4774     }
4775   }
4776 
4777   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4778     return MatchOperand_ParseFail;
4779 
4780   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4781     ImmTy = AMDGPUOperand::ImmTyR128A16;
4782 
4783   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4784   return MatchOperand_Success;
4785 }
4786 
4787 static void addOptionalImmOperand(
4788   MCInst& Inst, const OperandVector& Operands,
4789   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4790   AMDGPUOperand::ImmTy ImmT,
4791   int64_t Default = 0) {
4792   auto i = OptionalIdx.find(ImmT);
4793   if (i != OptionalIdx.end()) {
4794     unsigned Idx = i->second;
4795     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4796   } else {
4797     Inst.addOperand(MCOperand::createImm(Default));
4798   }
4799 }
4800 
4801 OperandMatchResultTy
4802 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4803   if (getLexer().isNot(AsmToken::Identifier)) {
4804     return MatchOperand_NoMatch;
4805   }
4806   StringRef Tok = Parser.getTok().getString();
4807   if (Tok != Prefix) {
4808     return MatchOperand_NoMatch;
4809   }
4810 
4811   Parser.Lex();
4812   if (getLexer().isNot(AsmToken::Colon)) {
4813     return MatchOperand_ParseFail;
4814   }
4815 
4816   Parser.Lex();
4817   if (getLexer().isNot(AsmToken::Identifier)) {
4818     return MatchOperand_ParseFail;
4819   }
4820 
4821   Value = Parser.getTok().getString();
4822   return MatchOperand_Success;
4823 }
4824 
4825 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4826 // values to live in a joint format operand in the MCInst encoding.
4827 OperandMatchResultTy
4828 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4829   SMLoc S = Parser.getTok().getLoc();
4830   int64_t Dfmt = 0, Nfmt = 0;
4831   // dfmt and nfmt can appear in either order, and each is optional.
4832   bool GotDfmt = false, GotNfmt = false;
4833   while (!GotDfmt || !GotNfmt) {
4834     if (!GotDfmt) {
4835       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4836       if (Res != MatchOperand_NoMatch) {
4837         if (Res != MatchOperand_Success)
4838           return Res;
4839         if (Dfmt >= 16) {
4840           Error(Parser.getTok().getLoc(), "out of range dfmt");
4841           return MatchOperand_ParseFail;
4842         }
4843         GotDfmt = true;
4844         Parser.Lex();
4845         continue;
4846       }
4847     }
4848     if (!GotNfmt) {
4849       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4850       if (Res != MatchOperand_NoMatch) {
4851         if (Res != MatchOperand_Success)
4852           return Res;
4853         if (Nfmt >= 8) {
4854           Error(Parser.getTok().getLoc(), "out of range nfmt");
4855           return MatchOperand_ParseFail;
4856         }
4857         GotNfmt = true;
4858         Parser.Lex();
4859         continue;
4860       }
4861     }
4862     break;
4863   }
4864   if (!GotDfmt && !GotNfmt)
4865     return MatchOperand_NoMatch;
4866   auto Format = Dfmt | Nfmt << 4;
4867   Operands.push_back(
4868       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4869   return MatchOperand_Success;
4870 }
4871 
4872 //===----------------------------------------------------------------------===//
4873 // ds
4874 //===----------------------------------------------------------------------===//
4875 
4876 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4877                                     const OperandVector &Operands) {
4878   OptionalImmIndexMap OptionalIdx;
4879 
4880   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4881     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4882 
4883     // Add the register arguments
4884     if (Op.isReg()) {
4885       Op.addRegOperands(Inst, 1);
4886       continue;
4887     }
4888 
4889     // Handle optional arguments
4890     OptionalIdx[Op.getImmTy()] = i;
4891   }
4892 
4893   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4894   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4895   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4896 
4897   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4898 }
4899 
4900 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4901                                 bool IsGdsHardcoded) {
4902   OptionalImmIndexMap OptionalIdx;
4903 
4904   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4905     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4906 
4907     // Add the register arguments
4908     if (Op.isReg()) {
4909       Op.addRegOperands(Inst, 1);
4910       continue;
4911     }
4912 
4913     if (Op.isToken() && Op.getToken() == "gds") {
4914       IsGdsHardcoded = true;
4915       continue;
4916     }
4917 
4918     // Handle optional arguments
4919     OptionalIdx[Op.getImmTy()] = i;
4920   }
4921 
4922   AMDGPUOperand::ImmTy OffsetType =
4923     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4924      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4925      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4926                                                       AMDGPUOperand::ImmTyOffset;
4927 
4928   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4929 
4930   if (!IsGdsHardcoded) {
4931     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4932   }
4933   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4934 }
4935 
4936 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4937   OptionalImmIndexMap OptionalIdx;
4938 
4939   unsigned OperandIdx[4];
4940   unsigned EnMask = 0;
4941   int SrcIdx = 0;
4942 
4943   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4944     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4945 
4946     // Add the register arguments
4947     if (Op.isReg()) {
4948       assert(SrcIdx < 4);
4949       OperandIdx[SrcIdx] = Inst.size();
4950       Op.addRegOperands(Inst, 1);
4951       ++SrcIdx;
4952       continue;
4953     }
4954 
4955     if (Op.isOff()) {
4956       assert(SrcIdx < 4);
4957       OperandIdx[SrcIdx] = Inst.size();
4958       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4959       ++SrcIdx;
4960       continue;
4961     }
4962 
4963     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4964       Op.addImmOperands(Inst, 1);
4965       continue;
4966     }
4967 
4968     if (Op.isToken() && Op.getToken() == "done")
4969       continue;
4970 
4971     // Handle optional arguments
4972     OptionalIdx[Op.getImmTy()] = i;
4973   }
4974 
4975   assert(SrcIdx == 4);
4976 
4977   bool Compr = false;
4978   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4979     Compr = true;
4980     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4981     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4982     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4983   }
4984 
4985   for (auto i = 0; i < SrcIdx; ++i) {
4986     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4987       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4988     }
4989   }
4990 
4991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4993 
4994   Inst.addOperand(MCOperand::createImm(EnMask));
4995 }
4996 
4997 //===----------------------------------------------------------------------===//
4998 // s_waitcnt
4999 //===----------------------------------------------------------------------===//
5000 
5001 static bool
5002 encodeCnt(
5003   const AMDGPU::IsaVersion ISA,
5004   int64_t &IntVal,
5005   int64_t CntVal,
5006   bool Saturate,
5007   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5008   unsigned (*decode)(const IsaVersion &Version, unsigned))
5009 {
5010   bool Failed = false;
5011 
5012   IntVal = encode(ISA, IntVal, CntVal);
5013   if (CntVal != decode(ISA, IntVal)) {
5014     if (Saturate) {
5015       IntVal = encode(ISA, IntVal, -1);
5016     } else {
5017       Failed = true;
5018     }
5019   }
5020   return Failed;
5021 }
5022 
5023 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5024 
5025   SMLoc CntLoc = getLoc();
5026   StringRef CntName = getTokenStr();
5027 
5028   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5029       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5030     return false;
5031 
5032   int64_t CntVal;
5033   SMLoc ValLoc = getLoc();
5034   if (!parseExpr(CntVal))
5035     return false;
5036 
5037   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5038 
5039   bool Failed = true;
5040   bool Sat = CntName.endswith("_sat");
5041 
5042   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5043     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5044   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5045     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5046   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5047     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5048   } else {
5049     Error(CntLoc, "invalid counter name " + CntName);
5050     return false;
5051   }
5052 
5053   if (Failed) {
5054     Error(ValLoc, "too large value for " + CntName);
5055     return false;
5056   }
5057 
5058   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5059     return false;
5060 
5061   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5062     if (isToken(AsmToken::EndOfStatement)) {
5063       Error(getLoc(), "expected a counter name");
5064       return false;
5065     }
5066   }
5067 
5068   return true;
5069 }
5070 
5071 OperandMatchResultTy
5072 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5073   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5074   int64_t Waitcnt = getWaitcntBitMask(ISA);
5075   SMLoc S = getLoc();
5076 
5077   // If parse failed, do not return error code
5078   // to avoid excessive error messages.
5079   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5080     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5081   } else {
5082     parseExpr(Waitcnt);
5083   }
5084 
5085   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5086   return MatchOperand_Success;
5087 }
5088 
5089 bool
5090 AMDGPUOperand::isSWaitCnt() const {
5091   return isImm();
5092 }
5093 
5094 //===----------------------------------------------------------------------===//
5095 // hwreg
5096 //===----------------------------------------------------------------------===//
5097 
5098 bool
5099 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5100                                 int64_t &Offset,
5101                                 int64_t &Width) {
5102   using namespace llvm::AMDGPU::Hwreg;
5103 
5104   // The register may be specified by name or using a numeric code
5105   if (isToken(AsmToken::Identifier) &&
5106       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5107     HwReg.IsSymbolic = true;
5108     lex(); // skip message name
5109   } else if (!parseExpr(HwReg.Id)) {
5110     return false;
5111   }
5112 
5113   if (trySkipToken(AsmToken::RParen))
5114     return true;
5115 
5116   // parse optional params
5117   return
5118     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5119     parseExpr(Offset) &&
5120     skipToken(AsmToken::Comma, "expected a comma") &&
5121     parseExpr(Width) &&
5122     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5123 }
5124 
5125 bool
5126 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5127                                const int64_t Offset,
5128                                const int64_t Width,
5129                                const SMLoc Loc) {
5130 
5131   using namespace llvm::AMDGPU::Hwreg;
5132 
5133   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5134     Error(Loc, "specified hardware register is not supported on this GPU");
5135     return false;
5136   } else if (!isValidHwreg(HwReg.Id)) {
5137     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5138     return false;
5139   } else if (!isValidHwregOffset(Offset)) {
5140     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5141     return false;
5142   } else if (!isValidHwregWidth(Width)) {
5143     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5144     return false;
5145   }
5146   return true;
5147 }
5148 
5149 OperandMatchResultTy
5150 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5151   using namespace llvm::AMDGPU::Hwreg;
5152 
5153   int64_t ImmVal = 0;
5154   SMLoc Loc = getLoc();
5155 
5156   // If parse failed, do not return error code
5157   // to avoid excessive error messages.
5158   if (trySkipId("hwreg", AsmToken::LParen)) {
5159     OperandInfoTy HwReg(ID_UNKNOWN_);
5160     int64_t Offset = OFFSET_DEFAULT_;
5161     int64_t Width = WIDTH_DEFAULT_;
5162     if (parseHwregBody(HwReg, Offset, Width) &&
5163         validateHwreg(HwReg, Offset, Width, Loc)) {
5164       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5165     }
5166   } else if (parseExpr(ImmVal)) {
5167     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5168       Error(Loc, "invalid immediate: only 16-bit values are legal");
5169   }
5170 
5171   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5172   return MatchOperand_Success;
5173 }
5174 
5175 bool AMDGPUOperand::isHwreg() const {
5176   return isImmTy(ImmTyHwreg);
5177 }
5178 
5179 //===----------------------------------------------------------------------===//
5180 // sendmsg
5181 //===----------------------------------------------------------------------===//
5182 
5183 bool
5184 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5185                                   OperandInfoTy &Op,
5186                                   OperandInfoTy &Stream) {
5187   using namespace llvm::AMDGPU::SendMsg;
5188 
5189   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5190     Msg.IsSymbolic = true;
5191     lex(); // skip message name
5192   } else if (!parseExpr(Msg.Id)) {
5193     return false;
5194   }
5195 
5196   if (trySkipToken(AsmToken::Comma)) {
5197     Op.IsDefined = true;
5198     if (isToken(AsmToken::Identifier) &&
5199         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5200       lex(); // skip operation name
5201     } else if (!parseExpr(Op.Id)) {
5202       return false;
5203     }
5204 
5205     if (trySkipToken(AsmToken::Comma)) {
5206       Stream.IsDefined = true;
5207       if (!parseExpr(Stream.Id))
5208         return false;
5209     }
5210   }
5211 
5212   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5213 }
5214 
5215 bool
5216 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5217                                  const OperandInfoTy &Op,
5218                                  const OperandInfoTy &Stream,
5219                                  const SMLoc S) {
5220   using namespace llvm::AMDGPU::SendMsg;
5221 
5222   // Validation strictness depends on whether message is specified
5223   // in a symbolc or in a numeric form. In the latter case
5224   // only encoding possibility is checked.
5225   bool Strict = Msg.IsSymbolic;
5226 
5227   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5228     Error(S, "invalid message id");
5229     return false;
5230   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5231     Error(S, Op.IsDefined ?
5232              "message does not support operations" :
5233              "missing message operation");
5234     return false;
5235   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5236     Error(S, "invalid operation id");
5237     return false;
5238   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5239     Error(S, "message operation does not support streams");
5240     return false;
5241   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5242     Error(S, "invalid message stream id");
5243     return false;
5244   }
5245   return true;
5246 }
5247 
5248 OperandMatchResultTy
5249 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5250   using namespace llvm::AMDGPU::SendMsg;
5251 
5252   int64_t ImmVal = 0;
5253   SMLoc Loc = getLoc();
5254 
5255   // If parse failed, do not return error code
5256   // to avoid excessive error messages.
5257   if (trySkipId("sendmsg", AsmToken::LParen)) {
5258     OperandInfoTy Msg(ID_UNKNOWN_);
5259     OperandInfoTy Op(OP_NONE_);
5260     OperandInfoTy Stream(STREAM_ID_NONE_);
5261     if (parseSendMsgBody(Msg, Op, Stream) &&
5262         validateSendMsg(Msg, Op, Stream, Loc)) {
5263       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5264     }
5265   } else if (parseExpr(ImmVal)) {
5266     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5267       Error(Loc, "invalid immediate: only 16-bit values are legal");
5268   }
5269 
5270   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5271   return MatchOperand_Success;
5272 }
5273 
5274 bool AMDGPUOperand::isSendMsg() const {
5275   return isImmTy(ImmTySendMsg);
5276 }
5277 
5278 //===----------------------------------------------------------------------===//
5279 // v_interp
5280 //===----------------------------------------------------------------------===//
5281 
5282 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5283   if (getLexer().getKind() != AsmToken::Identifier)
5284     return MatchOperand_NoMatch;
5285 
5286   StringRef Str = Parser.getTok().getString();
5287   int Slot = StringSwitch<int>(Str)
5288     .Case("p10", 0)
5289     .Case("p20", 1)
5290     .Case("p0", 2)
5291     .Default(-1);
5292 
5293   SMLoc S = Parser.getTok().getLoc();
5294   if (Slot == -1)
5295     return MatchOperand_ParseFail;
5296 
5297   Parser.Lex();
5298   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5299                                               AMDGPUOperand::ImmTyInterpSlot));
5300   return MatchOperand_Success;
5301 }
5302 
5303 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5304   if (getLexer().getKind() != AsmToken::Identifier)
5305     return MatchOperand_NoMatch;
5306 
5307   StringRef Str = Parser.getTok().getString();
5308   if (!Str.startswith("attr"))
5309     return MatchOperand_NoMatch;
5310 
5311   StringRef Chan = Str.take_back(2);
5312   int AttrChan = StringSwitch<int>(Chan)
5313     .Case(".x", 0)
5314     .Case(".y", 1)
5315     .Case(".z", 2)
5316     .Case(".w", 3)
5317     .Default(-1);
5318   if (AttrChan == -1)
5319     return MatchOperand_ParseFail;
5320 
5321   Str = Str.drop_back(2).drop_front(4);
5322 
5323   uint8_t Attr;
5324   if (Str.getAsInteger(10, Attr))
5325     return MatchOperand_ParseFail;
5326 
5327   SMLoc S = Parser.getTok().getLoc();
5328   Parser.Lex();
5329   if (Attr > 63) {
5330     Error(S, "out of bounds attr");
5331     return MatchOperand_Success;
5332   }
5333 
5334   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5335 
5336   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5337                                               AMDGPUOperand::ImmTyInterpAttr));
5338   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5339                                               AMDGPUOperand::ImmTyAttrChan));
5340   return MatchOperand_Success;
5341 }
5342 
5343 //===----------------------------------------------------------------------===//
5344 // exp
5345 //===----------------------------------------------------------------------===//
5346 
5347 void AMDGPUAsmParser::errorExpTgt() {
5348   Error(Parser.getTok().getLoc(), "invalid exp target");
5349 }
5350 
5351 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5352                                                       uint8_t &Val) {
5353   if (Str == "null") {
5354     Val = 9;
5355     return MatchOperand_Success;
5356   }
5357 
5358   if (Str.startswith("mrt")) {
5359     Str = Str.drop_front(3);
5360     if (Str == "z") { // == mrtz
5361       Val = 8;
5362       return MatchOperand_Success;
5363     }
5364 
5365     if (Str.getAsInteger(10, Val))
5366       return MatchOperand_ParseFail;
5367 
5368     if (Val > 7)
5369       errorExpTgt();
5370 
5371     return MatchOperand_Success;
5372   }
5373 
5374   if (Str.startswith("pos")) {
5375     Str = Str.drop_front(3);
5376     if (Str.getAsInteger(10, Val))
5377       return MatchOperand_ParseFail;
5378 
5379     if (Val > 4 || (Val == 4 && !isGFX10()))
5380       errorExpTgt();
5381 
5382     Val += 12;
5383     return MatchOperand_Success;
5384   }
5385 
5386   if (isGFX10() && Str == "prim") {
5387     Val = 20;
5388     return MatchOperand_Success;
5389   }
5390 
5391   if (Str.startswith("param")) {
5392     Str = Str.drop_front(5);
5393     if (Str.getAsInteger(10, Val))
5394       return MatchOperand_ParseFail;
5395 
5396     if (Val >= 32)
5397       errorExpTgt();
5398 
5399     Val += 32;
5400     return MatchOperand_Success;
5401   }
5402 
5403   if (Str.startswith("invalid_target_")) {
5404     Str = Str.drop_front(15);
5405     if (Str.getAsInteger(10, Val))
5406       return MatchOperand_ParseFail;
5407 
5408     errorExpTgt();
5409     return MatchOperand_Success;
5410   }
5411 
5412   return MatchOperand_NoMatch;
5413 }
5414 
5415 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5416   uint8_t Val;
5417   StringRef Str = Parser.getTok().getString();
5418 
5419   auto Res = parseExpTgtImpl(Str, Val);
5420   if (Res != MatchOperand_Success)
5421     return Res;
5422 
5423   SMLoc S = Parser.getTok().getLoc();
5424   Parser.Lex();
5425 
5426   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5427                                               AMDGPUOperand::ImmTyExpTgt));
5428   return MatchOperand_Success;
5429 }
5430 
5431 //===----------------------------------------------------------------------===//
5432 // parser helpers
5433 //===----------------------------------------------------------------------===//
5434 
5435 bool
5436 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5437   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5438 }
5439 
5440 bool
5441 AMDGPUAsmParser::isId(const StringRef Id) const {
5442   return isId(getToken(), Id);
5443 }
5444 
5445 bool
5446 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5447   return getTokenKind() == Kind;
5448 }
5449 
5450 bool
5451 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5452   if (isId(Id)) {
5453     lex();
5454     return true;
5455   }
5456   return false;
5457 }
5458 
5459 bool
5460 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5461   if (isId(Id) && peekToken().is(Kind)) {
5462     lex();
5463     lex();
5464     return true;
5465   }
5466   return false;
5467 }
5468 
5469 bool
5470 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5471   if (isToken(Kind)) {
5472     lex();
5473     return true;
5474   }
5475   return false;
5476 }
5477 
5478 bool
5479 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5480                            const StringRef ErrMsg) {
5481   if (!trySkipToken(Kind)) {
5482     Error(getLoc(), ErrMsg);
5483     return false;
5484   }
5485   return true;
5486 }
5487 
5488 bool
5489 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5490   return !getParser().parseAbsoluteExpression(Imm);
5491 }
5492 
5493 bool
5494 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5495   SMLoc S = getLoc();
5496 
5497   const MCExpr *Expr;
5498   if (Parser.parseExpression(Expr))
5499     return false;
5500 
5501   int64_t IntVal;
5502   if (Expr->evaluateAsAbsolute(IntVal)) {
5503     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5504   } else {
5505     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5506   }
5507   return true;
5508 }
5509 
5510 bool
5511 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5512   if (isToken(AsmToken::String)) {
5513     Val = getToken().getStringContents();
5514     lex();
5515     return true;
5516   } else {
5517     Error(getLoc(), ErrMsg);
5518     return false;
5519   }
5520 }
5521 
5522 AsmToken
5523 AMDGPUAsmParser::getToken() const {
5524   return Parser.getTok();
5525 }
5526 
5527 AsmToken
5528 AMDGPUAsmParser::peekToken() {
5529   return getLexer().peekTok();
5530 }
5531 
5532 void
5533 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5534   auto TokCount = getLexer().peekTokens(Tokens);
5535 
5536   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5537     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5538 }
5539 
5540 AsmToken::TokenKind
5541 AMDGPUAsmParser::getTokenKind() const {
5542   return getLexer().getKind();
5543 }
5544 
5545 SMLoc
5546 AMDGPUAsmParser::getLoc() const {
5547   return getToken().getLoc();
5548 }
5549 
5550 StringRef
5551 AMDGPUAsmParser::getTokenStr() const {
5552   return getToken().getString();
5553 }
5554 
5555 void
5556 AMDGPUAsmParser::lex() {
5557   Parser.Lex();
5558 }
5559 
5560 //===----------------------------------------------------------------------===//
5561 // swizzle
5562 //===----------------------------------------------------------------------===//
5563 
5564 LLVM_READNONE
5565 static unsigned
5566 encodeBitmaskPerm(const unsigned AndMask,
5567                   const unsigned OrMask,
5568                   const unsigned XorMask) {
5569   using namespace llvm::AMDGPU::Swizzle;
5570 
5571   return BITMASK_PERM_ENC |
5572          (AndMask << BITMASK_AND_SHIFT) |
5573          (OrMask  << BITMASK_OR_SHIFT)  |
5574          (XorMask << BITMASK_XOR_SHIFT);
5575 }
5576 
5577 bool
5578 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5579                                       const unsigned MinVal,
5580                                       const unsigned MaxVal,
5581                                       const StringRef ErrMsg) {
5582   for (unsigned i = 0; i < OpNum; ++i) {
5583     if (!skipToken(AsmToken::Comma, "expected a comma")){
5584       return false;
5585     }
5586     SMLoc ExprLoc = Parser.getTok().getLoc();
5587     if (!parseExpr(Op[i])) {
5588       return false;
5589     }
5590     if (Op[i] < MinVal || Op[i] > MaxVal) {
5591       Error(ExprLoc, ErrMsg);
5592       return false;
5593     }
5594   }
5595 
5596   return true;
5597 }
5598 
5599 bool
5600 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5601   using namespace llvm::AMDGPU::Swizzle;
5602 
5603   int64_t Lane[LANE_NUM];
5604   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5605                            "expected a 2-bit lane id")) {
5606     Imm = QUAD_PERM_ENC;
5607     for (unsigned I = 0; I < LANE_NUM; ++I) {
5608       Imm |= Lane[I] << (LANE_SHIFT * I);
5609     }
5610     return true;
5611   }
5612   return false;
5613 }
5614 
5615 bool
5616 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5617   using namespace llvm::AMDGPU::Swizzle;
5618 
5619   SMLoc S = Parser.getTok().getLoc();
5620   int64_t GroupSize;
5621   int64_t LaneIdx;
5622 
5623   if (!parseSwizzleOperands(1, &GroupSize,
5624                             2, 32,
5625                             "group size must be in the interval [2,32]")) {
5626     return false;
5627   }
5628   if (!isPowerOf2_64(GroupSize)) {
5629     Error(S, "group size must be a power of two");
5630     return false;
5631   }
5632   if (parseSwizzleOperands(1, &LaneIdx,
5633                            0, GroupSize - 1,
5634                            "lane id must be in the interval [0,group size - 1]")) {
5635     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5636     return true;
5637   }
5638   return false;
5639 }
5640 
5641 bool
5642 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5643   using namespace llvm::AMDGPU::Swizzle;
5644 
5645   SMLoc S = Parser.getTok().getLoc();
5646   int64_t GroupSize;
5647 
5648   if (!parseSwizzleOperands(1, &GroupSize,
5649       2, 32, "group size must be in the interval [2,32]")) {
5650     return false;
5651   }
5652   if (!isPowerOf2_64(GroupSize)) {
5653     Error(S, "group size must be a power of two");
5654     return false;
5655   }
5656 
5657   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5658   return true;
5659 }
5660 
5661 bool
5662 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5663   using namespace llvm::AMDGPU::Swizzle;
5664 
5665   SMLoc S = Parser.getTok().getLoc();
5666   int64_t GroupSize;
5667 
5668   if (!parseSwizzleOperands(1, &GroupSize,
5669       1, 16, "group size must be in the interval [1,16]")) {
5670     return false;
5671   }
5672   if (!isPowerOf2_64(GroupSize)) {
5673     Error(S, "group size must be a power of two");
5674     return false;
5675   }
5676 
5677   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5678   return true;
5679 }
5680 
5681 bool
5682 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5683   using namespace llvm::AMDGPU::Swizzle;
5684 
5685   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5686     return false;
5687   }
5688 
5689   StringRef Ctl;
5690   SMLoc StrLoc = Parser.getTok().getLoc();
5691   if (!parseString(Ctl)) {
5692     return false;
5693   }
5694   if (Ctl.size() != BITMASK_WIDTH) {
5695     Error(StrLoc, "expected a 5-character mask");
5696     return false;
5697   }
5698 
5699   unsigned AndMask = 0;
5700   unsigned OrMask = 0;
5701   unsigned XorMask = 0;
5702 
5703   for (size_t i = 0; i < Ctl.size(); ++i) {
5704     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5705     switch(Ctl[i]) {
5706     default:
5707       Error(StrLoc, "invalid mask");
5708       return false;
5709     case '0':
5710       break;
5711     case '1':
5712       OrMask |= Mask;
5713       break;
5714     case 'p':
5715       AndMask |= Mask;
5716       break;
5717     case 'i':
5718       AndMask |= Mask;
5719       XorMask |= Mask;
5720       break;
5721     }
5722   }
5723 
5724   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5725   return true;
5726 }
5727 
5728 bool
5729 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5730 
5731   SMLoc OffsetLoc = Parser.getTok().getLoc();
5732 
5733   if (!parseExpr(Imm)) {
5734     return false;
5735   }
5736   if (!isUInt<16>(Imm)) {
5737     Error(OffsetLoc, "expected a 16-bit offset");
5738     return false;
5739   }
5740   return true;
5741 }
5742 
5743 bool
5744 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5745   using namespace llvm::AMDGPU::Swizzle;
5746 
5747   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5748 
5749     SMLoc ModeLoc = Parser.getTok().getLoc();
5750     bool Ok = false;
5751 
5752     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5753       Ok = parseSwizzleQuadPerm(Imm);
5754     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5755       Ok = parseSwizzleBitmaskPerm(Imm);
5756     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5757       Ok = parseSwizzleBroadcast(Imm);
5758     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5759       Ok = parseSwizzleSwap(Imm);
5760     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5761       Ok = parseSwizzleReverse(Imm);
5762     } else {
5763       Error(ModeLoc, "expected a swizzle mode");
5764     }
5765 
5766     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5767   }
5768 
5769   return false;
5770 }
5771 
5772 OperandMatchResultTy
5773 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5774   SMLoc S = Parser.getTok().getLoc();
5775   int64_t Imm = 0;
5776 
5777   if (trySkipId("offset")) {
5778 
5779     bool Ok = false;
5780     if (skipToken(AsmToken::Colon, "expected a colon")) {
5781       if (trySkipId("swizzle")) {
5782         Ok = parseSwizzleMacro(Imm);
5783       } else {
5784         Ok = parseSwizzleOffset(Imm);
5785       }
5786     }
5787 
5788     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5789 
5790     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5791   } else {
5792     // Swizzle "offset" operand is optional.
5793     // If it is omitted, try parsing other optional operands.
5794     return parseOptionalOpr(Operands);
5795   }
5796 }
5797 
5798 bool
5799 AMDGPUOperand::isSwizzle() const {
5800   return isImmTy(ImmTySwizzle);
5801 }
5802 
5803 //===----------------------------------------------------------------------===//
5804 // VGPR Index Mode
5805 //===----------------------------------------------------------------------===//
5806 
5807 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5808 
5809   using namespace llvm::AMDGPU::VGPRIndexMode;
5810 
5811   if (trySkipToken(AsmToken::RParen)) {
5812     return OFF;
5813   }
5814 
5815   int64_t Imm = 0;
5816 
5817   while (true) {
5818     unsigned Mode = 0;
5819     SMLoc S = Parser.getTok().getLoc();
5820 
5821     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5822       if (trySkipId(IdSymbolic[ModeId])) {
5823         Mode = 1 << ModeId;
5824         break;
5825       }
5826     }
5827 
5828     if (Mode == 0) {
5829       Error(S, (Imm == 0)?
5830                "expected a VGPR index mode or a closing parenthesis" :
5831                "expected a VGPR index mode");
5832       break;
5833     }
5834 
5835     if (Imm & Mode) {
5836       Error(S, "duplicate VGPR index mode");
5837       break;
5838     }
5839     Imm |= Mode;
5840 
5841     if (trySkipToken(AsmToken::RParen))
5842       break;
5843     if (!skipToken(AsmToken::Comma,
5844                    "expected a comma or a closing parenthesis"))
5845       break;
5846   }
5847 
5848   return Imm;
5849 }
5850 
5851 OperandMatchResultTy
5852 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5853 
5854   int64_t Imm = 0;
5855   SMLoc S = Parser.getTok().getLoc();
5856 
5857   if (getLexer().getKind() == AsmToken::Identifier &&
5858       Parser.getTok().getString() == "gpr_idx" &&
5859       getLexer().peekTok().is(AsmToken::LParen)) {
5860 
5861     Parser.Lex();
5862     Parser.Lex();
5863 
5864     // If parse failed, trigger an error but do not return error code
5865     // to avoid excessive error messages.
5866     Imm = parseGPRIdxMacro();
5867 
5868   } else {
5869     if (getParser().parseAbsoluteExpression(Imm))
5870       return MatchOperand_NoMatch;
5871     if (Imm < 0 || !isUInt<4>(Imm)) {
5872       Error(S, "invalid immediate: only 4-bit values are legal");
5873     }
5874   }
5875 
5876   Operands.push_back(
5877       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5878   return MatchOperand_Success;
5879 }
5880 
5881 bool AMDGPUOperand::isGPRIdxMode() const {
5882   return isImmTy(ImmTyGprIdxMode);
5883 }
5884 
5885 //===----------------------------------------------------------------------===//
5886 // sopp branch targets
5887 //===----------------------------------------------------------------------===//
5888 
5889 OperandMatchResultTy
5890 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5891 
5892   // Make sure we are not parsing something
5893   // that looks like a label or an expression but is not.
5894   // This will improve error messages.
5895   if (isRegister() || isModifier())
5896     return MatchOperand_NoMatch;
5897 
5898   if (parseExpr(Operands)) {
5899 
5900     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5901     assert(Opr.isImm() || Opr.isExpr());
5902     SMLoc Loc = Opr.getStartLoc();
5903 
5904     // Currently we do not support arbitrary expressions as branch targets.
5905     // Only labels and absolute expressions are accepted.
5906     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5907       Error(Loc, "expected an absolute expression or a label");
5908     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5909       Error(Loc, "expected a 16-bit signed jump offset");
5910     }
5911   }
5912 
5913   return MatchOperand_Success; // avoid excessive error messages
5914 }
5915 
5916 //===----------------------------------------------------------------------===//
5917 // Boolean holding registers
5918 //===----------------------------------------------------------------------===//
5919 
5920 OperandMatchResultTy
5921 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5922   return parseReg(Operands);
5923 }
5924 
5925 //===----------------------------------------------------------------------===//
5926 // mubuf
5927 //===----------------------------------------------------------------------===//
5928 
5929 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5930   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5931 }
5932 
5933 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5934   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5935 }
5936 
5937 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5938   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5939 }
5940 
5941 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5942                                const OperandVector &Operands,
5943                                bool IsAtomic,
5944                                bool IsAtomicReturn,
5945                                bool IsLds) {
5946   bool IsLdsOpcode = IsLds;
5947   bool HasLdsModifier = false;
5948   OptionalImmIndexMap OptionalIdx;
5949   assert(IsAtomicReturn ? IsAtomic : true);
5950   unsigned FirstOperandIdx = 1;
5951 
5952   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5953     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5954 
5955     // Add the register arguments
5956     if (Op.isReg()) {
5957       Op.addRegOperands(Inst, 1);
5958       // Insert a tied src for atomic return dst.
5959       // This cannot be postponed as subsequent calls to
5960       // addImmOperands rely on correct number of MC operands.
5961       if (IsAtomicReturn && i == FirstOperandIdx)
5962         Op.addRegOperands(Inst, 1);
5963       continue;
5964     }
5965 
5966     // Handle the case where soffset is an immediate
5967     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5968       Op.addImmOperands(Inst, 1);
5969       continue;
5970     }
5971 
5972     HasLdsModifier |= Op.isLDS();
5973 
5974     // Handle tokens like 'offen' which are sometimes hard-coded into the
5975     // asm string.  There are no MCInst operands for these.
5976     if (Op.isToken()) {
5977       continue;
5978     }
5979     assert(Op.isImm());
5980 
5981     // Handle optional arguments
5982     OptionalIdx[Op.getImmTy()] = i;
5983   }
5984 
5985   // This is a workaround for an llvm quirk which may result in an
5986   // incorrect instruction selection. Lds and non-lds versions of
5987   // MUBUF instructions are identical except that lds versions
5988   // have mandatory 'lds' modifier. However this modifier follows
5989   // optional modifiers and llvm asm matcher regards this 'lds'
5990   // modifier as an optional one. As a result, an lds version
5991   // of opcode may be selected even if it has no 'lds' modifier.
5992   if (IsLdsOpcode && !HasLdsModifier) {
5993     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5994     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5995       Inst.setOpcode(NoLdsOpcode);
5996       IsLdsOpcode = false;
5997     }
5998   }
5999 
6000   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6001   if (!IsAtomic) { // glc is hard-coded.
6002     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6003   }
6004   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6005 
6006   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6007     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6008   }
6009 
6010   if (isGFX10())
6011     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6012 }
6013 
6014 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6015   OptionalImmIndexMap OptionalIdx;
6016 
6017   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6018     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6019 
6020     // Add the register arguments
6021     if (Op.isReg()) {
6022       Op.addRegOperands(Inst, 1);
6023       continue;
6024     }
6025 
6026     // Handle the case where soffset is an immediate
6027     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6028       Op.addImmOperands(Inst, 1);
6029       continue;
6030     }
6031 
6032     // Handle tokens like 'offen' which are sometimes hard-coded into the
6033     // asm string.  There are no MCInst operands for these.
6034     if (Op.isToken()) {
6035       continue;
6036     }
6037     assert(Op.isImm());
6038 
6039     // Handle optional arguments
6040     OptionalIdx[Op.getImmTy()] = i;
6041   }
6042 
6043   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6044                         AMDGPUOperand::ImmTyOffset);
6045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6047   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6048   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6049 
6050   if (isGFX10())
6051     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6052 }
6053 
6054 //===----------------------------------------------------------------------===//
6055 // mimg
6056 //===----------------------------------------------------------------------===//
6057 
6058 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6059                               bool IsAtomic) {
6060   unsigned I = 1;
6061   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6062   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6063     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6064   }
6065 
6066   if (IsAtomic) {
6067     // Add src, same as dst
6068     assert(Desc.getNumDefs() == 1);
6069     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6070   }
6071 
6072   OptionalImmIndexMap OptionalIdx;
6073 
6074   for (unsigned E = Operands.size(); I != E; ++I) {
6075     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6076 
6077     // Add the register arguments
6078     if (Op.isReg()) {
6079       Op.addRegOperands(Inst, 1);
6080     } else if (Op.isImmModifier()) {
6081       OptionalIdx[Op.getImmTy()] = I;
6082     } else if (!Op.isToken()) {
6083       llvm_unreachable("unexpected operand type");
6084     }
6085   }
6086 
6087   bool IsGFX10 = isGFX10();
6088 
6089   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6090   if (IsGFX10)
6091     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6092   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6093   if (IsGFX10)
6094     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6095   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6096   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6097   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6098   if (IsGFX10)
6099     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6100   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6101   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6102   if (!IsGFX10)
6103     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6104   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6105 }
6106 
6107 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6108   cvtMIMG(Inst, Operands, true);
6109 }
6110 
6111 //===----------------------------------------------------------------------===//
6112 // smrd
6113 //===----------------------------------------------------------------------===//
6114 
6115 bool AMDGPUOperand::isSMRDOffset8() const {
6116   return isImm() && isUInt<8>(getImm());
6117 }
6118 
6119 bool AMDGPUOperand::isSMEMOffset() const {
6120   return isImm(); // Offset range is checked later by validator.
6121 }
6122 
6123 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6124   // 32-bit literals are only supported on CI and we only want to use them
6125   // when the offset is > 8-bits.
6126   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6127 }
6128 
6129 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6130   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6131 }
6132 
6133 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6134   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6135 }
6136 
6137 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6138   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6139 }
6140 
6141 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6142   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6143 }
6144 
6145 //===----------------------------------------------------------------------===//
6146 // vop3
6147 //===----------------------------------------------------------------------===//
6148 
6149 static bool ConvertOmodMul(int64_t &Mul) {
6150   if (Mul != 1 && Mul != 2 && Mul != 4)
6151     return false;
6152 
6153   Mul >>= 1;
6154   return true;
6155 }
6156 
6157 static bool ConvertOmodDiv(int64_t &Div) {
6158   if (Div == 1) {
6159     Div = 0;
6160     return true;
6161   }
6162 
6163   if (Div == 2) {
6164     Div = 3;
6165     return true;
6166   }
6167 
6168   return false;
6169 }
6170 
6171 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6172   if (BoundCtrl == 0) {
6173     BoundCtrl = 1;
6174     return true;
6175   }
6176 
6177   if (BoundCtrl == -1) {
6178     BoundCtrl = 0;
6179     return true;
6180   }
6181 
6182   return false;
6183 }
6184 
6185 // Note: the order in this table matches the order of operands in AsmString.
6186 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6187   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6188   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6189   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6190   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6191   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6192   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6193   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6194   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6195   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6196   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6197   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6198   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6199   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6200   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6201   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6202   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6203   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6204   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6205   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6206   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6207   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6208   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6209   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6210   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6211   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6212   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6213   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6214   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6215   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6216   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6217   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6218   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6219   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6220   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6221   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6222   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6223   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6224   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6225   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6226   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6227   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6228   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6229   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6230   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6231 };
6232 
6233 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6234 
6235   OperandMatchResultTy res = parseOptionalOpr(Operands);
6236 
6237   // This is a hack to enable hardcoded mandatory operands which follow
6238   // optional operands.
6239   //
6240   // Current design assumes that all operands after the first optional operand
6241   // are also optional. However implementation of some instructions violates
6242   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6243   //
6244   // To alleviate this problem, we have to (implicitly) parse extra operands
6245   // to make sure autogenerated parser of custom operands never hit hardcoded
6246   // mandatory operands.
6247 
6248   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6249     if (res != MatchOperand_Success ||
6250         isToken(AsmToken::EndOfStatement))
6251       break;
6252 
6253     trySkipToken(AsmToken::Comma);
6254     res = parseOptionalOpr(Operands);
6255   }
6256 
6257   return res;
6258 }
6259 
6260 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6261   OperandMatchResultTy res;
6262   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6263     // try to parse any optional operand here
6264     if (Op.IsBit) {
6265       res = parseNamedBit(Op.Name, Operands, Op.Type);
6266     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6267       res = parseOModOperand(Operands);
6268     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6269                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6270                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6271       res = parseSDWASel(Operands, Op.Name, Op.Type);
6272     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6273       res = parseSDWADstUnused(Operands);
6274     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6275                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6276                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6277                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6278       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6279                                         Op.ConvertResult);
6280     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6281       res = parseDim(Operands);
6282     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6283       res = parseDfmtNfmt(Operands);
6284     } else {
6285       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6286     }
6287     if (res != MatchOperand_NoMatch) {
6288       return res;
6289     }
6290   }
6291   return MatchOperand_NoMatch;
6292 }
6293 
6294 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6295   StringRef Name = Parser.getTok().getString();
6296   if (Name == "mul") {
6297     return parseIntWithPrefix("mul", Operands,
6298                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6299   }
6300 
6301   if (Name == "div") {
6302     return parseIntWithPrefix("div", Operands,
6303                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6304   }
6305 
6306   return MatchOperand_NoMatch;
6307 }
6308 
6309 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6310   cvtVOP3P(Inst, Operands);
6311 
6312   int Opc = Inst.getOpcode();
6313 
6314   int SrcNum;
6315   const int Ops[] = { AMDGPU::OpName::src0,
6316                       AMDGPU::OpName::src1,
6317                       AMDGPU::OpName::src2 };
6318   for (SrcNum = 0;
6319        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6320        ++SrcNum);
6321   assert(SrcNum > 0);
6322 
6323   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6324   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6325 
6326   if ((OpSel & (1 << SrcNum)) != 0) {
6327     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6328     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6329     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6330   }
6331 }
6332 
6333 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6334       // 1. This operand is input modifiers
6335   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6336       // 2. This is not last operand
6337       && Desc.NumOperands > (OpNum + 1)
6338       // 3. Next operand is register class
6339       && Desc.OpInfo[OpNum + 1].RegClass != -1
6340       // 4. Next register is not tied to any other operand
6341       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6342 }
6343 
6344 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6345 {
6346   OptionalImmIndexMap OptionalIdx;
6347   unsigned Opc = Inst.getOpcode();
6348 
6349   unsigned I = 1;
6350   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6351   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6352     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6353   }
6354 
6355   for (unsigned E = Operands.size(); I != E; ++I) {
6356     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6357     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6358       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6359     } else if (Op.isInterpSlot() ||
6360                Op.isInterpAttr() ||
6361                Op.isAttrChan()) {
6362       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6363     } else if (Op.isImmModifier()) {
6364       OptionalIdx[Op.getImmTy()] = I;
6365     } else {
6366       llvm_unreachable("unhandled operand type");
6367     }
6368   }
6369 
6370   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6371     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6372   }
6373 
6374   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6375     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6376   }
6377 
6378   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6379     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6380   }
6381 }
6382 
6383 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6384                               OptionalImmIndexMap &OptionalIdx) {
6385   unsigned Opc = Inst.getOpcode();
6386 
6387   unsigned I = 1;
6388   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6389   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6390     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6391   }
6392 
6393   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6394     // This instruction has src modifiers
6395     for (unsigned E = Operands.size(); I != E; ++I) {
6396       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6397       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6398         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6399       } else if (Op.isImmModifier()) {
6400         OptionalIdx[Op.getImmTy()] = I;
6401       } else if (Op.isRegOrImm()) {
6402         Op.addRegOrImmOperands(Inst, 1);
6403       } else {
6404         llvm_unreachable("unhandled operand type");
6405       }
6406     }
6407   } else {
6408     // No src modifiers
6409     for (unsigned E = Operands.size(); I != E; ++I) {
6410       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6411       if (Op.isMod()) {
6412         OptionalIdx[Op.getImmTy()] = I;
6413       } else {
6414         Op.addRegOrImmOperands(Inst, 1);
6415       }
6416     }
6417   }
6418 
6419   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6420     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6421   }
6422 
6423   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6424     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6425   }
6426 
6427   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6428   // it has src2 register operand that is tied to dst operand
6429   // we don't allow modifiers for this operand in assembler so src2_modifiers
6430   // should be 0.
6431   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6432       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6433       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6434       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6435       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6436       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6437       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6438     auto it = Inst.begin();
6439     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6440     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6441     ++it;
6442     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6443   }
6444 }
6445 
6446 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6447   OptionalImmIndexMap OptionalIdx;
6448   cvtVOP3(Inst, Operands, OptionalIdx);
6449 }
6450 
6451 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6452                                const OperandVector &Operands) {
6453   OptionalImmIndexMap OptIdx;
6454   const int Opc = Inst.getOpcode();
6455   const MCInstrDesc &Desc = MII.get(Opc);
6456 
6457   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6458 
6459   cvtVOP3(Inst, Operands, OptIdx);
6460 
6461   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6462     assert(!IsPacked);
6463     Inst.addOperand(Inst.getOperand(0));
6464   }
6465 
6466   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6467   // instruction, and then figure out where to actually put the modifiers
6468 
6469   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6470 
6471   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6472   if (OpSelHiIdx != -1) {
6473     int DefaultVal = IsPacked ? -1 : 0;
6474     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6475                           DefaultVal);
6476   }
6477 
6478   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6479   if (NegLoIdx != -1) {
6480     assert(IsPacked);
6481     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6482     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6483   }
6484 
6485   const int Ops[] = { AMDGPU::OpName::src0,
6486                       AMDGPU::OpName::src1,
6487                       AMDGPU::OpName::src2 };
6488   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6489                          AMDGPU::OpName::src1_modifiers,
6490                          AMDGPU::OpName::src2_modifiers };
6491 
6492   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6493 
6494   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6495   unsigned OpSelHi = 0;
6496   unsigned NegLo = 0;
6497   unsigned NegHi = 0;
6498 
6499   if (OpSelHiIdx != -1) {
6500     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6501   }
6502 
6503   if (NegLoIdx != -1) {
6504     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6505     NegLo = Inst.getOperand(NegLoIdx).getImm();
6506     NegHi = Inst.getOperand(NegHiIdx).getImm();
6507   }
6508 
6509   for (int J = 0; J < 3; ++J) {
6510     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6511     if (OpIdx == -1)
6512       break;
6513 
6514     uint32_t ModVal = 0;
6515 
6516     if ((OpSel & (1 << J)) != 0)
6517       ModVal |= SISrcMods::OP_SEL_0;
6518 
6519     if ((OpSelHi & (1 << J)) != 0)
6520       ModVal |= SISrcMods::OP_SEL_1;
6521 
6522     if ((NegLo & (1 << J)) != 0)
6523       ModVal |= SISrcMods::NEG;
6524 
6525     if ((NegHi & (1 << J)) != 0)
6526       ModVal |= SISrcMods::NEG_HI;
6527 
6528     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6529 
6530     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6531   }
6532 }
6533 
6534 //===----------------------------------------------------------------------===//
6535 // dpp
6536 //===----------------------------------------------------------------------===//
6537 
6538 bool AMDGPUOperand::isDPP8() const {
6539   return isImmTy(ImmTyDPP8);
6540 }
6541 
6542 bool AMDGPUOperand::isDPPCtrl() const {
6543   using namespace AMDGPU::DPP;
6544 
6545   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6546   if (result) {
6547     int64_t Imm = getImm();
6548     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6549            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6550            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6551            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6552            (Imm == DppCtrl::WAVE_SHL1) ||
6553            (Imm == DppCtrl::WAVE_ROL1) ||
6554            (Imm == DppCtrl::WAVE_SHR1) ||
6555            (Imm == DppCtrl::WAVE_ROR1) ||
6556            (Imm == DppCtrl::ROW_MIRROR) ||
6557            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6558            (Imm == DppCtrl::BCAST15) ||
6559            (Imm == DppCtrl::BCAST31) ||
6560            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6561            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6562   }
6563   return false;
6564 }
6565 
6566 //===----------------------------------------------------------------------===//
6567 // mAI
6568 //===----------------------------------------------------------------------===//
6569 
6570 bool AMDGPUOperand::isBLGP() const {
6571   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6572 }
6573 
6574 bool AMDGPUOperand::isCBSZ() const {
6575   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6576 }
6577 
6578 bool AMDGPUOperand::isABID() const {
6579   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6580 }
6581 
6582 bool AMDGPUOperand::isS16Imm() const {
6583   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6584 }
6585 
6586 bool AMDGPUOperand::isU16Imm() const {
6587   return isImm() && isUInt<16>(getImm());
6588 }
6589 
6590 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6591   if (!isGFX10())
6592     return MatchOperand_NoMatch;
6593 
6594   SMLoc S = Parser.getTok().getLoc();
6595 
6596   if (getLexer().isNot(AsmToken::Identifier))
6597     return MatchOperand_NoMatch;
6598   if (getLexer().getTok().getString() != "dim")
6599     return MatchOperand_NoMatch;
6600 
6601   Parser.Lex();
6602   if (getLexer().isNot(AsmToken::Colon))
6603     return MatchOperand_ParseFail;
6604 
6605   Parser.Lex();
6606 
6607   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6608   // integer.
6609   std::string Token;
6610   if (getLexer().is(AsmToken::Integer)) {
6611     SMLoc Loc = getLexer().getTok().getEndLoc();
6612     Token = std::string(getLexer().getTok().getString());
6613     Parser.Lex();
6614     if (getLexer().getTok().getLoc() != Loc)
6615       return MatchOperand_ParseFail;
6616   }
6617   if (getLexer().isNot(AsmToken::Identifier))
6618     return MatchOperand_ParseFail;
6619   Token += getLexer().getTok().getString();
6620 
6621   StringRef DimId = Token;
6622   if (DimId.startswith("SQ_RSRC_IMG_"))
6623     DimId = DimId.substr(12);
6624 
6625   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6626   if (!DimInfo)
6627     return MatchOperand_ParseFail;
6628 
6629   Parser.Lex();
6630 
6631   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6632                                               AMDGPUOperand::ImmTyDim));
6633   return MatchOperand_Success;
6634 }
6635 
6636 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6637   SMLoc S = Parser.getTok().getLoc();
6638   StringRef Prefix;
6639 
6640   if (getLexer().getKind() == AsmToken::Identifier) {
6641     Prefix = Parser.getTok().getString();
6642   } else {
6643     return MatchOperand_NoMatch;
6644   }
6645 
6646   if (Prefix != "dpp8")
6647     return parseDPPCtrl(Operands);
6648   if (!isGFX10())
6649     return MatchOperand_NoMatch;
6650 
6651   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6652 
6653   int64_t Sels[8];
6654 
6655   Parser.Lex();
6656   if (getLexer().isNot(AsmToken::Colon))
6657     return MatchOperand_ParseFail;
6658 
6659   Parser.Lex();
6660   if (getLexer().isNot(AsmToken::LBrac))
6661     return MatchOperand_ParseFail;
6662 
6663   Parser.Lex();
6664   if (getParser().parseAbsoluteExpression(Sels[0]))
6665     return MatchOperand_ParseFail;
6666   if (0 > Sels[0] || 7 < Sels[0])
6667     return MatchOperand_ParseFail;
6668 
6669   for (size_t i = 1; i < 8; ++i) {
6670     if (getLexer().isNot(AsmToken::Comma))
6671       return MatchOperand_ParseFail;
6672 
6673     Parser.Lex();
6674     if (getParser().parseAbsoluteExpression(Sels[i]))
6675       return MatchOperand_ParseFail;
6676     if (0 > Sels[i] || 7 < Sels[i])
6677       return MatchOperand_ParseFail;
6678   }
6679 
6680   if (getLexer().isNot(AsmToken::RBrac))
6681     return MatchOperand_ParseFail;
6682   Parser.Lex();
6683 
6684   unsigned DPP8 = 0;
6685   for (size_t i = 0; i < 8; ++i)
6686     DPP8 |= (Sels[i] << (i * 3));
6687 
6688   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6689   return MatchOperand_Success;
6690 }
6691 
6692 OperandMatchResultTy
6693 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6694   using namespace AMDGPU::DPP;
6695 
6696   SMLoc S = Parser.getTok().getLoc();
6697   StringRef Prefix;
6698   int64_t Int;
6699 
6700   if (getLexer().getKind() == AsmToken::Identifier) {
6701     Prefix = Parser.getTok().getString();
6702   } else {
6703     return MatchOperand_NoMatch;
6704   }
6705 
6706   if (Prefix == "row_mirror") {
6707     Int = DppCtrl::ROW_MIRROR;
6708     Parser.Lex();
6709   } else if (Prefix == "row_half_mirror") {
6710     Int = DppCtrl::ROW_HALF_MIRROR;
6711     Parser.Lex();
6712   } else {
6713     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6714     if (Prefix != "quad_perm"
6715         && Prefix != "row_shl"
6716         && Prefix != "row_shr"
6717         && Prefix != "row_ror"
6718         && Prefix != "wave_shl"
6719         && Prefix != "wave_rol"
6720         && Prefix != "wave_shr"
6721         && Prefix != "wave_ror"
6722         && Prefix != "row_bcast"
6723         && Prefix != "row_share"
6724         && Prefix != "row_xmask") {
6725       return MatchOperand_NoMatch;
6726     }
6727 
6728     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6729       return MatchOperand_NoMatch;
6730 
6731     if (!isVI() && !isGFX9() &&
6732         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6733          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6734          Prefix == "row_bcast"))
6735       return MatchOperand_NoMatch;
6736 
6737     Parser.Lex();
6738     if (getLexer().isNot(AsmToken::Colon))
6739       return MatchOperand_ParseFail;
6740 
6741     if (Prefix == "quad_perm") {
6742       // quad_perm:[%d,%d,%d,%d]
6743       Parser.Lex();
6744       if (getLexer().isNot(AsmToken::LBrac))
6745         return MatchOperand_ParseFail;
6746       Parser.Lex();
6747 
6748       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6749         return MatchOperand_ParseFail;
6750 
6751       for (int i = 0; i < 3; ++i) {
6752         if (getLexer().isNot(AsmToken::Comma))
6753           return MatchOperand_ParseFail;
6754         Parser.Lex();
6755 
6756         int64_t Temp;
6757         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6758           return MatchOperand_ParseFail;
6759         const int shift = i*2 + 2;
6760         Int += (Temp << shift);
6761       }
6762 
6763       if (getLexer().isNot(AsmToken::RBrac))
6764         return MatchOperand_ParseFail;
6765       Parser.Lex();
6766     } else {
6767       // sel:%d
6768       Parser.Lex();
6769       if (getParser().parseAbsoluteExpression(Int))
6770         return MatchOperand_ParseFail;
6771 
6772       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6773         Int |= DppCtrl::ROW_SHL0;
6774       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6775         Int |= DppCtrl::ROW_SHR0;
6776       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6777         Int |= DppCtrl::ROW_ROR0;
6778       } else if (Prefix == "wave_shl" && 1 == Int) {
6779         Int = DppCtrl::WAVE_SHL1;
6780       } else if (Prefix == "wave_rol" && 1 == Int) {
6781         Int = DppCtrl::WAVE_ROL1;
6782       } else if (Prefix == "wave_shr" && 1 == Int) {
6783         Int = DppCtrl::WAVE_SHR1;
6784       } else if (Prefix == "wave_ror" && 1 == Int) {
6785         Int = DppCtrl::WAVE_ROR1;
6786       } else if (Prefix == "row_bcast") {
6787         if (Int == 15) {
6788           Int = DppCtrl::BCAST15;
6789         } else if (Int == 31) {
6790           Int = DppCtrl::BCAST31;
6791         } else {
6792           return MatchOperand_ParseFail;
6793         }
6794       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6795         Int |= DppCtrl::ROW_SHARE_FIRST;
6796       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6797         Int |= DppCtrl::ROW_XMASK_FIRST;
6798       } else {
6799         return MatchOperand_ParseFail;
6800       }
6801     }
6802   }
6803 
6804   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6805   return MatchOperand_Success;
6806 }
6807 
6808 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6809   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6810 }
6811 
6812 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6813   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6814 }
6815 
6816 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6817   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6818 }
6819 
6820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6821   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6822 }
6823 
6824 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6825   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6826 }
6827 
6828 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6829   OptionalImmIndexMap OptionalIdx;
6830 
6831   unsigned I = 1;
6832   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6833   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6834     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6835   }
6836 
6837   int Fi = 0;
6838   for (unsigned E = Operands.size(); I != E; ++I) {
6839     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6840                                             MCOI::TIED_TO);
6841     if (TiedTo != -1) {
6842       assert((unsigned)TiedTo < Inst.getNumOperands());
6843       // handle tied old or src2 for MAC instructions
6844       Inst.addOperand(Inst.getOperand(TiedTo));
6845     }
6846     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6847     // Add the register arguments
6848     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6849       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6850       // Skip it.
6851       continue;
6852     }
6853 
6854     if (IsDPP8) {
6855       if (Op.isDPP8()) {
6856         Op.addImmOperands(Inst, 1);
6857       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6858         Op.addRegWithFPInputModsOperands(Inst, 2);
6859       } else if (Op.isFI()) {
6860         Fi = Op.getImm();
6861       } else if (Op.isReg()) {
6862         Op.addRegOperands(Inst, 1);
6863       } else {
6864         llvm_unreachable("Invalid operand type");
6865       }
6866     } else {
6867       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6868         Op.addRegWithFPInputModsOperands(Inst, 2);
6869       } else if (Op.isDPPCtrl()) {
6870         Op.addImmOperands(Inst, 1);
6871       } else if (Op.isImm()) {
6872         // Handle optional arguments
6873         OptionalIdx[Op.getImmTy()] = I;
6874       } else {
6875         llvm_unreachable("Invalid operand type");
6876       }
6877     }
6878   }
6879 
6880   if (IsDPP8) {
6881     using namespace llvm::AMDGPU::DPP;
6882     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6883   } else {
6884     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6885     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6886     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6887     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6888       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6889     }
6890   }
6891 }
6892 
6893 //===----------------------------------------------------------------------===//
6894 // sdwa
6895 //===----------------------------------------------------------------------===//
6896 
6897 OperandMatchResultTy
6898 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6899                               AMDGPUOperand::ImmTy Type) {
6900   using namespace llvm::AMDGPU::SDWA;
6901 
6902   SMLoc S = Parser.getTok().getLoc();
6903   StringRef Value;
6904   OperandMatchResultTy res;
6905 
6906   res = parseStringWithPrefix(Prefix, Value);
6907   if (res != MatchOperand_Success) {
6908     return res;
6909   }
6910 
6911   int64_t Int;
6912   Int = StringSwitch<int64_t>(Value)
6913         .Case("BYTE_0", SdwaSel::BYTE_0)
6914         .Case("BYTE_1", SdwaSel::BYTE_1)
6915         .Case("BYTE_2", SdwaSel::BYTE_2)
6916         .Case("BYTE_3", SdwaSel::BYTE_3)
6917         .Case("WORD_0", SdwaSel::WORD_0)
6918         .Case("WORD_1", SdwaSel::WORD_1)
6919         .Case("DWORD", SdwaSel::DWORD)
6920         .Default(0xffffffff);
6921   Parser.Lex(); // eat last token
6922 
6923   if (Int == 0xffffffff) {
6924     return MatchOperand_ParseFail;
6925   }
6926 
6927   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6928   return MatchOperand_Success;
6929 }
6930 
6931 OperandMatchResultTy
6932 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6933   using namespace llvm::AMDGPU::SDWA;
6934 
6935   SMLoc S = Parser.getTok().getLoc();
6936   StringRef Value;
6937   OperandMatchResultTy res;
6938 
6939   res = parseStringWithPrefix("dst_unused", Value);
6940   if (res != MatchOperand_Success) {
6941     return res;
6942   }
6943 
6944   int64_t Int;
6945   Int = StringSwitch<int64_t>(Value)
6946         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6947         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6948         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6949         .Default(0xffffffff);
6950   Parser.Lex(); // eat last token
6951 
6952   if (Int == 0xffffffff) {
6953     return MatchOperand_ParseFail;
6954   }
6955 
6956   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6957   return MatchOperand_Success;
6958 }
6959 
6960 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6961   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6962 }
6963 
6964 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6965   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6966 }
6967 
6968 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6969   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6970 }
6971 
6972 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6973   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6974 }
6975 
6976 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6977   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6978 }
6979 
6980 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6981                               uint64_t BasicInstType,
6982                               bool SkipDstVcc,
6983                               bool SkipSrcVcc) {
6984   using namespace llvm::AMDGPU::SDWA;
6985 
6986   OptionalImmIndexMap OptionalIdx;
6987   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6988   bool SkippedVcc = false;
6989 
6990   unsigned I = 1;
6991   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6992   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6993     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6994   }
6995 
6996   for (unsigned E = Operands.size(); I != E; ++I) {
6997     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6998     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6999         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7000       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7001       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7002       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7003       // Skip VCC only if we didn't skip it on previous iteration.
7004       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7005       if (BasicInstType == SIInstrFlags::VOP2 &&
7006           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7007            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7008         SkippedVcc = true;
7009         continue;
7010       } else if (BasicInstType == SIInstrFlags::VOPC &&
7011                  Inst.getNumOperands() == 0) {
7012         SkippedVcc = true;
7013         continue;
7014       }
7015     }
7016     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7017       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7018     } else if (Op.isImm()) {
7019       // Handle optional arguments
7020       OptionalIdx[Op.getImmTy()] = I;
7021     } else {
7022       llvm_unreachable("Invalid operand type");
7023     }
7024     SkippedVcc = false;
7025   }
7026 
7027   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7028       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7029       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7030     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7031     switch (BasicInstType) {
7032     case SIInstrFlags::VOP1:
7033       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7034       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7035         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7036       }
7037       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7038       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7039       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7040       break;
7041 
7042     case SIInstrFlags::VOP2:
7043       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7044       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7045         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7046       }
7047       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7048       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7049       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7050       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7051       break;
7052 
7053     case SIInstrFlags::VOPC:
7054       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7055         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7056       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7057       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7058       break;
7059 
7060     default:
7061       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7062     }
7063   }
7064 
7065   // special case v_mac_{f16, f32}:
7066   // it has src2 register operand that is tied to dst operand
7067   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7068       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7069     auto it = Inst.begin();
7070     std::advance(
7071       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7072     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7073   }
7074 }
7075 
7076 //===----------------------------------------------------------------------===//
7077 // mAI
7078 //===----------------------------------------------------------------------===//
7079 
7080 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7081   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7082 }
7083 
7084 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7085   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7086 }
7087 
7088 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7089   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7090 }
7091 
7092 /// Force static initialization.
7093 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7094   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7095   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7096 }
7097 
7098 #define GET_REGISTER_MATCHER
7099 #define GET_MATCHER_IMPLEMENTATION
7100 #define GET_MNEMONIC_SPELL_CHECKER
7101 #include "AMDGPUGenAsmMatcher.inc"
7102 
7103 // This fuction should be defined after auto-generated include so that we have
7104 // MatchClassKind enum defined
7105 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7106                                                      unsigned Kind) {
7107   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7108   // But MatchInstructionImpl() expects to meet token and fails to validate
7109   // operand. This method checks if we are given immediate operand but expect to
7110   // get corresponding token.
7111   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7112   switch (Kind) {
7113   case MCK_addr64:
7114     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7115   case MCK_gds:
7116     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7117   case MCK_lds:
7118     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7119   case MCK_glc:
7120     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7121   case MCK_idxen:
7122     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7123   case MCK_offen:
7124     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7125   case MCK_SSrcB32:
7126     // When operands have expression values, they will return true for isToken,
7127     // because it is not possible to distinguish between a token and an
7128     // expression at parse time. MatchInstructionImpl() will always try to
7129     // match an operand as a token, when isToken returns true, and when the
7130     // name of the expression is not a valid token, the match will fail,
7131     // so we need to handle it here.
7132     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7133   case MCK_SSrcF32:
7134     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7135   case MCK_SoppBrTarget:
7136     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7137   case MCK_VReg32OrOff:
7138     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7139   case MCK_InterpSlot:
7140     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7141   case MCK_Attr:
7142     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7143   case MCK_AttrChan:
7144     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7145   case MCK_ImmSMEMOffset:
7146     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7147   case MCK_SReg_64:
7148   case MCK_SReg_64_XEXEC:
7149     // Null is defined as a 32-bit register but
7150     // it should also be enabled with 64-bit operands.
7151     // The following code enables it for SReg_64 operands
7152     // used as source and destination. Remaining source
7153     // operands are handled in isInlinableImm.
7154     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7155   default:
7156     return Match_InvalidOperand;
7157   }
7158 }
7159 
7160 //===----------------------------------------------------------------------===//
7161 // endpgm
7162 //===----------------------------------------------------------------------===//
7163 
7164 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7165   SMLoc S = Parser.getTok().getLoc();
7166   int64_t Imm = 0;
7167 
7168   if (!parseExpr(Imm)) {
7169     // The operand is optional, if not present default to 0
7170     Imm = 0;
7171   }
7172 
7173   if (!isUInt<16>(Imm)) {
7174     Error(S, "expected a 16-bit value");
7175     return MatchOperand_ParseFail;
7176   }
7177 
7178   Operands.push_back(
7179       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7180   return MatchOperand_Success;
7181 }
7182 
7183 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7184