1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
// Classification of a parsed register reference. IS_SPECIAL covers named
// (non-numbered) registers; the others follow the register-file naming.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
/// A single parsed AMDGPU assembly operand: a token, an immediate (optionally
/// carrying abs/neg/sext source modifiers), a register (optionally carrying
/// modifiers), or a general MCExpr. The many is*() predicates below are the
/// hooks used by the tablegen-generated asm matcher.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the payload union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers that may be attached to a register or immediate
  /// operand. FP modifiers (abs/neg) and the int modifier (sext) are
  /// mutually exclusive on one operand (see getModifiersOperand).
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode abs/neg into the SISrcMods bit layout used by the src-modifier
    // immediate operand of the MCInst.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode sext into the SISrcMods bit layout.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Identifies which named/optional immediate operand an Immediate
  // represents (e.g. 'gds', 'offset:', dpp/sdwa controls). ImmTyNone is a
  // plain numeric immediate.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Operand payload; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for a register in any of the vector register classes listed below.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // Predicates for the generated matcher: one per named immediate operand
  // type. The offset variants additionally range-check the value.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar (SALU) source operands — register or inline constant.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: like SCSrc* but additionally accepting literal immediates
  // (and, for 32-bit forms, expressions).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: vector (VALU) source operands — VGPR/SGPR or inline constant,
  // but no literal immediate.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: like VCSrc* but additionally accepting literal immediates.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // KImm*: literal-constant-only operands.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an Expression operand that names a symbol, return the symbol name
  // so it can be re-interpreted as a token (see isToken()).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are only meaningful on registers and plain immediates.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emit the encoded modifier word, then the register/immediate itself.
  // Modifiers are not re-applied to the immediate value here.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Print the debug name of an ImmTy; used by print() below.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers, one per operand kind. Start/end locations default to
  // the single location provided where no end is given.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is unused here — presumably kept
  // for caller compatibility; confirm before removing.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
792 
793 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
794   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
795   return OS;
796 }
797 
798 //===----------------------------------------------------------------------===//
799 // AsmParser
800 //===----------------------------------------------------------------------===//
801 
802 // Holds info related to the current kernel, e.g. count of SGPRs used.
803 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
804 // .amdgpu_hsa_kernel or at EOF.
805 class KernelScopeInfo {
806   int SgprIndexUnusedMin = -1;
807   int VgprIndexUnusedMin = -1;
808   MCContext *Ctx = nullptr;
809 
810   void usesSgprAt(int i) {
811     if (i >= SgprIndexUnusedMin) {
812       SgprIndexUnusedMin = ++i;
813       if (Ctx) {
814         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
815         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
816       }
817     }
818   }
819 
820   void usesVgprAt(int i) {
821     if (i >= VgprIndexUnusedMin) {
822       VgprIndexUnusedMin = ++i;
823       if (Ctx) {
824         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
825         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
826       }
827     }
828   }
829 
830 public:
831   KernelScopeInfo() = default;
832 
833   void initialize(MCContext &Context) {
834     Ctx = &Context;
835     usesSgprAt(SgprIndexUnusedMin = -1);
836     usesVgprAt(VgprIndexUnusedMin = -1);
837   }
838 
839   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
840     switch (RegKind) {
841       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
842       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
843       default: break;
844     }
845   }
846 };
847 
848 class AMDGPUAsmParser : public MCTargetAsmParser {
849   MCAsmParser &Parser;
850 
851   // Number of extra operands parsed after the first optional operand.
852   // This may be necessary to skip hardcoded mandatory operands.
853   static const unsigned MAX_OPR_LOOKAHEAD = 8;
854 
855   unsigned ForcedEncodingSize = 0;
856   bool ForcedDPP = false;
857   bool ForcedSDWA = false;
858   KernelScopeInfo KernelScope;
859 
860   /// @name Auto-generated Match Functions
861   /// {
862 
863 #define GET_ASSEMBLER_HEADER
864 #include "AMDGPUGenAsmMatcher.inc"
865 
866   /// }
867 
868 private:
869   bool ParseAsAbsoluteExpression(uint32_t &Ret);
870   bool OutOfRangeError(SMRange Range);
871   /// Calculate VGPR/SGPR blocks required for given target, reserved
872   /// registers, and user-specified NextFreeXGPR values.
873   ///
874   /// \param Features [in] Target features, used for bug corrections.
875   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
876   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
877   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
878   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
879   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
880   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
881   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
882   /// \param VGPRBlocks [out] Result VGPR block count.
883   /// \param SGPRBlocks [out] Result SGPR block count.
884   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
885                           bool FlatScrUsed, bool XNACKUsed,
886                           unsigned NextFreeVGPR, SMRange VGPRRange,
887                           unsigned NextFreeSGPR, SMRange SGPRRange,
888                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
889   bool ParseDirectiveAMDGCNTarget();
890   bool ParseDirectiveAMDHSAKernel();
891   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
892   bool ParseDirectiveHSACodeObjectVersion();
893   bool ParseDirectiveHSACodeObjectISA();
894   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
895   bool ParseDirectiveAMDKernelCodeT();
896   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
897   bool ParseDirectiveAMDGPUHsaKernel();
898 
899   bool ParseDirectiveISAVersion();
900   bool ParseDirectiveHSAMetadata();
901   bool ParseDirectivePALMetadataBegin();
902   bool ParseDirectivePALMetadata();
903 
904   /// Common code to parse out a block of text (typically YAML) between start and
905   /// end directives.
906   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
907                            const char *AssemblerDirectiveEnd,
908                            std::string &CollectString);
909 
910   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
911                              RegisterKind RegKind, unsigned Reg1,
912                              unsigned RegNum);
913   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
914                            unsigned& RegNum, unsigned& RegWidth,
915                            unsigned *DwordRegIndex);
916   bool isRegister();
917   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
918   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
919   void initializeGprCountSymbol(RegisterKind RegKind);
920   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
921                              unsigned RegWidth);
922   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
923                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
924   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
925                  bool IsGdsHardcoded);
926 
927 public:
928   enum AMDGPUMatchResultTy {
929     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
930   };
931   enum OperandMode {
932     OperandMode_Default,
933     OperandMode_NSA,
934   };
935 
936   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
937 
  /// Construct the parser. Defaults the subtarget to "southern-islands" when
  /// no target features were supplied, then pre-defines the ISA-version and
  /// GPR-count assembler symbols appropriate for the code-object ABI in use.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        // Code-object v3 spells the version symbols ".amdgcn.gfx_generation_*".
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        // Older ABI uses ".option.machine_version_*" symbol names.
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // GPR usage is tracked via symbols on v3, and via KernelScope otherwise.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
982 
  // Subtarget feature queries, forwarded to the AMDGPU target utilities.

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  // GPU generation predicates.
  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  // True if 1/(2*pi) can be encoded as an inline immediate on this target.
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  // True if flat memory instructions accept an immediate offset.
  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // The SGPR102/SGPR103 pair is excluded on VI and GFX9.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  // The SGPR104/SGPR105 pair is only present on GFX10.
  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
1034 
1035   AMDGPUTargetStreamer &getTargetStreamer() {
1036     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1037     return static_cast<AMDGPUTargetStreamer &>(TS);
1038   }
1039 
1040   const MCRegisterInfo *getMRI() const {
1041     // We need this const_cast because for some reason getContext() is not const
1042     // in MCAsmParser.
1043     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1044   }
1045 
  // Accessor for the instruction set description.
  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // Feature bits of the current subtarget.
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
1053 
  // Record / query an encoding explicitly forced by the caller
  // (e.g. while handling a mnemonic suffix).
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // A forced 64-bit encoding selects the VOP3 form.
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
1062   ArrayRef<unsigned> getMatchedVariants() const;
1063 
1064   std::unique_ptr<AMDGPUOperand> parseRegister();
1065   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1066   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1067   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1068                                       unsigned Kind) override;
1069   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1070                                OperandVector &Operands, MCStreamer &Out,
1071                                uint64_t &ErrorInfo,
1072                                bool MatchingInlineAsm) override;
1073   bool ParseDirective(AsmToken DirectiveID) override;
1074   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1075                                     OperandMode Mode = OperandMode_Default);
1076   StringRef parseMnemonicSuffix(StringRef Name);
1077   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1078                         SMLoc NameLoc, OperandVector &Operands) override;
1079   //bool ProcessInstruction(MCInst &Inst);
1080 
1081   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1082 
1083   OperandMatchResultTy
1084   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1085                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1086                      bool (*ConvertResult)(int64_t &) = nullptr);
1087 
1088   OperandMatchResultTy
1089   parseOperandArrayWithPrefix(const char *Prefix,
1090                               OperandVector &Operands,
1091                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1092                               bool (*ConvertResult)(int64_t&) = nullptr);
1093 
1094   OperandMatchResultTy
1095   parseNamedBit(const char *Name, OperandVector &Operands,
1096                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1097   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1098                                              StringRef &Value);
1099 
1100   bool isModifier();
1101   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1102   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1103   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1104   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1105   bool parseSP3NegModifier();
1106   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1107   OperandMatchResultTy parseReg(OperandVector &Operands);
1108   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1109   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1110   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1111   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1112   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1113   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1114   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1115 
1116   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  // DS conversions: both share cvtDSImpl, differing only in whether gds is
  // hardcoded (off for cvtDS, on for cvtDSGds).
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1119   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1120 
1121   bool parseCnt(int64_t &IntVal);
1122   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1123   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1124 
1125 private:
  // Holds a parsed id for sendmsg/hwreg-style operands; IsSymbolic records
  // whether the value was written by name rather than as a number.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };
1132 
1133   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1134   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1135 
1136   void errorExpTgt();
1137   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1138 
1139   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1140   bool validateSOPLiteral(const MCInst &Inst) const;
1141   bool validateConstantBusLimitations(const MCInst &Inst);
1142   bool validateEarlyClobberLimitations(const MCInst &Inst);
1143   bool validateIntClampSupported(const MCInst &Inst);
1144   bool validateMIMGAtomicDMask(const MCInst &Inst);
1145   bool validateMIMGGatherDMask(const MCInst &Inst);
1146   bool validateMIMGDataSize(const MCInst &Inst);
1147   bool validateMIMGAddrSize(const MCInst &Inst);
1148   bool validateMIMGD16(const MCInst &Inst);
1149   bool validateMIMGDim(const MCInst &Inst);
1150   bool validateLdsDirect(const MCInst &Inst);
1151   bool validateVOP3Literal(const MCInst &Inst) const;
1152   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1153   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1154   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1155 
1156   bool isId(const StringRef Id) const;
1157   bool isId(const AsmToken &Token, const StringRef Id) const;
1158   bool isToken(const AsmToken::TokenKind Kind) const;
1159   bool trySkipId(const StringRef Id);
1160   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1161   bool trySkipToken(const AsmToken::TokenKind Kind);
1162   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1163   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1164   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1165   AsmToken::TokenKind getTokenKind() const;
1166   bool parseExpr(int64_t &Imm);
1167   StringRef getTokenStr() const;
1168   AsmToken peekToken();
1169   AsmToken getToken() const;
1170   SMLoc getLoc() const;
1171   void lex();
1172 
1173 public:
1174   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1175   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1176 
1177   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1178   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1179   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1180   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1181   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1182 
1183   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1184                             const unsigned MinVal,
1185                             const unsigned MaxVal,
1186                             const StringRef ErrMsg);
1187   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1188   bool parseSwizzleOffset(int64_t &Imm);
1189   bool parseSwizzleMacro(int64_t &Imm);
1190   bool parseSwizzleQuadPerm(int64_t &Imm);
1191   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1192   bool parseSwizzleBroadcast(int64_t &Imm);
1193   bool parseSwizzleSwap(int64_t &Imm);
1194   bool parseSwizzleReverse(int64_t &Imm);
1195 
1196   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1197   int64_t parseGPRIdxMacro();
1198 
  // MUBUF conversions: thin wrappers over cvtMubufImpl selecting the
  // (IsAtomic, IsAtomicReturn, IsLds) combination.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1203   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1204 
1205   AMDGPUOperand::Ptr defaultDLC() const;
1206   AMDGPUOperand::Ptr defaultGLC() const;
1207   AMDGPUOperand::Ptr defaultSLC() const;
1208 
1209   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1210   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1211   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1212   AMDGPUOperand::Ptr defaultOffsetU12() const;
1213   AMDGPUOperand::Ptr defaultOffsetS13() const;
1214 
1215   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1216 
1217   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1218                OptionalImmIndexMap &OptionalIdx);
1219   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1220   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1221   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1222 
1223   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1224 
1225   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1226                bool IsAtomic = false);
1227   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1228 
1229   OperandMatchResultTy parseDim(OperandVector &Operands);
1230   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1231   AMDGPUOperand::Ptr defaultRowMask() const;
1232   AMDGPUOperand::Ptr defaultBankMask() const;
1233   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1234   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1235 
1236   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1237                                     AMDGPUOperand::ImmTy Type);
1238   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1239   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1240   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1241   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1242   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1243   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1244                 uint64_t BasicInstType, bool skipVcc = false);
1245 
1246   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1247   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1248 };
1249 
// Table entry describing an optional instruction operand: its assembly name,
// the immediate kind it produces, whether it is a flag-like bit, and an
// optional callback that post-processes the parsed value.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};
1256 
1257 } // end anonymous namespace
1258 
1259 // May be called with integer type with equivalent bitwidth.
1260 static const fltSemantics *getFltSemantics(unsigned Size) {
1261   switch (Size) {
1262   case 4:
1263     return &APFloat::IEEEsingle();
1264   case 8:
1265     return &APFloat::IEEEdouble();
1266   case 2:
1267     return &APFloat::IEEEhalf();
1268   default:
1269     llvm_unreachable("unsupported fp type");
1270   }
1271 }
1272 
1273 static const fltSemantics *getFltSemantics(MVT VT) {
1274   return getFltSemantics(VT.getSizeInBits() / 8);
1275 }
1276 
// Select the float semantics matching an operand's encoded OperandType
// (used when converting fp literals to the operand's width).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operand kinds.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  // 64-bit operand kinds.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit and packed 16-bit operand kinds.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1302 
1303 //===----------------------------------------------------------------------===//
1304 // Operand
1305 //===----------------------------------------------------------------------===//
1306 
// Convert FPLiteral (held in double semantics) to the semantics of VT.
// NOTE: FPLiteral is converted in place; callers rely on the converted value.
// Returns false only when the conversion both lost the value and reported
// overflow or underflow; mere precision loss is accepted.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1324 
1325 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1326   return isUIntN(Size, Val) || isIntN(Size, Val);
1327 }
1328 
// Returns true if this immediate operand can be encoded as an inline
// constant for an operand of the given type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the literal must convert to the operand's fp type
    // without overflow/underflow before the inline-constant check applies.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // A value that does not fit the operand width under either signedness
  // cannot be an inline constant.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1391 
// Returns true if this immediate operand can be encoded as a (non-inline)
// literal for an operand of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    // 64-bit literals are still encoded in 32 bits.
    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
1438 
1439 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1440   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1441 }
1442 
1443 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1444   if (AsmParser->isVI())
1445     return isVReg32();
1446   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1447     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1448   else
1449     return false;
1450 }
1451 
// Per-type SDWA operand predicates, all forwarding to isSDWAOperand.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1467 
1468 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1469 {
1470   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1471   assert(Size == 2 || Size == 4 || Size == 8);
1472 
1473   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1474 
1475   if (Imm.Mods.Abs) {
1476     Val &= ~FpSignMask;
1477   }
1478   if (Imm.Mods.Neg) {
1479     Val ^= FpSignMask;
1480   }
1481 
1482   return Val;
1483 }
1484 
1485 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1486   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1487                              Inst.getNumOperands())) {
1488     addLiteralImmOperand(Inst, Imm.Val,
1489                          ApplyModifiers &
1490                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1491   } else {
1492     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1493     Inst.addOperand(MCOperand::createImm(Imm.Val));
1494   }
1495 }
1496 
// Encode Val as a literal/inline operand of Inst, converting it to the
// width and format implied by the destination operand's OperandType.
// ApplyModifiers requests that fp source modifiers be folded into the bits.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // fp tokens are stored as a double, so the sign bit is at the double's
    // position; int tokens use the operand's own size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      // Inlinable 64-bit constants are emitted as-is.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the double are encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Non-inline: keep only the low 32 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Non-inline 64-bit int literals are encoded from the low 32 bits.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Non-inline: keep only the low 16 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // These operand types only accept inline constants; the predicate
    // methods must have rejected anything else before we get here.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1631 
// Append the immediate as a Bitwidth-bit value: int tokens are truncated to
// Bitwidth bits; fp tokens (stored as a double) are converted to the
// Bitwidth-bit float format and emitted as their raw bit pattern.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
1648 
1649 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1650   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1651 }
1652 
1653 static bool isInlineValue(unsigned Reg) {
1654   switch (Reg) {
1655   case AMDGPU::SRC_SHARED_BASE:
1656   case AMDGPU::SRC_SHARED_LIMIT:
1657   case AMDGPU::SRC_PRIVATE_BASE:
1658   case AMDGPU::SRC_PRIVATE_LIMIT:
1659   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1660     return true;
1661   default:
1662     return false;
1663   }
1664 }
1665 
1666 bool AMDGPUOperand::isInlineValue() const {
1667   return isRegKind() && ::isInlineValue(getReg());
1668 }
1669 
1670 //===----------------------------------------------------------------------===//
1671 // AsmParser
1672 //===----------------------------------------------------------------------===//
1673 
// Map a (register kind, width in 32-bit registers) pair to a register class
// id, or -1 when no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  // Other register kinds (special registers etc.) have no width-indexed class.
  return -1;
}
1706 
// Map a special-register assembly name (including accepted aliases such as
// "shared_base"/"src_shared_base") to its register number, or 0 when the
// name is not a special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(0);
}
1744 
1745 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1746                                     SMLoc &EndLoc) {
1747   auto R = parseRegister();
1748   if (!R) return true;
1749   assert(R->isReg());
1750   RegNo = R->getReg();
1751   StartLoc = R->getStartLoc();
1752   EndLoc = R->getEndLoc();
1753   return false;
1754 }
1755 
1756 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1757                                             RegisterKind RegKind, unsigned Reg1,
1758                                             unsigned RegNum) {
1759   switch (RegKind) {
1760   case IS_SPECIAL:
1761     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1762       Reg = AMDGPU::EXEC;
1763       RegWidth = 2;
1764       return true;
1765     }
1766     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1767       Reg = AMDGPU::FLAT_SCR;
1768       RegWidth = 2;
1769       return true;
1770     }
1771     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1772       Reg = AMDGPU::XNACK_MASK;
1773       RegWidth = 2;
1774       return true;
1775     }
1776     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1777       Reg = AMDGPU::VCC;
1778       RegWidth = 2;
1779       return true;
1780     }
1781     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1782       Reg = AMDGPU::TBA;
1783       RegWidth = 2;
1784       return true;
1785     }
1786     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1787       Reg = AMDGPU::TMA;
1788       RegWidth = 2;
1789       return true;
1790     }
1791     return false;
1792   case IS_VGPR:
1793   case IS_SGPR:
1794   case IS_TTMP:
1795     if (Reg1 != Reg + RegWidth) {
1796       return false;
1797     }
1798     RegWidth++;
1799     return true;
1800   default:
1801     llvm_unreachable("unexpected register kind");
1802   }
1803 }
1804 
1805 static const StringRef Registers[] = {
1806   { "v" },
1807   { "s" },
1808   { "ttmp" },
1809 };
1810 
1811 bool
1812 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1813                             const AsmToken &NextToken) const {
1814 
1815   // A list of consecutive registers: [s0,s1,s2,s3]
1816   if (Token.is(AsmToken::LBrac))
1817     return true;
1818 
1819   if (!Token.is(AsmToken::Identifier))
1820     return false;
1821 
1822   // A single register like s0 or a range of registers like s[0:1]
1823 
1824   StringRef RegName = Token.getString();
1825 
1826   for (StringRef Reg : Registers) {
1827     if (RegName.startswith(Reg)) {
1828       if (Reg.size() < RegName.size()) {
1829         unsigned RegNum;
1830         // A single register with an index: rXX
1831         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1832           return true;
1833       } else {
1834         // A range of registers: r[XX:YY].
1835         if (NextToken.is(AsmToken::LBrac))
1836           return true;
1837       }
1838     }
1839   }
1840 
1841   return getSpecialRegForName(RegName);
1842 }
1843 
1844 bool
1845 AMDGPUAsmParser::isRegister()
1846 {
1847   return isRegister(getToken(), peekToken());
1848 }
1849 
// Parse a register reference and classify it.
// Recognized forms:
//   * a special register name (vcc, exec, m0, ...),
//   * a single register with an index (v0, s2, ttmp4),
//   * a register range (v[XX:YY]; the ":YY" part is optional),
//   * a bracketed list of consecutive single registers ([s0,s1,s2,s3]).
// On success returns true and fills RegKind, Reg (the MC register),
// RegNum and RegWidth (in dwords); if DwordRegIndex is non-null it
// receives the first dword index of the register (0 for specials).
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Classify by prefix and remember where the numeric part starts.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // Single-element range v[XX]: lo == hi.
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Every element must be a single register of the same kind,
        // consecutive with the previous one.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Translate (kind, first dword, width) into the actual MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Reject registers that do not exist on the current subtarget.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
1978 
1979 Optional<StringRef>
1980 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1981   switch (RegKind) {
1982   case IS_VGPR:
1983     return StringRef(".amdgcn.next_free_vgpr");
1984   case IS_SGPR:
1985     return StringRef(".amdgcn.next_free_sgpr");
1986   default:
1987     return None;
1988   }
1989 }
1990 
1991 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1992   auto SymbolName = getGprCountSymbolName(RegKind);
1993   assert(SymbolName && "initializing invalid register kind");
1994   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1995   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1996 }
1997 
// Bump the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the
// highest register index seen so far. Returns true on success; emits a
// diagnostic and returns false if the symbol has been defined in an
// incompatible way.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  // Only VGPR/SGPR kinds have an associated symbol; others are a no-op.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register reference.
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  // Error() returns true, so "!Error(...)" yields false (failure).
  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Grow the tracked count monotonically; never shrink it.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
2026 
2027 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2028   const auto &Tok = Parser.getTok();
2029   SMLoc StartLoc = Tok.getLoc();
2030   SMLoc EndLoc = Tok.getEndLoc();
2031   RegisterKind RegKind;
2032   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2033 
2034   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2035     //FIXME: improve error messages (bug 41303).
2036     Error(StartLoc, "not a valid operand.");
2037     return nullptr;
2038   }
2039   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2040     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2041       return nullptr;
2042   } else
2043     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2044   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2045 }
2046 
2047 OperandMatchResultTy
2048 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2049   // TODO: add syntactic sugar for 1/(2*PI)
2050 
2051   assert(!isRegister());
2052   assert(!isModifier());
2053 
2054   const auto& Tok = getToken();
2055   const auto& NextTok = peekToken();
2056   bool IsReal = Tok.is(AsmToken::Real);
2057   SMLoc S = getLoc();
2058   bool Negate = false;
2059 
2060   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2061     lex();
2062     IsReal = true;
2063     Negate = true;
2064   }
2065 
2066   if (IsReal) {
2067     // Floating-point expressions are not supported.
2068     // Can only allow floating-point literals with an
2069     // optional sign.
2070 
2071     StringRef Num = getTokenStr();
2072     lex();
2073 
2074     APFloat RealVal(APFloat::IEEEdouble());
2075     auto roundMode = APFloat::rmNearestTiesToEven;
2076     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2077       return MatchOperand_ParseFail;
2078     }
2079     if (Negate)
2080       RealVal.changeSign();
2081 
2082     Operands.push_back(
2083       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2084                                AMDGPUOperand::ImmTyNone, true));
2085 
2086     return MatchOperand_Success;
2087 
2088   } else {
2089     int64_t IntVal;
2090     const MCExpr *Expr;
2091     SMLoc S = getLoc();
2092 
2093     if (HasSP3AbsModifier) {
2094       // This is a workaround for handling expressions
2095       // as arguments of SP3 'abs' modifier, for example:
2096       //     |1.0|
2097       //     |-1|
2098       //     |1+x|
2099       // This syntax is not compatible with syntax of standard
2100       // MC expressions (due to the trailing '|').
2101       SMLoc EndLoc;
2102       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2103         return MatchOperand_ParseFail;
2104     } else {
2105       if (Parser.parseExpression(Expr))
2106         return MatchOperand_ParseFail;
2107     }
2108 
2109     if (Expr->evaluateAsAbsolute(IntVal)) {
2110       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2111     } else {
2112       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2113     }
2114 
2115     return MatchOperand_Success;
2116   }
2117 
2118   return MatchOperand_NoMatch;
2119 }
2120 
2121 OperandMatchResultTy
2122 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2123   if (!isRegister())
2124     return MatchOperand_NoMatch;
2125 
2126   if (auto R = parseRegister()) {
2127     assert(R->isReg());
2128     Operands.push_back(std::move(R));
2129     return MatchOperand_Success;
2130   }
2131   return MatchOperand_ParseFail;
2132 }
2133 
2134 OperandMatchResultTy
2135 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2136   auto res = parseReg(Operands);
2137   if (res != MatchOperand_NoMatch) {
2138     return res;
2139   } else if (isModifier()) {
2140     return MatchOperand_NoMatch;
2141   } else {
2142     return parseImm(Operands, HasSP3AbsMod);
2143   }
2144 }
2145 
2146 bool
2147 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2148   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2149     const auto &str = Token.getString();
2150     return str == "abs" || str == "neg" || str == "sext";
2151   }
2152   return false;
2153 }
2154 
2155 bool
2156 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2157   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2158 }
2159 
2160 bool
2161 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2162   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2163 }
2164 
2165 bool
2166 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2167   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2168 }
2169 
2170 // Check if this is an operand modifier or an opcode modifier
2171 // which may look like an expression but it is not. We should
2172 // avoid parsing these modifiers as expressions. Currently
2173 // recognized sequences are:
2174 //   |...|
2175 //   abs(...)
2176 //   neg(...)
2177 //   sext(...)
2178 //   -reg
2179 //   -|...|
2180 //   -abs(...)
2181 //   name:...
2182 // Note that simple opcode modifiers like 'gds' may be parsed as
2183 // expressions; this is a special case. See getExpressionAsToken.
2184 //
2185 bool
2186 AMDGPUAsmParser::isModifier() {
2187 
2188   AsmToken Tok = getToken();
2189   AsmToken NextToken[2];
2190   peekTokens(NextToken);
2191 
2192   return isOperandModifier(Tok, NextToken[0]) ||
2193          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2194          isOpcodeModifierWithVal(Tok, NextToken[0]);
2195 }
2196 
2197 // Check if the current token is an SP3 'neg' modifier.
2198 // Currently this modifier is allowed in the following context:
2199 //
2200 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2201 // 2. Before an 'abs' modifier: -abs(...)
2202 // 3. Before an SP3 'abs' modifier: -|...|
2203 //
2204 // In all other cases "-" is handled as a part
2205 // of an expression that follows the sign.
2206 //
2207 // Note: When "-" is followed by an integer literal,
2208 // this is interpreted as integer negation rather
2209 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
2211 // NEG modifier would have resulted in different meaning
2212 // of integer literals used with VOP1/2/C and VOP3,
2213 // for example:
2214 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2215 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2216 // Negative fp literals with preceding "-" are
// handled likewise for uniformity.
2218 //
2219 bool
2220 AMDGPUAsmParser::parseSP3NegModifier() {
2221 
2222   AsmToken NextToken[2];
2223   peekTokens(NextToken);
2224 
2225   if (isToken(AsmToken::Minus) &&
2226       (isRegister(NextToken[0], NextToken[1]) ||
2227        NextToken[0].is(AsmToken::Pipe) ||
2228        isId(NextToken[0], "abs"))) {
2229     lex();
2230     return true;
2231   }
2232 
2233   return false;
2234 }
2235 
// Parse an operand with optional floating-point input modifiers.
// Supported forms: neg(...), abs(...), the SP3-style '-' prefix and
// the SP3 |...| (abs) syntax. When AllowImm is false, only a register
// may follow the modifiers.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // Mixing SP3 '-' with named neg(...) is rejected.
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // Mixing named abs(...) with SP3 |...| is rejected.
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once any modifier was consumed, failure to parse the operand is fatal.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Consume the closing delimiters of the modifiers, innermost first.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  // Attach the modifiers to the operand just parsed.
  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2302 
2303 OperandMatchResultTy
2304 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2305                                                bool AllowImm) {
2306   bool Sext = trySkipId("sext");
2307   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2308     return MatchOperand_ParseFail;
2309 
2310   OperandMatchResultTy Res;
2311   if (AllowImm) {
2312     Res = parseRegOrImm(Operands);
2313   } else {
2314     Res = parseReg(Operands);
2315   }
2316   if (Res != MatchOperand_Success) {
2317     return Sext? MatchOperand_ParseFail : Res;
2318   }
2319 
2320   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2321     return MatchOperand_ParseFail;
2322 
2323   AMDGPUOperand::Modifiers Mods;
2324   Mods.Sext = Sext;
2325 
2326   if (Mods.hasIntModifiers()) {
2327     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2328     if (Op.isExpr()) {
2329       Error(Op.getStartLoc(), "expected an absolute expression");
2330       return MatchOperand_ParseFail;
2331     }
2332     Op.setModifiers(Mods);
2333   }
2334 
2335   return MatchOperand_Success;
2336 }
2337 
// Parse a register operand (immediates disallowed) with optional
// floating-point input modifiers.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
2342 
// Parse a register operand (immediates disallowed) with an optional
// integer input modifier (sext).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
2347 
2348 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2349   auto Loc = getLoc();
2350   if (trySkipId("off")) {
2351     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2352                                                 AMDGPUOperand::ImmTyOff, false));
2353     return MatchOperand_Success;
2354   }
2355 
2356   if (!isRegister())
2357     return MatchOperand_NoMatch;
2358 
2359   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2360   if (Reg) {
2361     Operands.push_back(std::move(Reg));
2362     return MatchOperand_Success;
2363   }
2364 
2365   return MatchOperand_ParseFail;
2366 
2367 }
2368 
// Target-specific checks applied after generic operand matching.
// Returns Match_Success or a specific Match_* failure kind.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // Reject matches that contradict a user-forced encoding
  // (_e32/_e64 suffix, forced DPP or SDWA).
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Some VOP3 opcodes prefer their 32-bit form unless e64 was forced.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  if (TSFlags & SIInstrFlags::FLAT) {
    // FIXME: Produces error without correct column reported.
    auto Opcode = Inst.getOpcode();
    auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);

    // NOTE(review): assumes every FLAT opcode has an offset operand
    // (OpNum != -1) and that it is an immediate — verify; getImm() is
    // called without an isImm() check here.
    const auto &Op = Inst.getOperand(OpNum);
    if (!hasFlatOffsets() && Op.getImm() != 0)
      return Match_InvalidOperand;

    // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
    // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
    if (isGFX10()) {
      if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
        if (!isInt<12>(Op.getImm()))
          return Match_InvalidOperand;
      } else {
        if (!isUInt<11>(Op.getImm()))
          return Match_InvalidOperand;
      }
    }
  }

  return Match_Success;
}
2418 
2419 // What asm variants we should check
2420 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2421   if (getForcedEncodingSize() == 32) {
2422     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2423     return makeArrayRef(Variants);
2424   }
2425 
2426   if (isForcedVOP3()) {
2427     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2428     return makeArrayRef(Variants);
2429   }
2430 
2431   if (isForcedSDWA()) {
2432     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2433                                         AMDGPUAsmVariants::SDWA9};
2434     return makeArrayRef(Variants);
2435   }
2436 
2437   if (isForcedDPP()) {
2438     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2439     return makeArrayRef(Variants);
2440   }
2441 
2442   static const unsigned Variants[] = {
2443     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2444     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2445   };
2446 
2447   return makeArrayRef(Variants);
2448 }
2449 
2450 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2451   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2452   const unsigned Num = Desc.getNumImplicitUses();
2453   for (unsigned i = 0; i < Num; ++i) {
2454     unsigned Reg = Desc.ImplicitUses[i];
2455     switch (Reg) {
2456     case AMDGPU::FLAT_SCR:
2457     case AMDGPU::VCC:
2458     case AMDGPU::VCC_LO:
2459     case AMDGPU::VCC_HI:
2460     case AMDGPU::M0:
2461     case AMDGPU::SGPR_NULL:
2462       return Reg;
2463     default:
2464       break;
2465     }
2466   }
2467   return AMDGPU::NoRegister;
2468 }
2469 
2470 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
2472 // Note that there are no cases when a GFX7 opcode violates
2473 // constant bus limitations due to the use of an f16 constant.
2474 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2475                                        unsigned OpIdx) const {
2476   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2477 
2478   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2479     return false;
2480   }
2481 
2482   const MCOperand &MO = Inst.getOperand(OpIdx);
2483 
2484   int64_t Val = MO.getImm();
2485   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2486 
2487   switch (OpSize) { // expected operand size
2488   case 8:
2489     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2490   case 4:
2491     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2492   case 2: {
2493     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2494     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2495         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2496         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2497         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2498       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2499     } else {
2500       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2501     }
2502   }
2503   default:
2504     llvm_unreachable("invalid operand size");
2505   }
2506 }
2507 
2508 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2509   const MCOperand &MO = Inst.getOperand(OpIdx);
2510   if (MO.isImm()) {
2511     return !isInlineConstant(Inst, OpIdx);
2512   }
2513   return !MO.isReg() ||
2514          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2515 }
2516 
// Verify that the instruction does not read more distinct values over
// the scalar constant bus than allowed (at most 1 before GFX10, 2 on
// GFX10).
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // Only VOP-family and SDWA encodings are subject to the limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Implicit reads (VCC, M0, ...) also occupy the bus.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(Reg)) {
            SGPRsUsed.insert(Reg);
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (isGFX10())
    return ConstantBusUseCount <= 2;

  return ConstantBusUseCount <= 1;
}
2599 
2600 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2601   const unsigned Opcode = Inst.getOpcode();
2602   const MCInstrDesc &Desc = MII.get(Opcode);
2603 
2604   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2605   if (DstIdx == -1 ||
2606       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2607     return true;
2608   }
2609 
2610   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2611 
2612   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2613   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2614   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2615 
2616   assert(DstIdx != -1);
2617   const MCOperand &Dst = Inst.getOperand(DstIdx);
2618   assert(Dst.isReg());
2619   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2620 
2621   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2622 
2623   for (int SrcIdx : SrcIndices) {
2624     if (SrcIdx == -1) break;
2625     const MCOperand &Src = Inst.getOperand(SrcIdx);
2626     if (Src.isReg()) {
2627       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2628       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2629         return false;
2630       }
2631     }
2632   }
2633 
2634   return true;
2635 }
2636 
2637 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2638 
2639   const unsigned Opc = Inst.getOpcode();
2640   const MCInstrDesc &Desc = MII.get(Opc);
2641 
2642   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2643     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2644     assert(ClampIdx != -1);
2645     return Inst.getOperand(ClampIdx).getImm() == 0;
2646   }
2647 
2648   return true;
2649 }
2650 
2651 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2652 
2653   const unsigned Opc = Inst.getOpcode();
2654   const MCInstrDesc &Desc = MII.get(Opc);
2655 
2656   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2657     return true;
2658 
2659   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2660   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2661   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2662 
2663   assert(VDataIdx != -1);
2664   assert(DMaskIdx != -1);
2665   assert(TFEIdx != -1);
2666 
2667   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2668   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2669   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2670   if (DMask == 0)
2671     DMask = 1;
2672 
2673   unsigned DataSize =
2674     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2675   if (hasPackedD16()) {
2676     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2677     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2678       DataSize = (DataSize + 1) / 2;
2679   }
2680 
2681   return (VDataSize / 4) == DataSize + TFESize;
2682 }
2683 
2684 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2685   const unsigned Opc = Inst.getOpcode();
2686   const MCInstrDesc &Desc = MII.get(Opc);
2687 
2688   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2689     return true;
2690 
2691   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2692   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2693       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2694   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2695   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2696   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2697 
2698   assert(VAddr0Idx != -1);
2699   assert(SrsrcIdx != -1);
2700   assert(DimIdx != -1);
2701   assert(SrsrcIdx > VAddr0Idx);
2702 
2703   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2704   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2705   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2706   unsigned VAddrSize =
2707       IsNSA ? SrsrcIdx - VAddr0Idx
2708             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2709 
2710   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2711                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2712                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2713                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2714   if (!IsNSA) {
2715     if (AddrSize > 8)
2716       AddrSize = 16;
2717     else if (AddrSize > 4)
2718       AddrSize = 8;
2719   }
2720 
2721   return VAddrSize == AddrSize;
2722 }
2723 
2724 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2725 
2726   const unsigned Opc = Inst.getOpcode();
2727   const MCInstrDesc &Desc = MII.get(Opc);
2728 
2729   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2730     return true;
2731   if (!Desc.mayLoad() || !Desc.mayStore())
2732     return true; // Not atomic
2733 
2734   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2735   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2736 
2737   // This is an incomplete check because image_atomic_cmpswap
2738   // may only use 0x3 and 0xf while other atomic operations
2739   // may use 0x1 and 0x3. However these limitations are
2740   // verified when we check that dmask matches dst size.
2741   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2742 }
2743 
2744 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2745 
2746   const unsigned Opc = Inst.getOpcode();
2747   const MCInstrDesc &Desc = MII.get(Opc);
2748 
2749   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2750     return true;
2751 
2752   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2753   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2754 
2755   // GATHER4 instructions use dmask in a different fashion compared to
2756   // other MIMG instructions. The only useful DMASK values are
2757   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2758   // (red,red,red,red) etc.) The ISA document doesn't mention
2759   // this.
2760   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2761 }
2762 
2763 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2764 
2765   const unsigned Opc = Inst.getOpcode();
2766   const MCInstrDesc &Desc = MII.get(Opc);
2767 
2768   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2769     return true;
2770 
2771   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2772   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2773     if (isCI() || isSI())
2774       return false;
2775   }
2776 
2777   return true;
2778 }
2779 
2780 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2781   const unsigned Opc = Inst.getOpcode();
2782   const MCInstrDesc &Desc = MII.get(Opc);
2783 
2784   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2785     return true;
2786 
2787   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2788   if (DimIdx < 0)
2789     return true;
2790 
2791   long Imm = Inst.getOperand(DimIdx).getImm();
2792   if (Imm < 0 || Imm >= 8)
2793     return false;
2794 
2795   return true;
2796 }
2797 
// Returns true if \p Opcode belongs to the family of "rev" VALU opcodes
// (V_SUBREV*, V_SUBBREV*, V_LSHLREV*, V_LSHRREV*, V_ASHRREV*,
// V_PK_*REV*), across every encoding (e32/e64) and subtarget variant
// (gfx6_gfx7/vi/gfx9/gfx10). Used by validateLdsDirect to reject
// lds_direct as src0 of such instructions.
// NOTE: this is a hand-maintained list; new rev opcodes or subtarget
// variants must be added here explicitly.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
2926 
2927 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2928 
2929   using namespace SIInstrFlags;
2930   const unsigned Opcode = Inst.getOpcode();
2931   const MCInstrDesc &Desc = MII.get(Opcode);
2932 
2933   // lds_direct register is defined so that it can be used
2934   // with 9-bit operands only. Ignore encodings which do not accept these.
2935   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2936     return true;
2937 
2938   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2939   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2940   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2941 
2942   const int SrcIndices[] = { Src1Idx, Src2Idx };
2943 
2944   // lds_direct cannot be specified as either src1 or src2.
2945   for (int SrcIdx : SrcIndices) {
2946     if (SrcIdx == -1) break;
2947     const MCOperand &Src = Inst.getOperand(SrcIdx);
2948     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2949       return false;
2950     }
2951   }
2952 
2953   if (Src0Idx == -1)
2954     return true;
2955 
2956   const MCOperand &Src = Inst.getOperand(Src0Idx);
2957   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2958     return true;
2959 
2960   // lds_direct is specified as src0. Check additional limitations.
2961   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2962 }
2963 
2964 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2965   unsigned Opcode = Inst.getOpcode();
2966   const MCInstrDesc &Desc = MII.get(Opcode);
2967   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2968     return true;
2969 
2970   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2971   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2972 
2973   const int OpIndices[] = { Src0Idx, Src1Idx };
2974 
2975   unsigned NumLiterals = 0;
2976   uint32_t LiteralValue;
2977 
2978   for (int OpIdx : OpIndices) {
2979     if (OpIdx == -1) break;
2980 
2981     const MCOperand &MO = Inst.getOperand(OpIdx);
2982     if (MO.isImm() &&
2983         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2984         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2985         !isInlineConstant(Inst, OpIdx)) {
2986       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2987       if (NumLiterals == 0 || LiteralValue != Value) {
2988         LiteralValue = Value;
2989         ++NumLiterals;
2990       }
2991     }
2992   }
2993 
2994   return NumLiterals <= 1;
2995 }
2996 
2997 // VOP3 literal is only allowed in GFX10+ and only one can be used
2998 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
2999   unsigned Opcode = Inst.getOpcode();
3000   const MCInstrDesc &Desc = MII.get(Opcode);
3001   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3002     return true;
3003 
3004   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3005   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3006   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3007 
3008   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3009 
3010   unsigned NumLiterals = 0;
3011   uint32_t LiteralValue;
3012 
3013   for (int OpIdx : OpIndices) {
3014     if (OpIdx == -1) break;
3015 
3016     const MCOperand &MO = Inst.getOperand(OpIdx);
3017     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3018       continue;
3019 
3020     if (!isInlineConstant(Inst, OpIdx)) {
3021       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3022       if (NumLiterals == 0 || LiteralValue != Value) {
3023         LiteralValue = Value;
3024         ++NumLiterals;
3025       }
3026     }
3027   }
3028 
3029   return !NumLiterals ||
3030          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3031 }
3032 
3033 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3034                                           const SMLoc &IDLoc) {
3035   if (!validateLdsDirect(Inst)) {
3036     Error(IDLoc,
3037       "invalid use of lds_direct");
3038     return false;
3039   }
3040   if (!validateSOPLiteral(Inst)) {
3041     Error(IDLoc,
3042       "only one literal operand is allowed");
3043     return false;
3044   }
3045   if (!validateVOP3Literal(Inst)) {
3046     Error(IDLoc,
3047       "invalid literal operand");
3048     return false;
3049   }
3050   if (!validateConstantBusLimitations(Inst)) {
3051     Error(IDLoc,
3052       "invalid operand (violates constant bus restrictions)");
3053     return false;
3054   }
3055   if (!validateEarlyClobberLimitations(Inst)) {
3056     Error(IDLoc,
3057       "destination must be different than all sources");
3058     return false;
3059   }
3060   if (!validateIntClampSupported(Inst)) {
3061     Error(IDLoc,
3062       "integer clamping is not supported on this GPU");
3063     return false;
3064   }
3065   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3066   if (!validateMIMGD16(Inst)) {
3067     Error(IDLoc,
3068       "d16 modifier is not supported on this GPU");
3069     return false;
3070   }
3071   if (!validateMIMGDim(Inst)) {
3072     Error(IDLoc, "dim modifier is required on this GPU");
3073     return false;
3074   }
3075   if (!validateMIMGDataSize(Inst)) {
3076     Error(IDLoc,
3077       "image data size does not match dmask and tfe");
3078     return false;
3079   }
3080   if (!validateMIMGAddrSize(Inst)) {
3081     Error(IDLoc,
3082       "image address size does not match dim and a16");
3083     return false;
3084   }
3085   if (!validateMIMGAtomicDMask(Inst)) {
3086     Error(IDLoc,
3087       "invalid atomic image dmask");
3088     return false;
3089   }
3090   if (!validateMIMGGatherDMask(Inst)) {
3091     Error(IDLoc,
3092       "invalid image_gather dmask: only one bit must be set");
3093     return false;
3094   }
3095 
3096   return true;
3097 }
3098 
3099 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3100                                             const FeatureBitset &FBS,
3101                                             unsigned VariantID = 0);
3102 
// Matches a parsed instruction against every available matcher variant,
// keeping the most specific failure status, then validates and emits the
// match on success or reports a diagnostic on failure.
// Returns true on error (the MC convention), false on successful emission.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    // A successful match ends the search immediately.
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // Target-specific checks may still reject a syntactically matched
    // instruction; validateInstruction reports its own diagnostic.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Suggest a near-miss mnemonic, if the spell checker finds one among
    // the mnemonics available with the current feature set.
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ErrorInfo, when valid, is the index of the offending operand.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      // Fall back to the instruction location if the operand has none.
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}
3172 
3173 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3174   int64_t Tmp = -1;
3175   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3176     return true;
3177   }
3178   if (getParser().parseAbsoluteExpression(Tmp)) {
3179     return true;
3180   }
3181   Ret = static_cast<uint32_t>(Tmp);
3182   return false;
3183 }
3184 
3185 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3186                                                uint32_t &Minor) {
3187   if (ParseAsAbsoluteExpression(Major))
3188     return TokError("invalid major version");
3189 
3190   if (getLexer().isNot(AsmToken::Comma))
3191     return TokError("minor version number required, comma expected");
3192   Lex();
3193 
3194   if (ParseAsAbsoluteExpression(Minor))
3195     return TokError("invalid minor version");
3196 
3197   return false;
3198 }
3199 
3200 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3201   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3202     return TokError("directive only supported for amdgcn architecture");
3203 
3204   std::string Target;
3205 
3206   SMLoc TargetStart = getTok().getLoc();
3207   if (getParser().parseEscapedString(Target))
3208     return true;
3209   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3210 
3211   std::string ExpectedTarget;
3212   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3213   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3214 
3215   if (Target != ExpectedTargetOS.str())
3216     return getParser().Error(TargetRange.Start, "target must match options",
3217                              TargetRange);
3218 
3219   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3220   return false;
3221 }
3222 
3223 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3224   return getParser().Error(Range.Start, "value out of range", Range);
3225 }
3226 
3227 bool AMDGPUAsmParser::calculateGPRBlocks(
3228     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3229     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
3230     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
3231     unsigned &SGPRBlocks) {
3232   // TODO(scott.linder): These calculations are duplicated from
3233   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3234   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3235 
3236   unsigned NumVGPRs = NextFreeVGPR;
3237   unsigned NumSGPRs = NextFreeSGPR;
3238 
3239   if (Version.Major >= 10)
3240     NumSGPRs = 0;
3241   else {
3242     unsigned MaxAddressableNumSGPRs =
3243         IsaInfo::getAddressableNumSGPRs(&getSTI());
3244 
3245     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3246         NumSGPRs > MaxAddressableNumSGPRs)
3247       return OutOfRangeError(SGPRRange);
3248 
3249     NumSGPRs +=
3250         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3251 
3252     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3253         NumSGPRs > MaxAddressableNumSGPRs)
3254       return OutOfRangeError(SGPRRange);
3255 
3256     if (Features.test(FeatureSGPRInitBug))
3257       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3258   }
3259 
3260   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
3261   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3262 
3263   return false;
3264 }
3265 
3266 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3267   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3268     return TokError("directive only supported for amdgcn architecture");
3269 
3270   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3271     return TokError("directive only supported for amdhsa OS");
3272 
3273   StringRef KernelName;
3274   if (getParser().parseIdentifier(KernelName))
3275     return true;
3276 
3277   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3278 
3279   StringSet<> Seen;
3280 
3281   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3282 
3283   SMRange VGPRRange;
3284   uint64_t NextFreeVGPR = 0;
3285   SMRange SGPRRange;
3286   uint64_t NextFreeSGPR = 0;
3287   unsigned UserSGPRCount = 0;
3288   bool ReserveVCC = true;
3289   bool ReserveFlatScr = true;
3290   bool ReserveXNACK = hasXNACK();
3291 
3292   while (true) {
3293     while (getLexer().is(AsmToken::EndOfStatement))
3294       Lex();
3295 
3296     if (getLexer().isNot(AsmToken::Identifier))
3297       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3298 
3299     StringRef ID = getTok().getIdentifier();
3300     SMRange IDRange = getTok().getLocRange();
3301     Lex();
3302 
3303     if (ID == ".end_amdhsa_kernel")
3304       break;
3305 
3306     if (Seen.find(ID) != Seen.end())
3307       return TokError(".amdhsa_ directives cannot be repeated");
3308     Seen.insert(ID);
3309 
3310     SMLoc ValStart = getTok().getLoc();
3311     int64_t IVal;
3312     if (getParser().parseAbsoluteExpression(IVal))
3313       return true;
3314     SMLoc ValEnd = getTok().getLoc();
3315     SMRange ValRange = SMRange(ValStart, ValEnd);
3316 
3317     if (IVal < 0)
3318       return OutOfRangeError(ValRange);
3319 
3320     uint64_t Val = IVal;
3321 
3322 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3323   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3324     return OutOfRangeError(RANGE);                                             \
3325   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3326 
3327     if (ID == ".amdhsa_group_segment_fixed_size") {
3328       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3329         return OutOfRangeError(ValRange);
3330       KD.group_segment_fixed_size = Val;
3331     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3332       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3333         return OutOfRangeError(ValRange);
3334       KD.private_segment_fixed_size = Val;
3335     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3336       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3337                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3338                        Val, ValRange);
3339       UserSGPRCount += 4;
3340     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3341       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3342                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3343                        ValRange);
3344       UserSGPRCount += 2;
3345     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3346       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3347                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3348                        ValRange);
3349       UserSGPRCount += 2;
3350     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3351       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3352                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3353                        Val, ValRange);
3354       UserSGPRCount += 2;
3355     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3356       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3357                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3358                        ValRange);
3359       UserSGPRCount += 2;
3360     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3361       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3362                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3363                        ValRange);
3364       UserSGPRCount += 2;
3365     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3366       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3367                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3368                        Val, ValRange);
3369       UserSGPRCount += 1;
3370     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3371       PARSE_BITS_ENTRY(
3372           KD.compute_pgm_rsrc2,
3373           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3374           ValRange);
3375     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3376       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3377                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3378                        ValRange);
3379     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3380       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3381                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3382                        ValRange);
3383     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3384       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3385                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3386                        ValRange);
3387     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3388       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3389                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3390                        ValRange);
3391     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3392       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3393                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3394                        ValRange);
3395     } else if (ID == ".amdhsa_next_free_vgpr") {
3396       VGPRRange = ValRange;
3397       NextFreeVGPR = Val;
3398     } else if (ID == ".amdhsa_next_free_sgpr") {
3399       SGPRRange = ValRange;
3400       NextFreeSGPR = Val;
3401     } else if (ID == ".amdhsa_reserve_vcc") {
3402       if (!isUInt<1>(Val))
3403         return OutOfRangeError(ValRange);
3404       ReserveVCC = Val;
3405     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3406       if (IVersion.Major < 7)
3407         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3408                                  IDRange);
3409       if (!isUInt<1>(Val))
3410         return OutOfRangeError(ValRange);
3411       ReserveFlatScr = Val;
3412     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3413       if (IVersion.Major < 8)
3414         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3415                                  IDRange);
3416       if (!isUInt<1>(Val))
3417         return OutOfRangeError(ValRange);
3418       ReserveXNACK = Val;
3419     } else if (ID == ".amdhsa_float_round_mode_32") {
3420       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3421                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3422     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3423       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3424                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3425     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3426       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3427                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3428     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3429       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3430                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3431                        ValRange);
3432     } else if (ID == ".amdhsa_dx10_clamp") {
3433       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3434                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3435     } else if (ID == ".amdhsa_ieee_mode") {
3436       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3437                        Val, ValRange);
3438     } else if (ID == ".amdhsa_fp16_overflow") {
3439       if (IVersion.Major < 9)
3440         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3441                                  IDRange);
3442       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3443                        ValRange);
3444     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3445       if (IVersion.Major < 10)
3446         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3447                                  IDRange);
3448       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3449                        ValRange);
3450     } else if (ID == ".amdhsa_memory_ordered") {
3451       if (IVersion.Major < 10)
3452         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3453                                  IDRange);
3454       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3455                        ValRange);
3456     } else if (ID == ".amdhsa_forward_progress") {
3457       if (IVersion.Major < 10)
3458         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3459                                  IDRange);
3460       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3461                        ValRange);
3462     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3463       PARSE_BITS_ENTRY(
3464           KD.compute_pgm_rsrc2,
3465           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3466           ValRange);
3467     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3468       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3469                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3470                        Val, ValRange);
3471     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3472       PARSE_BITS_ENTRY(
3473           KD.compute_pgm_rsrc2,
3474           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3475           ValRange);
3476     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3477       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3478                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3479                        Val, ValRange);
3480     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3481       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3482                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3483                        Val, ValRange);
3484     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3485       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3486                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3487                        Val, ValRange);
3488     } else if (ID == ".amdhsa_exception_int_div_zero") {
3489       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3490                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3491                        Val, ValRange);
3492     } else {
3493       return getParser().Error(IDRange.Start,
3494                                "unknown .amdhsa_kernel directive", IDRange);
3495     }
3496 
3497 #undef PARSE_BITS_ENTRY
3498   }
3499 
3500   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3501     return TokError(".amdhsa_next_free_vgpr directive is required");
3502 
3503   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3504     return TokError(".amdhsa_next_free_sgpr directive is required");
3505 
3506   unsigned VGPRBlocks;
3507   unsigned SGPRBlocks;
3508   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3509                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3510                          SGPRRange, VGPRBlocks, SGPRBlocks))
3511     return true;
3512 
3513   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3514           VGPRBlocks))
3515     return OutOfRangeError(VGPRRange);
3516   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3517                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3518 
3519   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3520           SGPRBlocks))
3521     return OutOfRangeError(SGPRRange);
3522   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3523                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3524                   SGPRBlocks);
3525 
3526   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3527     return TokError("too many user SGPRs enabled");
3528   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3529                   UserSGPRCount);
3530 
3531   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3532       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3533       ReserveFlatScr, ReserveXNACK);
3534   return false;
3535 }
3536 
3537 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3538   uint32_t Major;
3539   uint32_t Minor;
3540 
3541   if (ParseDirectiveMajorMinor(Major, Minor))
3542     return true;
3543 
3544   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3545   return false;
3546 }
3547 
/// Parse the .hsa_code_object_isa directive:
///   .hsa_code_object_isa [Major, Minor, Stepping, "Vendor", "Arch"]
/// With no operands, the ISA version of the targeted GPU is used.
/// Returns true on error (a diagnostic has already been emitted).
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  // The remaining fields are each preceded by a comma separator.
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  // getStringContents() yields the token text without the quotes.
  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}
3599 
3600 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3601                                                amd_kernel_code_t &Header) {
3602   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3603   // assembly for backwards compatibility.
3604   if (ID == "max_scratch_backing_memory_byte_size") {
3605     Parser.eatToEndOfStatement();
3606     return false;
3607   }
3608 
3609   SmallString<40> ErrStr;
3610   raw_svector_ostream Err(ErrStr);
3611   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3612     return TokError(Err.str());
3613   }
3614   Lex();
3615 
3616   if (ID == "enable_wgp_mode") {
3617     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3618       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3619   }
3620 
3621   if (ID == "enable_mem_ordered") {
3622     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3623       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3624   }
3625 
3626   if (ID == "enable_fwd_progress") {
3627     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3628       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3629   }
3630 
3631   return false;
3632 }
3633 
3634 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3635   amd_kernel_code_t Header;
3636   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3637 
3638   while (true) {
3639     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3640     // will set the current token to EndOfStatement.
3641     while(getLexer().is(AsmToken::EndOfStatement))
3642       Lex();
3643 
3644     if (getLexer().isNot(AsmToken::Identifier))
3645       return TokError("expected value identifier or .end_amd_kernel_code_t");
3646 
3647     StringRef ID = getLexer().getTok().getIdentifier();
3648     Lex();
3649 
3650     if (ID == ".end_amd_kernel_code_t")
3651       break;
3652 
3653     if (ParseAMDKernelCodeTValue(ID, Header))
3654       return true;
3655   }
3656 
3657   getTargetStreamer().EmitAMDKernelCodeT(Header);
3658 
3659   return false;
3660 }
3661 
3662 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3663   if (getLexer().isNot(AsmToken::Identifier))
3664     return TokError("expected symbol name");
3665 
3666   StringRef KernelName = Parser.getTok().getString();
3667 
3668   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3669                                            ELF::STT_AMDGPU_HSA_KERNEL);
3670   Lex();
3671   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3672     KernelScope.initialize(getContext());
3673   return false;
3674 }
3675 
3676 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3677   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3678     return Error(getParser().getTok().getLoc(),
3679                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3680                  "architectures");
3681   }
3682 
3683   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3684 
3685   std::string ISAVersionStringFromSTI;
3686   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3687   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3688 
3689   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3690     return Error(getParser().getTok().getLoc(),
3691                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3692                  "arguments specified through the command line");
3693   }
3694 
3695   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3696   Lex();
3697 
3698   return false;
3699 }
3700 
3701 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3702   const char *AssemblerDirectiveBegin;
3703   const char *AssemblerDirectiveEnd;
3704   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3705       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3706           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3707                             HSAMD::V3::AssemblerDirectiveEnd)
3708           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3709                             HSAMD::AssemblerDirectiveEnd);
3710 
3711   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3712     return Error(getParser().getTok().getLoc(),
3713                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3714                  "not available on non-amdhsa OSes")).str());
3715   }
3716 
3717   std::string HSAMetadataString;
3718   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3719                           HSAMetadataString))
3720     return true;
3721 
3722   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3723     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3724       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3725   } else {
3726     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3727       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3728   }
3729 
3730   return false;
3731 }
3732 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives. The collected text (with statement separators inserted
/// between lines) is appended to \p CollectString. Returns true on error.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace is significant inside the collected block (e.g. YAML
  // indentation), so disable the lexer's usual space skipping.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Copy leading whitespace through verbatim.
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    // Stop once the end directive appears at the start of a statement.
    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Collect the rest of the statement and terminate it with the target's
    // statement separator so each source line stays distinct.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing before emitting any diagnostic or returning.
  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
3775 
3776 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3777 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3778   std::string String;
3779   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3780                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3781     return true;
3782 
3783   auto PALMetadata = getTargetStreamer().getPALMetadata();
3784   if (!PALMetadata->setFromString(String))
3785     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3786   return false;
3787 }
3788 
3789 /// Parse the assembler directive for old linear-format PAL metadata.
3790 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3791   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3792     return Error(getParser().getTok().getLoc(),
3793                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3794                  "not available on non-amdpal OSes")).str());
3795   }
3796 
3797   auto PALMetadata = getTargetStreamer().getPALMetadata();
3798   PALMetadata->setLegacy();
3799   for (;;) {
3800     uint32_t Key, Value;
3801     if (ParseAsAbsoluteExpression(Key)) {
3802       return TokError(Twine("invalid value in ") +
3803                       Twine(PALMD::AssemblerDirective));
3804     }
3805     if (getLexer().isNot(AsmToken::Comma)) {
3806       return TokError(Twine("expected an even number of values in ") +
3807                       Twine(PALMD::AssemblerDirective));
3808     }
3809     Lex();
3810     if (ParseAsAbsoluteExpression(Value)) {
3811       return TokError(Twine("invalid value in ") +
3812                       Twine(PALMD::AssemblerDirective));
3813     }
3814     PALMetadata->setRegister(Key, Value);
3815     if (getLexer().isNot(AsmToken::Comma))
3816       break;
3817     Lex();
3818   }
3819   return false;
3820 }
3821 
/// Top-level dispatch for AMDGPU-specific assembler directives.
/// Returns false when the directive was handled here; true hands the
/// directive back to the generic parser.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // Code object v3 accepts a different directive set than earlier versions.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // PAL metadata directives are recognized for either code object version.
  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
3863 
/// Return true if \p RegNo (or any register aliasing it) is available on the
/// current subtarget. Register availability varies across GPU generations,
/// and this function encodes those per-generation rules.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // TTMP12..TTMP15 (and anything overlapping them) only exist on GFX9/GFX10.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // TBA/TMA are not accepted on GFX9 and GFX10.
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // XNACK_MASK needs the xnack feature and is rejected on SI/CI and GFX10.
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  // Inline-value registers are rejected on SI/CI/VI.
  if (isInlineValue(RegNo))
    return !isCI() && !isSI() && !isVI();

  // Everything else that reaches this point is valid on CI.
  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
3928 
/// Parse a single instruction operand into \p Operands. In NSA mode a
/// bracketed register list "[reg, reg, ...]" is also accepted (GFX10 MIMG
/// non-sequential address operands).
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();
    SMLoc LBraceLoc = getTok().getLoc();
    Parser.Lex(); // eat the '['

    // Parse a comma-separated list of registers up to the closing ']'.
    for (;;) {
      ResTy = parseReg(Operands);
      if (ResTy != MatchOperand_Success)
        return ResTy;

      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    // Keep the brackets as explicit tokens only when more than one register
    // was parsed; a single bracketed register is treated as unbracketed.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
                                                    getTok().getLoc()));
    }

    Parser.Lex(); // eat the ']'
    return MatchOperand_Success;
  }

  // Fall back to the generic register-or-immediate parser.
  return parseRegOrImm(Operands);
}
3976 
3977 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3978   // Clear any forced encodings from the previous instruction.
3979   setForcedEncodingSize(0);
3980   setForcedDPP(false);
3981   setForcedSDWA(false);
3982 
3983   if (Name.endswith("_e64")) {
3984     setForcedEncodingSize(64);
3985     return Name.substr(0, Name.size() - 4);
3986   } else if (Name.endswith("_e32")) {
3987     setForcedEncodingSize(32);
3988     return Name.substr(0, Name.size() - 4);
3989   } else if (Name.endswith("_dpp")) {
3990     setForcedDPP(true);
3991     return Name.substr(0, Name.size() - 4);
3992   } else if (Name.endswith("_sdwa")) {
3993     setForcedSDWA(true);
3994     return Name.substr(0, Name.size() - 5);
3995   }
3996   return Name;
3997 }
3998 
3999 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4000                                        StringRef Name,
4001                                        SMLoc NameLoc, OperandVector &Operands) {
4002   // Add the instruction mnemonic
4003   Name = parseMnemonicSuffix(Name);
4004   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4005 
4006   bool IsMIMG = Name.startswith("image_");
4007 
4008   while (!getLexer().is(AsmToken::EndOfStatement)) {
4009     OperandMode Mode = OperandMode_Default;
4010     if (IsMIMG && isGFX10() && Operands.size() == 2)
4011       Mode = OperandMode_NSA;
4012     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4013 
4014     // Eat the comma or space if there is one.
4015     if (getLexer().is(AsmToken::Comma))
4016       Parser.Lex();
4017 
4018     switch (Res) {
4019       case MatchOperand_Success: break;
4020       case MatchOperand_ParseFail:
4021         // FIXME: use real operand location rather than the current location.
4022         Error(getLexer().getLoc(), "failed parsing operand.");
4023         while (!getLexer().is(AsmToken::EndOfStatement)) {
4024           Parser.Lex();
4025         }
4026         return true;
4027       case MatchOperand_NoMatch:
4028         // FIXME: use real operand location rather than the current location.
4029         Error(getLexer().getLoc(), "not a valid operand.");
4030         while (!getLexer().is(AsmToken::EndOfStatement)) {
4031           Parser.Lex();
4032         }
4033         return true;
4034     }
4035   }
4036 
4037   return false;
4038 }
4039 
4040 //===----------------------------------------------------------------------===//
4041 // Utility functions
4042 //===----------------------------------------------------------------------===//
4043 
4044 OperandMatchResultTy
4045 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4046 
4047   if (!trySkipId(Prefix, AsmToken::Colon))
4048     return MatchOperand_NoMatch;
4049 
4050   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4051 }
4052 
4053 OperandMatchResultTy
4054 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4055                                     AMDGPUOperand::ImmTy ImmTy,
4056                                     bool (*ConvertResult)(int64_t&)) {
4057   SMLoc S = getLoc();
4058   int64_t Value = 0;
4059 
4060   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4061   if (Res != MatchOperand_Success)
4062     return Res;
4063 
4064   if (ConvertResult && !ConvertResult(Value)) {
4065     Error(S, "invalid " + StringRef(Prefix) + " value.");
4066   }
4067 
4068   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4069   return MatchOperand_Success;
4070 }
4071 
/// Parse "Prefix:[b,b,...]" where each element is 0 or 1 (up to 4 elements),
/// packing element I into bit I of a single immediate operand of \p ImmTy.
/// Note: \p ConvertResult is accepted for signature symmetry with
/// parseIntWithPrefix but is not used here.
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    // Each element must be a single bit.
    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    // At most MaxSize elements; require the closing bracket here.
    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
4117 
/// Parse an optional named-bit operand: the bare name sets the bit to 1,
/// "no<Name>" sets it to 0, and end-of-statement defaults it to 0.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          // These errors are reported but parsing continues, so the rest of
          // the statement's operands are still consumed.
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  // dlc is only accepted on GFX10, even as an implicit default.
  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
4156 
4157 static void addOptionalImmOperand(
4158   MCInst& Inst, const OperandVector& Operands,
4159   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4160   AMDGPUOperand::ImmTy ImmT,
4161   int64_t Default = 0) {
4162   auto i = OptionalIdx.find(ImmT);
4163   if (i != OptionalIdx.end()) {
4164     unsigned Idx = i->second;
4165     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4166   } else {
4167     Inst.addOperand(MCOperand::createImm(Default));
4168   }
4169 }
4170 
/// Parse "Prefix:<identifier>" and store the identifier in \p Value.
/// No-match while the prefix has not been consumed; parse-fail afterwards.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  // Note: the identifier token is left unconsumed for the caller.
  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}
4194 
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // dfmt occupies 4 bits of the joint format operand.
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // nfmt occupies 3 bits of the joint format operand.
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    // Neither keyword matched at this position; stop scanning.
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  // Pack dfmt into bits [3:0] and nfmt above it.
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
4241 
4242 //===----------------------------------------------------------------------===//
4243 // ds
4244 //===----------------------------------------------------------------------===//
4245 
4246 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4247                                     const OperandVector &Operands) {
4248   OptionalImmIndexMap OptionalIdx;
4249 
4250   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4251     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4252 
4253     // Add the register arguments
4254     if (Op.isReg()) {
4255       Op.addRegOperands(Inst, 1);
4256       continue;
4257     }
4258 
4259     // Handle optional arguments
4260     OptionalIdx[Op.getImmTy()] = i;
4261   }
4262 
4263   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4264   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4265   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4266 
4267   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4268 }
4269 
4270 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4271                                 bool IsGdsHardcoded) {
4272   OptionalImmIndexMap OptionalIdx;
4273 
4274   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4275     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4276 
4277     // Add the register arguments
4278     if (Op.isReg()) {
4279       Op.addRegOperands(Inst, 1);
4280       continue;
4281     }
4282 
4283     if (Op.isToken() && Op.getToken() == "gds") {
4284       IsGdsHardcoded = true;
4285       continue;
4286     }
4287 
4288     // Handle optional arguments
4289     OptionalIdx[Op.getImmTy()] = i;
4290   }
4291 
4292   AMDGPUOperand::ImmTy OffsetType =
4293     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4294      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4295      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4296                                                       AMDGPUOperand::ImmTyOffset;
4297 
4298   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4299 
4300   if (!IsGdsHardcoded) {
4301     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4302   }
4303   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4304 }
4305 
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst indices of the four source slots; used below to compact
  // sources for compressed exports and to build the enable mask.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An "off" source occupies a slot as NoRegister; it contributes no
    // bits to the enable mask (see the loop over OperandIdx below).
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    // The export target immediate is emitted in place.
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The "done" modifier token adds no MCInst operand here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: move src2 down into the src1 slot and clear the
    // upper two slots, so only the first two MCInst slots carry registers.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Build the component enable mask: two bits per live source when
  // compressed, one bit per live source otherwise.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
4366 
4367 //===----------------------------------------------------------------------===//
4368 // s_waitcnt
4369 //===----------------------------------------------------------------------===//
4370 
4371 static bool
4372 encodeCnt(
4373   const AMDGPU::IsaVersion ISA,
4374   int64_t &IntVal,
4375   int64_t CntVal,
4376   bool Saturate,
4377   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4378   unsigned (*decode)(const IsaVersion &Version, unsigned))
4379 {
4380   bool Failed = false;
4381 
4382   IntVal = encode(ISA, IntVal, CntVal);
4383   if (CntVal != decode(ISA, IntVal)) {
4384     if (Saturate) {
4385       IntVal = encode(ISA, IntVal, -1);
4386     } else {
4387       Failed = true;
4388     }
4389   }
4390   return Failed;
4391 }
4392 
4393 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4394   StringRef CntName = Parser.getTok().getString();
4395   int64_t CntVal;
4396 
4397   Parser.Lex();
4398   if (getLexer().isNot(AsmToken::LParen))
4399     return true;
4400 
4401   Parser.Lex();
4402   if (getLexer().isNot(AsmToken::Integer))
4403     return true;
4404 
4405   SMLoc ValLoc = Parser.getTok().getLoc();
4406   if (getParser().parseAbsoluteExpression(CntVal))
4407     return true;
4408 
4409   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4410 
4411   bool Failed = true;
4412   bool Sat = CntName.endswith("_sat");
4413 
4414   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4415     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4416   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4417     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4418   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4419     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4420   }
4421 
4422   if (Failed) {
4423     Error(ValLoc, "too large value for " + CntName);
4424     return true;
4425   }
4426 
4427   if (getLexer().isNot(AsmToken::RParen)) {
4428     return true;
4429   }
4430 
4431   Parser.Lex();
4432   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4433     const AsmToken NextToken = getLexer().peekTok();
4434     if (NextToken.is(AsmToken::Identifier)) {
4435       Parser.Lex();
4436     }
4437   }
4438 
4439   return false;
4440 }
4441 
4442 OperandMatchResultTy
4443 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4444   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4445   int64_t Waitcnt = getWaitcntBitMask(ISA);
4446   SMLoc S = Parser.getTok().getLoc();
4447 
4448   switch(getLexer().getKind()) {
4449     default: return MatchOperand_ParseFail;
4450     case AsmToken::Integer:
4451       // The operand can be an integer value.
4452       if (getParser().parseAbsoluteExpression(Waitcnt))
4453         return MatchOperand_ParseFail;
4454       break;
4455 
4456     case AsmToken::Identifier:
4457       do {
4458         if (parseCnt(Waitcnt))
4459           return MatchOperand_ParseFail;
4460       } while(getLexer().isNot(AsmToken::EndOfStatement));
4461       break;
4462   }
4463   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4464   return MatchOperand_Success;
4465 }
4466 
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // Parse "hwreg(<id>[, <offset>, <width>])". Returns true on a syntax
  // error; range validation of the parsed values is left to the caller.
  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic register name: look it up in IdSymbolic, restricted to the
    // range of names known to the current subtarget generation.
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    else if (isGFX9())
      Last = ID_SYMBOLIC_FIRST_GFX10_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric register id.
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    // Short form without offset/width; the caller's defaults remain.
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}
4533 
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  // Resulting simm16 encoding: id, offset and (width - 1) bitfields.
  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unneccessary error messages.
      }
      break;

    case AsmToken::Identifier: {
        // Symbolic form: hwreg(<id>[, <offset>, <width>]).
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        // Range validation: errors are reported but the operand is still
        // created, for the same reason as in the integer case above.
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
4576 
// An s_waitcnt operand accepts any immediate (raw integer or a value
// built from counter constructs by parseSWaitCntOps).
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
4580 
// Matches only immediates tagged as hwreg by parseHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
4584 
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  // Parse "sendmsg(<msg>[, <operation>[, <stream id>]])". Returns true on
  // a syntax error; semantic validation is done by the caller.
  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic message name: search the valid (non-gap) message ids.
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_GS_ALLOC_REQ:
          // Not accepted on SI/CI/VI.
          if (isSI() || isCI() || isVI())
            continue;
          break;
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
        case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric message id.
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): if a second integer follows, it is parsed into Msg.Id
    // again — overwriting the first value on success, or marking the id
    // unknown on failure. The intent is unclear from this file alone;
    // confirm before relying on or changing this behavior.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Only GS, GS_DONE and SYSMSG take an operation argument.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic operation name; the lookup table depends on the message.
    Operation.IsSymbolic = true;
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric operation id.
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}
4685 
4686 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4687   if (getLexer().getKind() != AsmToken::Identifier)
4688     return MatchOperand_NoMatch;
4689 
4690   StringRef Str = Parser.getTok().getString();
4691   int Slot = StringSwitch<int>(Str)
4692     .Case("p10", 0)
4693     .Case("p20", 1)
4694     .Case("p0", 2)
4695     .Default(-1);
4696 
4697   SMLoc S = Parser.getTok().getLoc();
4698   if (Slot == -1)
4699     return MatchOperand_ParseFail;
4700 
4701   Parser.Lex();
4702   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4703                                               AMDGPUOperand::ImmTyInterpSlot));
4704   return MatchOperand_Success;
4705 }
4706 
4707 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4708   if (getLexer().getKind() != AsmToken::Identifier)
4709     return MatchOperand_NoMatch;
4710 
4711   StringRef Str = Parser.getTok().getString();
4712   if (!Str.startswith("attr"))
4713     return MatchOperand_NoMatch;
4714 
4715   StringRef Chan = Str.take_back(2);
4716   int AttrChan = StringSwitch<int>(Chan)
4717     .Case(".x", 0)
4718     .Case(".y", 1)
4719     .Case(".z", 2)
4720     .Case(".w", 3)
4721     .Default(-1);
4722   if (AttrChan == -1)
4723     return MatchOperand_ParseFail;
4724 
4725   Str = Str.drop_back(2).drop_front(4);
4726 
4727   uint8_t Attr;
4728   if (Str.getAsInteger(10, Attr))
4729     return MatchOperand_ParseFail;
4730 
4731   SMLoc S = Parser.getTok().getLoc();
4732   Parser.Lex();
4733   if (Attr > 63) {
4734     Error(S, "out of bounds attr");
4735     return MatchOperand_Success;
4736   }
4737 
4738   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4739 
4740   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4741                                               AMDGPUOperand::ImmTyInterpAttr));
4742   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4743                                               AMDGPUOperand::ImmTyAttrChan));
4744   return MatchOperand_Success;
4745 }
4746 
// Report an invalid export target at the current token's location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
4750 
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  // Decode an export target name into its hardware encoding:
  //   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos4 -> 12..16,
  //   prim -> 20 (gfx10 only), param0..param31 -> 32..63.
  // Out-of-range values are diagnosed via errorExpTgt() but still parse.
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    // pos4 is accepted only on gfx10.
    if (Val > 4 || (Val == 4 && !isGFX10()))
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (isGFX10() && Str == "prim") {
    Val = 20;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  // "invalid_target_<n>" parses to the raw value <n> but is always
  // diagnosed as invalid.
  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
4814 
4815 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4816   uint8_t Val;
4817   StringRef Str = Parser.getTok().getString();
4818 
4819   auto Res = parseExpTgtImpl(Str, Val);
4820   if (Res != MatchOperand_Success)
4821     return Res;
4822 
4823   SMLoc S = Parser.getTok().getLoc();
4824   Parser.Lex();
4825 
4826   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4827                                               AMDGPUOperand::ImmTyExpTgt));
4828   return MatchOperand_Success;
4829 }
4830 
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  // Resulting simm16 encoding: message id, operation and stream id fields.
  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      // Symbolic form: sendmsg(...). Parse, then validate and encode each
      // field; validation errors are reported but the operand is still
      // created (same reasoning as in the integer case above). The
      // do/while(false) lets validation bail out early with "break".
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
4910 
// Matches only immediates tagged as sendmsg by parseSendMsgOp.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
4914 
4915 //===----------------------------------------------------------------------===//
4916 // parser helpers
4917 //===----------------------------------------------------------------------===//
4918 
// True if Token is an identifier spelled exactly Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}
4923 
// True if the current token is an identifier spelled exactly Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}
4928 
// True if the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}
4933 
4934 bool
4935 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4936   if (isId(Id)) {
4937     lex();
4938     return true;
4939   }
4940   return false;
4941 }
4942 
4943 bool
4944 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
4945   if (isId(Id) && peekToken().is(Kind)) {
4946     lex();
4947     lex();
4948     return true;
4949   }
4950   return false;
4951 }
4952 
4953 bool
4954 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4955   if (isToken(Kind)) {
4956     lex();
4957     return true;
4958   }
4959   return false;
4960 }
4961 
4962 bool
4963 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4964                            const StringRef ErrMsg) {
4965   if (!trySkipToken(Kind)) {
4966     Error(getLoc(), ErrMsg);
4967     return false;
4968   }
4969   return true;
4970 }
4971 
// Parse an absolute expression into Imm. Returns true on success
// (note: inverted relative to MCAsmParser's error convention).
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}
4976 
4977 bool
4978 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4979   if (isToken(AsmToken::String)) {
4980     Val = getToken().getStringContents();
4981     lex();
4982     return true;
4983   } else {
4984     Error(getLoc(), ErrMsg);
4985     return false;
4986   }
4987 }
4988 
// The current (not yet consumed) token.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
4993 
// The token after the current one, without consuming anything.
AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}
4998 
// Fill Tokens with upcoming tokens; entries beyond what the lexer can
// provide are set to error tokens.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}
5006 
// Kind of the current token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
5011 
// Source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
5016 
// Spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
5021 
// Consume the current token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
5026 
5027 //===----------------------------------------------------------------------===//
5028 // swizzle
5029 //===----------------------------------------------------------------------===//
5030 
5031 LLVM_READNONE
5032 static unsigned
5033 encodeBitmaskPerm(const unsigned AndMask,
5034                   const unsigned OrMask,
5035                   const unsigned XorMask) {
5036   using namespace llvm::AMDGPU::Swizzle;
5037 
5038   return BITMASK_PERM_ENC |
5039          (AndMask << BITMASK_AND_SHIFT) |
5040          (OrMask  << BITMASK_OR_SHIFT)  |
5041          (XorMask << BITMASK_XOR_SHIFT);
5042 }
5043 
5044 bool
5045 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5046                                       const unsigned MinVal,
5047                                       const unsigned MaxVal,
5048                                       const StringRef ErrMsg) {
5049   for (unsigned i = 0; i < OpNum; ++i) {
5050     if (!skipToken(AsmToken::Comma, "expected a comma")){
5051       return false;
5052     }
5053     SMLoc ExprLoc = Parser.getTok().getLoc();
5054     if (!parseExpr(Op[i])) {
5055       return false;
5056     }
5057     if (Op[i] < MinVal || Op[i] > MaxVal) {
5058       Error(ExprLoc, ErrMsg);
5059       return false;
5060     }
5061   }
5062 
5063   return true;
5064 }
5065 
5066 bool
5067 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5068   using namespace llvm::AMDGPU::Swizzle;
5069 
5070   int64_t Lane[LANE_NUM];
5071   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5072                            "expected a 2-bit lane id")) {
5073     Imm = QUAD_PERM_ENC;
5074     for (unsigned I = 0; I < LANE_NUM; ++I) {
5075       Imm |= Lane[I] << (LANE_SHIFT * I);
5076     }
5077     return true;
5078   }
5079   return false;
5080 }
5081 
5082 bool
5083 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5084   using namespace llvm::AMDGPU::Swizzle;
5085 
5086   SMLoc S = Parser.getTok().getLoc();
5087   int64_t GroupSize;
5088   int64_t LaneIdx;
5089 
5090   if (!parseSwizzleOperands(1, &GroupSize,
5091                             2, 32,
5092                             "group size must be in the interval [2,32]")) {
5093     return false;
5094   }
5095   if (!isPowerOf2_64(GroupSize)) {
5096     Error(S, "group size must be a power of two");
5097     return false;
5098   }
5099   if (parseSwizzleOperands(1, &LaneIdx,
5100                            0, GroupSize - 1,
5101                            "lane id must be in the interval [0,group size - 1]")) {
5102     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5103     return true;
5104   }
5105   return false;
5106 }
5107 
5108 bool
5109 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5110   using namespace llvm::AMDGPU::Swizzle;
5111 
5112   SMLoc S = Parser.getTok().getLoc();
5113   int64_t GroupSize;
5114 
5115   if (!parseSwizzleOperands(1, &GroupSize,
5116       2, 32, "group size must be in the interval [2,32]")) {
5117     return false;
5118   }
5119   if (!isPowerOf2_64(GroupSize)) {
5120     Error(S, "group size must be a power of two");
5121     return false;
5122   }
5123 
5124   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5125   return true;
5126 }
5127 
5128 bool
5129 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5130   using namespace llvm::AMDGPU::Swizzle;
5131 
5132   SMLoc S = Parser.getTok().getLoc();
5133   int64_t GroupSize;
5134 
5135   if (!parseSwizzleOperands(1, &GroupSize,
5136       1, 16, "group size must be in the interval [1,16]")) {
5137     return false;
5138   }
5139   if (!isPowerOf2_64(GroupSize)) {
5140     Error(S, "group size must be a power of two");
5141     return false;
5142   }
5143 
5144   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5145   return true;
5146 }
5147 
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  // The control is a 5-character string, one character per lane-id bit,
  // most significant bit first.
  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      // Force this lane-id bit to 0 (bit excluded from all masks).
      break;
    case '1':
      // Force this lane-id bit to 1.
      OrMask |= Mask;
      break;
    case 'p':
      // Preserve this lane-id bit.
      AndMask |= Mask;
      break;
    case 'i':
      // Invert this lane-id bit.
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
5194 
5195 bool
5196 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5197 
5198   SMLoc OffsetLoc = Parser.getTok().getLoc();
5199 
5200   if (!parseExpr(Imm)) {
5201     return false;
5202   }
5203   if (!isUInt<16>(Imm)) {
5204     Error(OffsetLoc, "expected a 16-bit offset");
5205     return false;
5206   }
5207   return true;
5208 }
5209 
5210 bool
5211 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5212   using namespace llvm::AMDGPU::Swizzle;
5213 
5214   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5215 
5216     SMLoc ModeLoc = Parser.getTok().getLoc();
5217     bool Ok = false;
5218 
5219     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5220       Ok = parseSwizzleQuadPerm(Imm);
5221     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5222       Ok = parseSwizzleBitmaskPerm(Imm);
5223     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5224       Ok = parseSwizzleBroadcast(Imm);
5225     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5226       Ok = parseSwizzleSwap(Imm);
5227     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5228       Ok = parseSwizzleReverse(Imm);
5229     } else {
5230       Error(ModeLoc, "expected a swizzle mode");
5231     }
5232 
5233     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5234   }
5235 
5236   return false;
5237 }
5238 
5239 OperandMatchResultTy
5240 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5241   SMLoc S = Parser.getTok().getLoc();
5242   int64_t Imm = 0;
5243 
5244   if (trySkipId("offset")) {
5245 
5246     bool Ok = false;
5247     if (skipToken(AsmToken::Colon, "expected a colon")) {
5248       if (trySkipId("swizzle")) {
5249         Ok = parseSwizzleMacro(Imm);
5250       } else {
5251         Ok = parseSwizzleOffset(Imm);
5252       }
5253     }
5254 
5255     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5256 
5257     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5258   } else {
5259     // Swizzle "offset" operand is optional.
5260     // If it is omitted, try parsing other optional operands.
5261     return parseOptionalOpr(Operands);
5262   }
5263 }
5264 
// Matches only immediates tagged as swizzle by parseSwizzleOp.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
5269 
5270 //===----------------------------------------------------------------------===//
5271 // VGPR Index Mode
5272 //===----------------------------------------------------------------------===//
5273 
// Parse the body of a "gpr_idx(...)" construct (the opening parenthesis
// has already been consumed). Returns the bitwise OR of the selected mode
// bits, or OFF for an empty list "()". Errors are reported in place; the
// value accumulated so far is returned regardless.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    // Try each symbolic mode name in turn.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    // Each mode may be specified at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}
5317 
// Parse a VGPR index mode operand: either a symbolic "gpr_idx(...)" macro
// or a bare absolute expression that must fit in 4 bits.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  int64_t Imm = 0;
  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "gpr_idx" &&
      getLexer().peekTok().is(AsmToken::LParen)) {

    // Consume "gpr_idx" and "(".
    Parser.Lex();
    Parser.Lex();

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
    Imm = parseGPRIdxMacro();

  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_NoMatch;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      // Diagnose but still push the operand and report success, for the
      // same reason as above.
      Error(S, "invalid immediate: only 4-bit values are legal");
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}
5347 
5348 bool AMDGPUOperand::isGPRIdxMode() const {
5349   return isImmTy(ImmTyGprIdxMode);
5350 }
5351 
5352 //===----------------------------------------------------------------------===//
5353 // sopp branch targets
5354 //===----------------------------------------------------------------------===//
5355 
5356 OperandMatchResultTy
5357 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5358   SMLoc S = Parser.getTok().getLoc();
5359 
5360   switch (getLexer().getKind()) {
5361     default: return MatchOperand_ParseFail;
5362     case AsmToken::Integer: {
5363       int64_t Imm;
5364       if (getParser().parseAbsoluteExpression(Imm))
5365         return MatchOperand_ParseFail;
5366       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5367       return MatchOperand_Success;
5368     }
5369 
5370     case AsmToken::Identifier:
5371       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5372           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5373                                   Parser.getTok().getString()), getContext()), S));
5374       Parser.Lex();
5375       return MatchOperand_Success;
5376   }
5377 }
5378 
5379 //===----------------------------------------------------------------------===//
5380 // mubuf
5381 //===----------------------------------------------------------------------===//
5382 
5383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5384   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5385 }
5386 
5387 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5388   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5389 }
5390 
5391 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5392   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5393 }
5394 
// Convert parsed MUBUF operands into an MCInst.
// IsAtomic suppresses the optional 'glc' operand (hard-coded for atomics);
// IsAtomicReturn additionally inserts a tied source for the return dst;
// IsLds marks opcodes carrying the mandatory 'lds' modifier.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                               const OperandVector &Operands,
                               bool IsAtomic,
                               bool IsAtomicReturn,
                               bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  // An atomic-with-return instruction must also be atomic.
  assert(IsAtomicReturn ? IsAtomic : true);
  unsigned FirstOperandIdx = 1; // skip the mnemonic (operand 0, by convention)

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append optional modifiers in MCInst operand order. The sequence below
  // is significant and must not be reordered.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }

  // 'dlc' exists only on GFX10.
  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
5467 
5468 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5469   OptionalImmIndexMap OptionalIdx;
5470 
5471   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5472     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5473 
5474     // Add the register arguments
5475     if (Op.isReg()) {
5476       Op.addRegOperands(Inst, 1);
5477       continue;
5478     }
5479 
5480     // Handle the case where soffset is an immediate
5481     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5482       Op.addImmOperands(Inst, 1);
5483       continue;
5484     }
5485 
5486     // Handle tokens like 'offen' which are sometimes hard-coded into the
5487     // asm string.  There are no MCInst operands for these.
5488     if (Op.isToken()) {
5489       continue;
5490     }
5491     assert(Op.isImm());
5492 
5493     // Handle optional arguments
5494     OptionalIdx[Op.getImmTy()] = i;
5495   }
5496 
5497   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5498                         AMDGPUOperand::ImmTyOffset);
5499   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5500   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5501   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5502   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5503 
5504   if (isGFX10())
5505     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5506 }
5507 
5508 //===----------------------------------------------------------------------===//
5509 // mimg
5510 //===----------------------------------------------------------------------===//
5511 
// Convert parsed MIMG operands into an MCInst. When IsAtomic is set,
// the single def register is re-added immediately as the tied source.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Defs come first in the parsed operand list.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      // Record the position of each optional modifier for later.
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  // Append optional modifiers in MCInst operand order. 'dim' and 'dlc'
  // exist only on GFX10; 'da' only before GFX10. The sequence must not
  // be reordered.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
5558 
5559 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5560   cvtMIMG(Inst, Operands, true);
5561 }
5562 
5563 //===----------------------------------------------------------------------===//
5564 // smrd
5565 //===----------------------------------------------------------------------===//
5566 
5567 bool AMDGPUOperand::isSMRDOffset8() const {
5568   return isImm() && isUInt<8>(getImm());
5569 }
5570 
5571 bool AMDGPUOperand::isSMRDOffset20() const {
5572   return isImm() && isUInt<20>(getImm());
5573 }
5574 
5575 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5576   // 32-bit literals are only supported on CI and we only want to use them
5577   // when the offset is > 8-bits.
5578   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5579 }
5580 
5581 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5582   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5583 }
5584 
5585 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5586   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5587 }
5588 
5589 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5590   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5591 }
5592 
5593 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5594   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5595 }
5596 
5597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5598   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5599 }
5600 
5601 //===----------------------------------------------------------------------===//
5602 // vop3
5603 //===----------------------------------------------------------------------===//
5604 
// Map an omod "mul:N" value (1, 2 or 4) onto its encoding (0, 1 or 2).
// Returns false without modifying Mul for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
5612 
// Map an omod "div:N" value onto its encoding: div:1 -> 0, div:2 -> 3.
// Returns false without modifying Div for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
5626 
// Map a DPP bound_ctrl value onto its encoding: 0 -> 1, -1 -> 0.
// Returns false without modifying BoundCtrl for any other value.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != -1)
    return false;
  BoundCtrl = (BoundCtrl == 0) ? 1 : 0;
  return true;
}
5640 
// Note: the order in this table matches the order of operands in AsmString.
// Each entry is {name, immediate type, is-a-bare-bit-flag, value converter}.
// parseOptionalOpr scans this table front to back on every attempt.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  // 'r128' and 'a16' are aliases for the same immediate type.
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): 'd16' appears twice in this table (also above, after
  // 'tfe'). Since parseOptionalOpr scans from the front, this second
  // entry can never match first — verify intent before removing.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
5683 
5684 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5685   unsigned size = Operands.size();
5686   assert(size > 0);
5687 
5688   OperandMatchResultTy res = parseOptionalOpr(Operands);
5689 
5690   // This is a hack to enable hardcoded mandatory operands which follow
5691   // optional operands.
5692   //
5693   // Current design assumes that all operands after the first optional operand
5694   // are also optional. However implementation of some instructions violates
5695   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5696   //
5697   // To alleviate this problem, we have to (implicitly) parse extra operands
5698   // to make sure autogenerated parser of custom operands never hit hardcoded
5699   // mandatory operands.
5700 
5701   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5702 
5703     // We have parsed the first optional operand.
5704     // Parse as many operands as necessary to skip all mandatory operands.
5705 
5706     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5707       if (res != MatchOperand_Success ||
5708           getLexer().is(AsmToken::EndOfStatement)) break;
5709       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5710       res = parseOptionalOpr(Operands);
5711     }
5712   }
5713 
5714   return res;
5715 }
5716 
// Try each entry of AMDGPUOptionalOperandTable in order, dispatching to
// the parser that matches the entry's immediate type. Returns the result
// of the first entry that does not report NoMatch.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      // Bare flag like 'glc' / 'slc'.
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      // 'mul:N' / 'div:N' output modifier.
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      // SDWA byte/word selectors.
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      // Array-valued modifiers like op_sel:[...].
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
      // Pre-GFX10 dfmt/nfmt syntax.
      res = parseDfmtNfmt(Operands);
    } else {
      // Generic 'name:value' modifier.
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}
5750 
5751 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5752   StringRef Name = Parser.getTok().getString();
5753   if (Name == "mul") {
5754     return parseIntWithPrefix("mul", Operands,
5755                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5756   }
5757 
5758   if (Name == "div") {
5759     return parseIntWithPrefix("div", Operands,
5760                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5761   }
5762 
5763   return MatchOperand_NoMatch;
5764 }
5765 
// After the generic VOP3P conversion, propagate the op_sel bit just past
// the last source operand into src0_modifiers as DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  // Count how many src operands this opcode actually has.
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The bit at position SrcNum encodes the destination selection.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
5789 
5790 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5791       // 1. This operand is input modifiers
5792   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5793       // 2. This is not last operand
5794       && Desc.NumOperands > (OpNum + 1)
5795       // 3. Next operand is register class
5796       && Desc.OpInfo[OpNum + 1].RegClass != -1
5797       // 4. Next register is not tied to any other operand
5798       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5799 }
5800 
// Convert parsed VOP3 interpolation operands (v_interp_*) into an MCInst.
// Interp slot/attr/chan operands become plain immediates; other operands
// follow the usual VOP3 modifier handling.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Defs come first in the parsed operand list.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with modifiers occupies two MC operands (mods + value).
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // Append each optional modifier only if the opcode defines it.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
5839 
// Convert parsed VOP3 operands into an MCInst. Positions of optional
// modifiers are recorded in OptionalIdx for the caller (e.g. cvtVOP3P)
// to consume as well.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Defs come first in the parsed operand list.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Source with modifiers occupies two MC operands (mods + value).
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    // Splice the zero src2_modifiers and the tied src2 (= dst) into the
    // operand list at the src2_modifiers position.
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
5902 
5903 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5904   OptionalImmIndexMap OptionalIdx;
5905   cvtVOP3(Inst, Operands, OptionalIdx);
5906 }
5907 
// Convert parsed VOP3P (packed) operands into an MCInst: run the generic
// VOP3 conversion, append the packed-math modifiers (op_sel, op_sel_hi,
// neg_lo, neg_hi), then fold their per-source bits into the corresponding
// srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each packed modifier belongs to source J; OR the matching
  // SISrcMods flags into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
5990 
5991 //===----------------------------------------------------------------------===//
5992 // dpp
5993 //===----------------------------------------------------------------------===//
5994 
5995 bool AMDGPUOperand::isDPPCtrl() const {
5996   using namespace AMDGPU::DPP;
5997 
5998   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5999   if (result) {
6000     int64_t Imm = getImm();
6001     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6002            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6003            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6004            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6005            (Imm == DppCtrl::WAVE_SHL1) ||
6006            (Imm == DppCtrl::WAVE_ROL1) ||
6007            (Imm == DppCtrl::WAVE_SHR1) ||
6008            (Imm == DppCtrl::WAVE_ROR1) ||
6009            (Imm == DppCtrl::ROW_MIRROR) ||
6010            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6011            (Imm == DppCtrl::BCAST15) ||
6012            (Imm == DppCtrl::BCAST31);
6013   }
6014   return false;
6015 }
6016 
6017 bool AMDGPUOperand::isS16Imm() const {
6018   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6019 }
6020 
6021 bool AMDGPUOperand::isU16Imm() const {
6022   return isImm() && isUInt<16>(getImm());
6023 }
6024 
// Parse a GFX10 MIMG 'dim:<name>' operand. The dim name may start with a
// digit (e.g. "1D"), which the lexer splits into an Integer token and an
// Identifier token; those pieces are rejoined here.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    // The identifier must directly abut the integer (no whitespace), i.e.
    // the next token must start exactly where the integer ended.
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  // Accept both the short suffix ("1D") and the full SQ_RSRC_IMG_* name.
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
6070 
// Parse a DPP control operand into a single ImmTyDppCtrl immediate.
// Accepted forms (see the encoding constants in AMDGPU::DPP::DppCtrl):
//   row_mirror, row_half_mirror          - no argument
//   quad_perm:[a,b,c,d]                  - each of a..d in [0,3]
//   row_shl:n / row_shr:n / row_ror:n    - n in [1,15]
//   wave_shl:1 / wave_rol:1 / wave_shr:1 / wave_ror:1
//   row_bcast:15 / row_bcast:31
// Returns NoMatch if the current token is not a dpp_ctrl keyword at all,
// and ParseFail for a recognized keyword with malformed arguments.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  // dpp_ctrl always starts with an identifier keyword.
  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    // All remaining forms take a ':'-separated argument.
    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First lane selector occupies bits [1:0] of the encoding.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three lane selectors occupy successive 2-bit fields
      // (shifts 2, 4 and 6).
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // row_* shifts keep the parsed amount and OR in the base opcode;
      // wave_* forms and row_bcast map to fixed encodings.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
6171 
6172 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6173   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6174 }
6175 
6176 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6177   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6178 }
6179 
6180 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6181   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6182 }
6183 
6184 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6185   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6186 }
6187 
// Convert the parsed operand list of a DPP instruction into MCInst operands,
// in the order the instruction description expects. Optional modifiers
// (row_mask, bank_mask, bound_ctrl) are collected first and appended at the
// end with their defaults when absent.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic token; defs (destinations) come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst operand slot is tied to an earlier one, duplicate
    // that earlier operand before adding the current parsed operand.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with FP input modifiers occupies two MCInst operands
      // (modifiers + register).
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  // Append the optional DPP modifiers with their documented defaults.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
6228 
6229 //===----------------------------------------------------------------------===//
6230 // sdwa
6231 //===----------------------------------------------------------------------===//
6232 
6233 OperandMatchResultTy
6234 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6235                               AMDGPUOperand::ImmTy Type) {
6236   using namespace llvm::AMDGPU::SDWA;
6237 
6238   SMLoc S = Parser.getTok().getLoc();
6239   StringRef Value;
6240   OperandMatchResultTy res;
6241 
6242   res = parseStringWithPrefix(Prefix, Value);
6243   if (res != MatchOperand_Success) {
6244     return res;
6245   }
6246 
6247   int64_t Int;
6248   Int = StringSwitch<int64_t>(Value)
6249         .Case("BYTE_0", SdwaSel::BYTE_0)
6250         .Case("BYTE_1", SdwaSel::BYTE_1)
6251         .Case("BYTE_2", SdwaSel::BYTE_2)
6252         .Case("BYTE_3", SdwaSel::BYTE_3)
6253         .Case("WORD_0", SdwaSel::WORD_0)
6254         .Case("WORD_1", SdwaSel::WORD_1)
6255         .Case("DWORD", SdwaSel::DWORD)
6256         .Default(0xffffffff);
6257   Parser.Lex(); // eat last token
6258 
6259   if (Int == 0xffffffff) {
6260     return MatchOperand_ParseFail;
6261   }
6262 
6263   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6264   return MatchOperand_Success;
6265 }
6266 
6267 OperandMatchResultTy
6268 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6269   using namespace llvm::AMDGPU::SDWA;
6270 
6271   SMLoc S = Parser.getTok().getLoc();
6272   StringRef Value;
6273   OperandMatchResultTy res;
6274 
6275   res = parseStringWithPrefix("dst_unused", Value);
6276   if (res != MatchOperand_Success) {
6277     return res;
6278   }
6279 
6280   int64_t Int;
6281   Int = StringSwitch<int64_t>(Value)
6282         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6283         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6284         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6285         .Default(0xffffffff);
6286   Parser.Lex(); // eat last token
6287 
6288   if (Int == 0xffffffff) {
6289     return MatchOperand_ParseFail;
6290   }
6291 
6292   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6293   return MatchOperand_Success;
6294 }
6295 
// Convert a parsed VOP1 SDWA instruction (no vcc token to skip).
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
6299 
// Convert a parsed VOP2 SDWA instruction (no vcc token to skip).
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
6303 
// Convert a parsed VOP2b SDWA instruction; skipVcc=true because VOP2b forms
// carry an explicit "vcc" token that must not become an MCInst operand
// (see the skipVcc handling in cvtSDWA).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
6307 
// Convert a parsed VOPC SDWA instruction; the leading "vcc" token is
// skipped only on VI (skipVcc = isVI()).
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
6311 
// Common conversion routine for all SDWA instruction flavors.
// \p BasicInstType selects the VOP1/VOP2/VOPC operand layout; when
// \p skipVcc is set, a parsed "vcc" register token in the expected position
// is dropped instead of being added as an MCInst operand.
// Optional SDWA modifiers (clamp, omod, dst_sel, dst_unused, src*_sel) are
// collected during the scan and appended with defaults afterwards.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  // Operands[0] is the mnemonic token; defs (destinations) come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with input modifiers occupies two MCInst operands
      // (modifiers + register/immediate).
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused; clamp only when the opcode has it.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
6401 
/// Force static initialization.
/// Registers AMDGPUAsmParser with both AMDGPU targets exposed by the
/// target-info layer (getTheAMDGPUTarget and getTheGCNTarget).
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
6407 
6408 #define GET_REGISTER_MATCHER
6409 #define GET_MATCHER_IMPLEMENTATION
6410 #define GET_MNEMONIC_SPELL_CHECKER
6411 #include "AMDGPUGenAsmMatcher.inc"
6412 
6413 // This fuction should be defined after auto-generated include so that we have
6414 // MatchClassKind enum defined
6415 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6416                                                      unsigned Kind) {
6417   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6418   // But MatchInstructionImpl() expects to meet token and fails to validate
6419   // operand. This method checks if we are given immediate operand but expect to
6420   // get corresponding token.
6421   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6422   switch (Kind) {
6423   case MCK_addr64:
6424     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6425   case MCK_gds:
6426     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6427   case MCK_lds:
6428     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6429   case MCK_glc:
6430     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6431   case MCK_idxen:
6432     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6433   case MCK_offen:
6434     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6435   case MCK_SSrcB32:
6436     // When operands have expression values, they will return true for isToken,
6437     // because it is not possible to distinguish between a token and an
6438     // expression at parse time. MatchInstructionImpl() will always try to
6439     // match an operand as a token, when isToken returns true, and when the
6440     // name of the expression is not a valid token, the match will fail,
6441     // so we need to handle it here.
6442     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6443   case MCK_SSrcF32:
6444     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6445   case MCK_SoppBrTarget:
6446     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6447   case MCK_VReg32OrOff:
6448     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6449   case MCK_InterpSlot:
6450     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6451   case MCK_Attr:
6452     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6453   case MCK_AttrChan:
6454     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6455   default:
6456     return Match_InvalidOperand;
6457   }
6458 }
6459 
6460 //===----------------------------------------------------------------------===//
6461 // endpgm
6462 //===----------------------------------------------------------------------===//
6463 
6464 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6465   SMLoc S = Parser.getTok().getLoc();
6466   int64_t Imm = 0;
6467 
6468   if (!parseExpr(Imm)) {
6469     // The operand is optional, if not present default to 0
6470     Imm = 0;
6471   }
6472 
6473   if (!isUInt<16>(Imm)) {
6474     Error(S, "expected a 16-bit value");
6475     return MatchOperand_ParseFail;
6476   }
6477 
6478   Operands.push_back(
6479       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6480   return MatchOperand_Success;
6481 }
6482 
// True if this operand is the s_endpgm immediate (ImmTyEndpgm).
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6484