1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDppCtrl,
151     ImmTyDppRowMask,
152     ImmTyDppBankMask,
153     ImmTyDppBoundCtrl,
154     ImmTySdwaDstSel,
155     ImmTySdwaSrc0Sel,
156     ImmTySdwaSrc1Sel,
157     ImmTySdwaDstUnused,
158     ImmTyDMask,
159     ImmTyDim,
160     ImmTyUNorm,
161     ImmTyDA,
162     ImmTyR128A16,
163     ImmTyLWE,
164     ImmTyExpTgt,
165     ImmTyExpCompr,
166     ImmTyExpVM,
167     ImmTyFORMAT,
168     ImmTyHwreg,
169     ImmTyOff,
170     ImmTySendMsg,
171     ImmTyInterpSlot,
172     ImmTyInterpAttr,
173     ImmTyAttrChan,
174     ImmTyOpSel,
175     ImmTyOpSelHi,
176     ImmTyNegLo,
177     ImmTyNegHi,
178     ImmTySwizzle,
179     ImmTyGprIdxMode,
180     ImmTyEndpgm,
181     ImmTyHigh
182   };
183 
184 private:
185   struct TokOp {
186     const char *Data;
187     unsigned Length;
188   };
189 
190   struct ImmOp {
191     int64_t Val;
192     ImmTy Type;
193     bool IsFPImm;
194     Modifiers Mods;
195   };
196 
197   struct RegOp {
198     unsigned RegNo;
199     Modifiers Mods;
200   };
201 
202   union {
203     TokOp Tok;
204     ImmOp Imm;
205     RegOp Reg;
206     const MCExpr *Expr;
207   };
208 
209 public:
210   bool isToken() const override {
211     if (Kind == Token)
212       return true;
213 
214     if (Kind != Expression || !Expr)
215       return false;
216 
217     // When parsing operands, we can't always tell if something was meant to be
218     // a token, like 'gds', or an expression that references a global variable.
219     // In this case, we assume the string is an expression, and if we need to
220     // interpret is a token, then we treat the symbol name as the token.
221     return isa<MCSymbolRefExpr>(Expr);
222   }
223 
224   bool isImm() const override {
225     return Kind == Immediate;
226   }
227 
228   bool isInlinableImm(MVT type) const;
229   bool isLiteralImm(MVT type) const;
230 
231   bool isRegKind() const {
232     return Kind == Register;
233   }
234 
235   bool isReg() const override {
236     return isRegKind() && !hasModifiers();
237   }
238 
239   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
240     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
241   }
242 
243   bool isRegOrImmWithInt16InputMods() const {
244     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
245   }
246 
247   bool isRegOrImmWithInt32InputMods() const {
248     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
249   }
250 
251   bool isRegOrImmWithInt64InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
253   }
254 
255   bool isRegOrImmWithFP16InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
257   }
258 
259   bool isRegOrImmWithFP32InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
261   }
262 
263   bool isRegOrImmWithFP64InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
265   }
266 
267   bool isVReg() const {
268     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
269            isRegClass(AMDGPU::VReg_64RegClassID) ||
270            isRegClass(AMDGPU::VReg_96RegClassID) ||
271            isRegClass(AMDGPU::VReg_128RegClassID) ||
272            isRegClass(AMDGPU::VReg_256RegClassID) ||
273            isRegClass(AMDGPU::VReg_512RegClassID);
274   }
275 
276   bool isVReg32() const {
277     return isRegClass(AMDGPU::VGPR_32RegClassID);
278   }
279 
280   bool isVReg32OrOff() const {
281     return isOff() || isVReg32();
282   }
283 
284   bool isSDWAOperand(MVT type) const;
285   bool isSDWAFP16Operand() const;
286   bool isSDWAFP32Operand() const;
287   bool isSDWAInt16Operand() const;
288   bool isSDWAInt32Operand() const;
289 
290   bool isImmTy(ImmTy ImmT) const {
291     return isImm() && Imm.Type == ImmT;
292   }
293 
294   bool isImmModifier() const {
295     return isImm() && Imm.Type != ImmTyNone;
296   }
297 
298   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
299   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
300   bool isDMask() const { return isImmTy(ImmTyDMask); }
301   bool isDim() const { return isImmTy(ImmTyDim); }
302   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
303   bool isDA() const { return isImmTy(ImmTyDA); }
304   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
305   bool isLWE() const { return isImmTy(ImmTyLWE); }
306   bool isOff() const { return isImmTy(ImmTyOff); }
307   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
308   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
309   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
310   bool isOffen() const { return isImmTy(ImmTyOffen); }
311   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
312   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
313   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
314   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
315   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
316 
317   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
318   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
319   bool isGDS() const { return isImmTy(ImmTyGDS); }
320   bool isLDS() const { return isImmTy(ImmTyLDS); }
321   bool isDLC() const { return isImmTy(ImmTyDLC); }
322   bool isGLC() const { return isImmTy(ImmTyGLC); }
323   bool isSLC() const { return isImmTy(ImmTySLC); }
324   bool isTFE() const { return isImmTy(ImmTyTFE); }
325   bool isD16() const { return isImmTy(ImmTyD16); }
326   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
327   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
328   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
329   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
330   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
331   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
332   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
333   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
334   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
335   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
336   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
337   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
338   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
339   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
340   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
341   bool isHigh() const { return isImmTy(ImmTyHigh); }
342 
343   bool isMod() const {
344     return isClampSI() || isOModSI();
345   }
346 
347   bool isRegOrImm() const {
348     return isReg() || isImm();
349   }
350 
351   bool isRegClass(unsigned RCID) const;
352 
353   bool isInlineValue() const;
354 
355   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
356     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
357   }
358 
359   bool isSCSrcB16() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
361   }
362 
363   bool isSCSrcV2B16() const {
364     return isSCSrcB16();
365   }
366 
367   bool isSCSrcB32() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
369   }
370 
371   bool isSCSrcB64() const {
372     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
373   }
374 
375   bool isSCSrcF16() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
377   }
378 
379   bool isSCSrcV2F16() const {
380     return isSCSrcF16();
381   }
382 
383   bool isSCSrcF32() const {
384     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
385   }
386 
387   bool isSCSrcF64() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
389   }
390 
391   bool isSSrcB32() const {
392     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
393   }
394 
395   bool isSSrcB16() const {
396     return isSCSrcB16() || isLiteralImm(MVT::i16);
397   }
398 
399   bool isSSrcV2B16() const {
400     llvm_unreachable("cannot happen");
401     return isSSrcB16();
402   }
403 
404   bool isSSrcB64() const {
405     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
406     // See isVSrc64().
407     return isSCSrcB64() || isLiteralImm(MVT::i64);
408   }
409 
410   bool isSSrcF32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
412   }
413 
414   bool isSSrcF64() const {
415     return isSCSrcB64() || isLiteralImm(MVT::f64);
416   }
417 
418   bool isSSrcF16() const {
419     return isSCSrcB16() || isLiteralImm(MVT::f16);
420   }
421 
422   bool isSSrcV2F16() const {
423     llvm_unreachable("cannot happen");
424     return isSSrcF16();
425   }
426 
427   bool isSSrcOrLdsB32() const {
428     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
429            isLiteralImm(MVT::i32) || isExpr();
430   }
431 
432   bool isVCSrcB32() const {
433     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
434   }
435 
436   bool isVCSrcB64() const {
437     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
438   }
439 
440   bool isVCSrcB16() const {
441     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
442   }
443 
444   bool isVCSrcV2B16() const {
445     return isVCSrcB16();
446   }
447 
448   bool isVCSrcF32() const {
449     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
450   }
451 
452   bool isVCSrcF64() const {
453     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
454   }
455 
456   bool isVCSrcF16() const {
457     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
458   }
459 
460   bool isVCSrcV2F16() const {
461     return isVCSrcF16();
462   }
463 
464   bool isVSrcB32() const {
465     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
466   }
467 
468   bool isVSrcB64() const {
469     return isVCSrcF64() || isLiteralImm(MVT::i64);
470   }
471 
472   bool isVSrcB16() const {
473     return isVCSrcF16() || isLiteralImm(MVT::i16);
474   }
475 
476   bool isVSrcV2B16() const {
477     return isVSrcB16() || isLiteralImm(MVT::v2i16);
478   }
479 
480   bool isVSrcF32() const {
481     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
482   }
483 
484   bool isVSrcF64() const {
485     return isVCSrcF64() || isLiteralImm(MVT::f64);
486   }
487 
488   bool isVSrcF16() const {
489     return isVCSrcF16() || isLiteralImm(MVT::f16);
490   }
491 
492   bool isVSrcV2F16() const {
493     return isVSrcF16() || isLiteralImm(MVT::v2f16);
494   }
495 
496   bool isKImmFP32() const {
497     return isLiteralImm(MVT::f32);
498   }
499 
500   bool isKImmFP16() const {
501     return isLiteralImm(MVT::f16);
502   }
503 
504   bool isMem() const override {
505     return false;
506   }
507 
508   bool isExpr() const {
509     return Kind == Expression;
510   }
511 
512   bool isSoppBrTarget() const {
513     return isExpr() || isImm();
514   }
515 
516   bool isSWaitCnt() const;
517   bool isHwreg() const;
518   bool isSendMsg() const;
519   bool isSwizzle() const;
520   bool isSMRDOffset8() const;
521   bool isSMRDOffset20() const;
522   bool isSMRDLiteralOffset() const;
523   bool isDPPCtrl() const;
524   bool isGPRIdxMode() const;
525   bool isS16Imm() const;
526   bool isU16Imm() const;
527   bool isEndpgm() const;
528 
529   StringRef getExpressionAsToken() const {
530     assert(isExpr());
531     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
532     return S->getSymbol().getName();
533   }
534 
535   StringRef getToken() const {
536     assert(isToken());
537 
538     if (Kind == Expression)
539       return getExpressionAsToken();
540 
541     return StringRef(Tok.Data, Tok.Length);
542   }
543 
544   int64_t getImm() const {
545     assert(isImm());
546     return Imm.Val;
547   }
548 
549   ImmTy getImmTy() const {
550     assert(isImm());
551     return Imm.Type;
552   }
553 
554   unsigned getReg() const override {
555     assert(isRegKind());
556     return Reg.RegNo;
557   }
558 
559   SMLoc getStartLoc() const override {
560     return StartLoc;
561   }
562 
563   SMLoc getEndLoc() const override {
564     return EndLoc;
565   }
566 
567   SMRange getLocRange() const {
568     return SMRange(StartLoc, EndLoc);
569   }
570 
571   Modifiers getModifiers() const {
572     assert(isRegKind() || isImmTy(ImmTyNone));
573     return isRegKind() ? Reg.Mods : Imm.Mods;
574   }
575 
576   void setModifiers(Modifiers Mods) {
577     assert(isRegKind() || isImmTy(ImmTyNone));
578     if (isRegKind())
579       Reg.Mods = Mods;
580     else
581       Imm.Mods = Mods;
582   }
583 
584   bool hasModifiers() const {
585     return getModifiers().hasModifiers();
586   }
587 
588   bool hasFPModifiers() const {
589     return getModifiers().hasFPModifiers();
590   }
591 
592   bool hasIntModifiers() const {
593     return getModifiers().hasIntModifiers();
594   }
595 
596   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
597 
598   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
599 
600   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
601 
602   template <unsigned Bitwidth>
603   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
604 
605   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
606     addKImmFPOperands<16>(Inst, N);
607   }
608 
609   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
610     addKImmFPOperands<32>(Inst, N);
611   }
612 
613   void addRegOperands(MCInst &Inst, unsigned N) const;
614 
615   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
616     if (isRegKind())
617       addRegOperands(Inst, N);
618     else if (isExpr())
619       Inst.addOperand(MCOperand::createExpr(Expr));
620     else
621       addImmOperands(Inst, N);
622   }
623 
624   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
625     Modifiers Mods = getModifiers();
626     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
627     if (isRegKind()) {
628       addRegOperands(Inst, N);
629     } else {
630       addImmOperands(Inst, N, false);
631     }
632   }
633 
634   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
635     assert(!hasIntModifiers());
636     addRegOrImmWithInputModsOperands(Inst, N);
637   }
638 
639   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
640     assert(!hasFPModifiers());
641     addRegOrImmWithInputModsOperands(Inst, N);
642   }
643 
644   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
645     Modifiers Mods = getModifiers();
646     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
647     assert(isRegKind());
648     addRegOperands(Inst, N);
649   }
650 
651   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
652     assert(!hasIntModifiers());
653     addRegWithInputModsOperands(Inst, N);
654   }
655 
656   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
657     assert(!hasFPModifiers());
658     addRegWithInputModsOperands(Inst, N);
659   }
660 
661   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
662     if (isImm())
663       addImmOperands(Inst, N);
664     else {
665       assert(isExpr());
666       Inst.addOperand(MCOperand::createExpr(Expr));
667     }
668   }
669 
670   static void printImmTy(raw_ostream& OS, ImmTy Type) {
671     switch (Type) {
672     case ImmTyNone: OS << "None"; break;
673     case ImmTyGDS: OS << "GDS"; break;
674     case ImmTyLDS: OS << "LDS"; break;
675     case ImmTyOffen: OS << "Offen"; break;
676     case ImmTyIdxen: OS << "Idxen"; break;
677     case ImmTyAddr64: OS << "Addr64"; break;
678     case ImmTyOffset: OS << "Offset"; break;
679     case ImmTyInstOffset: OS << "InstOffset"; break;
680     case ImmTyOffset0: OS << "Offset0"; break;
681     case ImmTyOffset1: OS << "Offset1"; break;
682     case ImmTyDLC: OS << "DLC"; break;
683     case ImmTyGLC: OS << "GLC"; break;
684     case ImmTySLC: OS << "SLC"; break;
685     case ImmTyTFE: OS << "TFE"; break;
686     case ImmTyD16: OS << "D16"; break;
687     case ImmTyFORMAT: OS << "FORMAT"; break;
688     case ImmTyClampSI: OS << "ClampSI"; break;
689     case ImmTyOModSI: OS << "OModSI"; break;
690     case ImmTyDppCtrl: OS << "DppCtrl"; break;
691     case ImmTyDppRowMask: OS << "DppRowMask"; break;
692     case ImmTyDppBankMask: OS << "DppBankMask"; break;
693     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
694     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
695     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
696     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
697     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
698     case ImmTyDMask: OS << "DMask"; break;
699     case ImmTyDim: OS << "Dim"; break;
700     case ImmTyUNorm: OS << "UNorm"; break;
701     case ImmTyDA: OS << "DA"; break;
702     case ImmTyR128A16: OS << "R128A16"; break;
703     case ImmTyLWE: OS << "LWE"; break;
704     case ImmTyOff: OS << "Off"; break;
705     case ImmTyExpTgt: OS << "ExpTgt"; break;
706     case ImmTyExpCompr: OS << "ExpCompr"; break;
707     case ImmTyExpVM: OS << "ExpVM"; break;
708     case ImmTyHwreg: OS << "Hwreg"; break;
709     case ImmTySendMsg: OS << "SendMsg"; break;
710     case ImmTyInterpSlot: OS << "InterpSlot"; break;
711     case ImmTyInterpAttr: OS << "InterpAttr"; break;
712     case ImmTyAttrChan: OS << "AttrChan"; break;
713     case ImmTyOpSel: OS << "OpSel"; break;
714     case ImmTyOpSelHi: OS << "OpSelHi"; break;
715     case ImmTyNegLo: OS << "NegLo"; break;
716     case ImmTyNegHi: OS << "NegHi"; break;
717     case ImmTySwizzle: OS << "Swizzle"; break;
718     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
719     case ImmTyHigh: OS << "High"; break;
720     case ImmTyEndpgm:
721       OS << "Endpgm";
722       break;
723     }
724   }
725 
726   void print(raw_ostream &OS) const override {
727     switch (Kind) {
728     case Register:
729       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
730       break;
731     case Immediate:
732       OS << '<' << getImm();
733       if (getImmTy() != ImmTyNone) {
734         OS << " type: "; printImmTy(OS, getImmTy());
735       }
736       OS << " mods: " << Imm.Mods << '>';
737       break;
738     case Token:
739       OS << '\'' << getToken() << '\'';
740       break;
741     case Expression:
742       OS << "<expr " << *Expr << '>';
743       break;
744     }
745   }
746 
747   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
748                                       int64_t Val, SMLoc Loc,
749                                       ImmTy Type = ImmTyNone,
750                                       bool IsFPImm = false) {
751     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
752     Op->Imm.Val = Val;
753     Op->Imm.IsFPImm = IsFPImm;
754     Op->Imm.Type = Type;
755     Op->Imm.Mods = Modifiers();
756     Op->StartLoc = Loc;
757     Op->EndLoc = Loc;
758     return Op;
759   }
760 
761   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
762                                         StringRef Str, SMLoc Loc,
763                                         bool HasExplicitEncodingSize = true) {
764     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
765     Res->Tok.Data = Str.data();
766     Res->Tok.Length = Str.size();
767     Res->StartLoc = Loc;
768     Res->EndLoc = Loc;
769     return Res;
770   }
771 
772   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
773                                       unsigned RegNo, SMLoc S,
774                                       SMLoc E) {
775     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
776     Op->Reg.RegNo = RegNo;
777     Op->Reg.Mods = Modifiers();
778     Op->StartLoc = S;
779     Op->EndLoc = E;
780     return Op;
781   }
782 
783   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
784                                        const class MCExpr *Expr, SMLoc S) {
785     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
786     Op->Expr = Expr;
787     Op->StartLoc = S;
788     Op->EndLoc = S;
789     return Op;
790   }
791 };
792 
793 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
794   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
795   return OS;
796 }
797 
798 //===----------------------------------------------------------------------===//
799 // AsmParser
800 //===----------------------------------------------------------------------===//
801 
802 // Holds info related to the current kernel, e.g. count of SGPRs used.
803 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
804 // .amdgpu_hsa_kernel or at EOF.
805 class KernelScopeInfo {
806   int SgprIndexUnusedMin = -1;
807   int VgprIndexUnusedMin = -1;
808   MCContext *Ctx = nullptr;
809 
810   void usesSgprAt(int i) {
811     if (i >= SgprIndexUnusedMin) {
812       SgprIndexUnusedMin = ++i;
813       if (Ctx) {
814         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
815         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
816       }
817     }
818   }
819 
820   void usesVgprAt(int i) {
821     if (i >= VgprIndexUnusedMin) {
822       VgprIndexUnusedMin = ++i;
823       if (Ctx) {
824         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
825         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
826       }
827     }
828   }
829 
830 public:
831   KernelScopeInfo() = default;
832 
833   void initialize(MCContext &Context) {
834     Ctx = &Context;
835     usesSgprAt(SgprIndexUnusedMin = -1);
836     usesVgprAt(VgprIndexUnusedMin = -1);
837   }
838 
839   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
840     switch (RegKind) {
841       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
842       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
843       default: break;
844     }
845   }
846 };
847 
848 class AMDGPUAsmParser : public MCTargetAsmParser {
849   MCAsmParser &Parser;
850 
851   // Number of extra operands parsed after the first optional operand.
852   // This may be necessary to skip hardcoded mandatory operands.
853   static const unsigned MAX_OPR_LOOKAHEAD = 8;
854 
855   unsigned ForcedEncodingSize = 0;
856   bool ForcedDPP = false;
857   bool ForcedSDWA = false;
858   KernelScopeInfo KernelScope;
859 
860   /// @name Auto-generated Match Functions
861   /// {
862 
863 #define GET_ASSEMBLER_HEADER
864 #include "AMDGPUGenAsmMatcher.inc"
865 
866   /// }
867 
868 private:
869   bool ParseAsAbsoluteExpression(uint32_t &Ret);
870   bool OutOfRangeError(SMRange Range);
871   /// Calculate VGPR/SGPR blocks required for given target, reserved
872   /// registers, and user-specified NextFreeXGPR values.
873   ///
874   /// \param Features [in] Target features, used for bug corrections.
875   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
876   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
877   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
878   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
879   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
880   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
881   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
882   /// \param VGPRBlocks [out] Result VGPR block count.
883   /// \param SGPRBlocks [out] Result SGPR block count.
884   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
885                           bool FlatScrUsed, bool XNACKUsed,
886                           unsigned NextFreeVGPR, SMRange VGPRRange,
887                           unsigned NextFreeSGPR, SMRange SGPRRange,
888                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
889   bool ParseDirectiveAMDGCNTarget();
890   bool ParseDirectiveAMDHSAKernel();
891   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
892   bool ParseDirectiveHSACodeObjectVersion();
893   bool ParseDirectiveHSACodeObjectISA();
894   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
895   bool ParseDirectiveAMDKernelCodeT();
896   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
897   bool ParseDirectiveAMDGPUHsaKernel();
898 
899   bool ParseDirectiveISAVersion();
900   bool ParseDirectiveHSAMetadata();
901   bool ParseDirectivePALMetadataBegin();
902   bool ParseDirectivePALMetadata();
903 
904   /// Common code to parse out a block of text (typically YAML) between start and
905   /// end directives.
906   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
907                            const char *AssemblerDirectiveEnd,
908                            std::string &CollectString);
909 
910   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
911                              RegisterKind RegKind, unsigned Reg1,
912                              unsigned RegNum);
913   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
914                            unsigned& RegNum, unsigned& RegWidth,
915                            unsigned *DwordRegIndex);
916   bool isRegister();
917   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
918   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
919   void initializeGprCountSymbol(RegisterKind RegKind);
920   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
921                              unsigned RegWidth);
922   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
923                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
924   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
925                  bool IsGdsHardcoded);
926 
927 public:
928   enum AMDGPUMatchResultTy {
929     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
930   };
931   enum OperandMode {
932     OperandMode_Default,
933     OperandMode_NSA,
934   };
935 
936   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
937 
938   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
939                const MCInstrInfo &MII,
940                const MCTargetOptions &Options)
941       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
942     MCAsmParserExtension::Initialize(Parser);
943 
944     if (getFeatureBits().none()) {
945       // Set default features.
946       copySTI().ToggleFeature("southern-islands");
947     }
948 
949     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
950 
951     {
952       // TODO: make those pre-defined variables read-only.
953       // Currently there is none suitable machinery in the core llvm-mc for this.
954       // MCSymbol::isRedefinable is intended for another purpose, and
955       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
956       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
957       MCContext &Ctx = getContext();
958       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
959         MCSymbol *Sym =
960             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
961         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
962         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
963         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
964         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
965         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
966       } else {
967         MCSymbol *Sym =
968             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
969         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
970         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
971         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
972         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
973         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
974       }
975       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
976         initializeGprCountSymbol(IS_VGPR);
977         initializeGprCountSymbol(IS_SGPR);
978       } else
979         KernelScope.initialize(getContext());
980     }
981   }
982 
983   bool hasXNACK() const {
984     return AMDGPU::hasXNACK(getSTI());
985   }
986 
987   bool hasMIMG_R128() const {
988     return AMDGPU::hasMIMG_R128(getSTI());
989   }
990 
991   bool hasPackedD16() const {
992     return AMDGPU::hasPackedD16(getSTI());
993   }
994 
995   bool isSI() const {
996     return AMDGPU::isSI(getSTI());
997   }
998 
999   bool isCI() const {
1000     return AMDGPU::isCI(getSTI());
1001   }
1002 
1003   bool isVI() const {
1004     return AMDGPU::isVI(getSTI());
1005   }
1006 
1007   bool isGFX9() const {
1008     return AMDGPU::isGFX9(getSTI());
1009   }
1010 
1011   bool isGFX10() const {
1012     return AMDGPU::isGFX10(getSTI());
1013   }
1014 
1015   bool hasInv2PiInlineImm() const {
1016     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1017   }
1018 
1019   bool hasFlatOffsets() const {
1020     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1021   }
1022 
1023   bool hasSGPR102_SGPR103() const {
1024     return !isVI() && !isGFX9();
1025   }
1026 
1027   bool hasSGPR104_SGPR105() const {
1028     return isGFX10();
1029   }
1030 
1031   bool hasIntClamp() const {
1032     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1033   }
1034 
1035   AMDGPUTargetStreamer &getTargetStreamer() {
1036     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1037     return static_cast<AMDGPUTargetStreamer &>(TS);
1038   }
1039 
1040   const MCRegisterInfo *getMRI() const {
1041     // We need this const_cast because for some reason getContext() is not const
1042     // in MCAsmParser.
1043     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1044   }
1045 
1046   const MCInstrInfo *getMII() const {
1047     return &MII;
1048   }
1049 
1050   const FeatureBitset &getFeatureBits() const {
1051     return getSTI().getFeatureBits();
1052   }
1053 
1054   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1055   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1056   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1057 
1058   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1059   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1060   bool isForcedDPP() const { return ForcedDPP; }
1061   bool isForcedSDWA() const { return ForcedSDWA; }
1062   ArrayRef<unsigned> getMatchedVariants() const;
1063 
1064   std::unique_ptr<AMDGPUOperand> parseRegister();
1065   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1066   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1067   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1068                                       unsigned Kind) override;
1069   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1070                                OperandVector &Operands, MCStreamer &Out,
1071                                uint64_t &ErrorInfo,
1072                                bool MatchingInlineAsm) override;
1073   bool ParseDirective(AsmToken DirectiveID) override;
1074   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1075                                     OperandMode Mode = OperandMode_Default);
1076   StringRef parseMnemonicSuffix(StringRef Name);
1077   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1078                         SMLoc NameLoc, OperandVector &Operands) override;
1079   //bool ProcessInstruction(MCInst &Inst);
1080 
1081   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1082 
1083   OperandMatchResultTy
1084   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1085                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1086                      bool (*ConvertResult)(int64_t &) = nullptr);
1087 
1088   OperandMatchResultTy
1089   parseOperandArrayWithPrefix(const char *Prefix,
1090                               OperandVector &Operands,
1091                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1092                               bool (*ConvertResult)(int64_t&) = nullptr);
1093 
1094   OperandMatchResultTy
1095   parseNamedBit(const char *Name, OperandVector &Operands,
1096                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1097   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1098                                              StringRef &Value);
1099 
1100   bool isModifier();
1101   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1102   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1103   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1104   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1105   bool parseSP3NegModifier();
1106   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1107   OperandMatchResultTy parseReg(OperandVector &Operands);
1108   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1109   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1110   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1111   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1112   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1113   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1114   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1115 
1116   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1117   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1118   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1119   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1120 
1121   bool parseCnt(int64_t &IntVal);
1122   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1123   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1124 
1125 private:
1126   struct OperandInfoTy {
1127     int64_t Id;
1128     bool IsSymbolic = false;
1129 
1130     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1131   };
1132 
1133   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1134   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1135 
1136   void errorExpTgt();
1137   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1138 
1139   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1140   bool validateSOPLiteral(const MCInst &Inst) const;
1141   bool validateConstantBusLimitations(const MCInst &Inst);
1142   bool validateEarlyClobberLimitations(const MCInst &Inst);
1143   bool validateIntClampSupported(const MCInst &Inst);
1144   bool validateMIMGAtomicDMask(const MCInst &Inst);
1145   bool validateMIMGGatherDMask(const MCInst &Inst);
1146   bool validateMIMGDataSize(const MCInst &Inst);
1147   bool validateMIMGAddrSize(const MCInst &Inst);
1148   bool validateMIMGD16(const MCInst &Inst);
1149   bool validateMIMGDim(const MCInst &Inst);
1150   bool validateLdsDirect(const MCInst &Inst);
1151   bool validateVOP3Literal(const MCInst &Inst) const;
1152   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1153   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1154   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1155 
1156   bool isId(const StringRef Id) const;
1157   bool isId(const AsmToken &Token, const StringRef Id) const;
1158   bool isToken(const AsmToken::TokenKind Kind) const;
1159   bool trySkipId(const StringRef Id);
1160   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1161   bool trySkipToken(const AsmToken::TokenKind Kind);
1162   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1163   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1164   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1165   AsmToken::TokenKind getTokenKind() const;
1166   bool parseExpr(int64_t &Imm);
1167   StringRef getTokenStr() const;
1168   AsmToken peekToken();
1169   AsmToken getToken() const;
1170   SMLoc getLoc() const;
1171   void lex();
1172 
1173 public:
1174   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1175   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1176 
1177   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1178   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1179   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1180   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1181   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1182 
1183   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1184                             const unsigned MinVal,
1185                             const unsigned MaxVal,
1186                             const StringRef ErrMsg);
1187   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1188   bool parseSwizzleOffset(int64_t &Imm);
1189   bool parseSwizzleMacro(int64_t &Imm);
1190   bool parseSwizzleQuadPerm(int64_t &Imm);
1191   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1192   bool parseSwizzleBroadcast(int64_t &Imm);
1193   bool parseSwizzleSwap(int64_t &Imm);
1194   bool parseSwizzleReverse(int64_t &Imm);
1195 
1196   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1197   int64_t parseGPRIdxMacro();
1198 
1199   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1200   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1201   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1202   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1203   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1204 
1205   AMDGPUOperand::Ptr defaultDLC() const;
1206   AMDGPUOperand::Ptr defaultGLC() const;
1207   AMDGPUOperand::Ptr defaultSLC() const;
1208 
1209   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1210   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1211   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1212   AMDGPUOperand::Ptr defaultOffsetU12() const;
1213   AMDGPUOperand::Ptr defaultOffsetS13() const;
1214 
1215   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1216 
1217   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1218                OptionalImmIndexMap &OptionalIdx);
1219   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1220   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1221   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1222 
1223   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1224 
1225   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1226                bool IsAtomic = false);
1227   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1228 
1229   OperandMatchResultTy parseDim(OperandVector &Operands);
1230   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1231   AMDGPUOperand::Ptr defaultRowMask() const;
1232   AMDGPUOperand::Ptr defaultBankMask() const;
1233   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1234   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1235 
1236   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1237                                     AMDGPUOperand::ImmTy Type);
1238   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1239   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1240   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1241   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1242   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1243   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1244                 uint64_t BasicInstType, bool skipVcc = false);
1245 
1246   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1247   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1248 };
1249 
1250 struct OptionalOperand {
1251   const char *Name;
1252   AMDGPUOperand::ImmTy Type;
1253   bool IsBit;
1254   bool (*ConvertResult)(int64_t&);
1255 };
1256 
1257 } // end anonymous namespace
1258 
1259 // May be called with integer type with equivalent bitwidth.
1260 static const fltSemantics *getFltSemantics(unsigned Size) {
1261   switch (Size) {
1262   case 4:
1263     return &APFloat::IEEEsingle();
1264   case 8:
1265     return &APFloat::IEEEdouble();
1266   case 2:
1267     return &APFloat::IEEEhalf();
1268   default:
1269     llvm_unreachable("unsupported fp type");
1270   }
1271 }
1272 
1273 static const fltSemantics *getFltSemantics(MVT VT) {
1274   return getFltSemantics(VT.getSizeInBits() / 8);
1275 }
1276 
1277 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1278   switch (OperandType) {
1279   case AMDGPU::OPERAND_REG_IMM_INT32:
1280   case AMDGPU::OPERAND_REG_IMM_FP32:
1281   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1282   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1283     return &APFloat::IEEEsingle();
1284   case AMDGPU::OPERAND_REG_IMM_INT64:
1285   case AMDGPU::OPERAND_REG_IMM_FP64:
1286   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1287   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1288     return &APFloat::IEEEdouble();
1289   case AMDGPU::OPERAND_REG_IMM_INT16:
1290   case AMDGPU::OPERAND_REG_IMM_FP16:
1291   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1292   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1293   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1294   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1295   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1296   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1297     return &APFloat::IEEEhalf();
1298   default:
1299     llvm_unreachable("unsupported fp type");
1300   }
1301 }
1302 
1303 //===----------------------------------------------------------------------===//
1304 // Operand
1305 //===----------------------------------------------------------------------===//
1306 
1307 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1308   bool Lost;
1309 
1310   // Convert literal to single precision
1311   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1312                                                APFloat::rmNearestTiesToEven,
1313                                                &Lost);
1314   // We allow precision lost but not overflow or underflow
1315   if (Status != APFloat::opOK &&
1316       Lost &&
1317       ((Status & APFloat::opOverflow)  != 0 ||
1318        (Status & APFloat::opUnderflow) != 0)) {
1319     return false;
1320   }
1321 
1322   return true;
1323 }
1324 
1325 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1326   return isUIntN(Size, Val) || isIntN(Size, Val);
1327 }
1328 
1329 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1330 
1331   // This is a hack to enable named inline values like
1332   // shared_base with both 32-bit and 64-bit operands.
1333   // Note that these values are defined as
1334   // 32-bit operands only.
1335   if (isInlineValue()) {
1336     return true;
1337   }
1338 
1339   if (!isImmTy(ImmTyNone)) {
1340     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1341     return false;
1342   }
1343   // TODO: We should avoid using host float here. It would be better to
1344   // check the float bit values which is what a few other places do.
1345   // We've had bot failures before due to weird NaN support on mips hosts.
1346 
1347   APInt Literal(64, Imm.Val);
1348 
1349   if (Imm.IsFPImm) { // We got fp literal token
1350     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1351       return AMDGPU::isInlinableLiteral64(Imm.Val,
1352                                           AsmParser->hasInv2PiInlineImm());
1353     }
1354 
1355     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1356     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1357       return false;
1358 
1359     if (type.getScalarSizeInBits() == 16) {
1360       return AMDGPU::isInlinableLiteral16(
1361         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1362         AsmParser->hasInv2PiInlineImm());
1363     }
1364 
1365     // Check if single precision literal is inlinable
1366     return AMDGPU::isInlinableLiteral32(
1367       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1368       AsmParser->hasInv2PiInlineImm());
1369   }
1370 
1371   // We got int literal token.
1372   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1373     return AMDGPU::isInlinableLiteral64(Imm.Val,
1374                                         AsmParser->hasInv2PiInlineImm());
1375   }
1376 
1377   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1378     return false;
1379   }
1380 
1381   if (type.getScalarSizeInBits() == 16) {
1382     return AMDGPU::isInlinableLiteral16(
1383       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1384       AsmParser->hasInv2PiInlineImm());
1385   }
1386 
1387   return AMDGPU::isInlinableLiteral32(
1388     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1389     AsmParser->hasInv2PiInlineImm());
1390 }
1391 
1392 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1393   // Check that this immediate can be added as literal
1394   if (!isImmTy(ImmTyNone)) {
1395     return false;
1396   }
1397 
1398   if (!Imm.IsFPImm) {
1399     // We got int literal token.
1400 
1401     if (type == MVT::f64 && hasFPModifiers()) {
1402       // Cannot apply fp modifiers to int literals preserving the same semantics
1403       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1404       // disable these cases.
1405       return false;
1406     }
1407 
1408     unsigned Size = type.getSizeInBits();
1409     if (Size == 64)
1410       Size = 32;
1411 
1412     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1413     // types.
1414     return isSafeTruncation(Imm.Val, Size);
1415   }
1416 
1417   // We got fp literal token
1418   if (type == MVT::f64) { // Expected 64-bit fp operand
1419     // We would set low 64-bits of literal to zeroes but we accept this literals
1420     return true;
1421   }
1422 
1423   if (type == MVT::i64) { // Expected 64-bit int operand
1424     // We don't allow fp literals in 64-bit integer instructions. It is
1425     // unclear how we should encode them.
1426     return false;
1427   }
1428 
1429   // We allow fp literals with f16x2 operands assuming that the specified
1430   // literal goes into the lower half and the upper half is zero. We also
1431   // require that the literal may be losslesly converted to f16.
1432   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1433                      (type == MVT::v2i16)? MVT::i16 : type;
1434 
1435   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1436   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1437 }
1438 
1439 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1440   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1441 }
1442 
1443 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1444   if (AsmParser->isVI())
1445     return isVReg32();
1446   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1447     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1448   else
1449     return false;
1450 }
1451 
1452 bool AMDGPUOperand::isSDWAFP16Operand() const {
1453   return isSDWAOperand(MVT::f16);
1454 }
1455 
1456 bool AMDGPUOperand::isSDWAFP32Operand() const {
1457   return isSDWAOperand(MVT::f32);
1458 }
1459 
1460 bool AMDGPUOperand::isSDWAInt16Operand() const {
1461   return isSDWAOperand(MVT::i16);
1462 }
1463 
1464 bool AMDGPUOperand::isSDWAInt32Operand() const {
1465   return isSDWAOperand(MVT::i32);
1466 }
1467 
1468 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1469 {
1470   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1471   assert(Size == 2 || Size == 4 || Size == 8);
1472 
1473   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1474 
1475   if (Imm.Mods.Abs) {
1476     Val &= ~FpSignMask;
1477   }
1478   if (Imm.Mods.Neg) {
1479     Val ^= FpSignMask;
1480   }
1481 
1482   return Val;
1483 }
1484 
1485 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1486   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1487                              Inst.getNumOperands())) {
1488     addLiteralImmOperand(Inst, Imm.Val,
1489                          ApplyModifiers &
1490                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1491   } else {
1492     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1493     Inst.addOperand(MCOperand::createImm(Imm.Val));
1494   }
1495 }
1496 
1497 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1498   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1499   auto OpNum = Inst.getNumOperands();
1500   // Check that this operand accepts literals
1501   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1502 
1503   if (ApplyModifiers) {
1504     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1505     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1506     Val = applyInputFPModifiers(Val, Size);
1507   }
1508 
1509   APInt Literal(64, Val);
1510   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1511 
1512   if (Imm.IsFPImm) { // We got fp literal token
1513     switch (OpTy) {
1514     case AMDGPU::OPERAND_REG_IMM_INT64:
1515     case AMDGPU::OPERAND_REG_IMM_FP64:
1516     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1517     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1518       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1519                                        AsmParser->hasInv2PiInlineImm())) {
1520         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1521         return;
1522       }
1523 
1524       // Non-inlineable
1525       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1526         // For fp operands we check if low 32 bits are zeros
1527         if (Literal.getLoBits(32) != 0) {
1528           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1529           "Can't encode literal as exact 64-bit floating-point operand. "
1530           "Low 32-bits will be set to zero");
1531         }
1532 
1533         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1534         return;
1535       }
1536 
1537       // We don't allow fp literals in 64-bit integer instructions. It is
1538       // unclear how we should encode them. This case should be checked earlier
1539       // in predicate methods (isLiteralImm())
1540       llvm_unreachable("fp literal in 64-bit integer instruction.");
1541 
1542     case AMDGPU::OPERAND_REG_IMM_INT32:
1543     case AMDGPU::OPERAND_REG_IMM_FP32:
1544     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1545     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1546     case AMDGPU::OPERAND_REG_IMM_INT16:
1547     case AMDGPU::OPERAND_REG_IMM_FP16:
1548     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1549     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1550     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1551     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1552     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1553     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1554       bool lost;
1555       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1556       // Convert literal to single precision
1557       FPLiteral.convert(*getOpFltSemantics(OpTy),
1558                         APFloat::rmNearestTiesToEven, &lost);
1559       // We allow precision lost but not overflow or underflow. This should be
1560       // checked earlier in isLiteralImm()
1561 
1562       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1563       Inst.addOperand(MCOperand::createImm(ImmVal));
1564       return;
1565     }
1566     default:
1567       llvm_unreachable("invalid operand size");
1568     }
1569 
1570     return;
1571   }
1572 
1573   // We got int literal token.
1574   // Only sign extend inline immediates.
1575   switch (OpTy) {
1576   case AMDGPU::OPERAND_REG_IMM_INT32:
1577   case AMDGPU::OPERAND_REG_IMM_FP32:
1578   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1579   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1580   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1581   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1582     if (isSafeTruncation(Val, 32) &&
1583         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1584                                      AsmParser->hasInv2PiInlineImm())) {
1585       Inst.addOperand(MCOperand::createImm(Val));
1586       return;
1587     }
1588 
1589     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1590     return;
1591 
1592   case AMDGPU::OPERAND_REG_IMM_INT64:
1593   case AMDGPU::OPERAND_REG_IMM_FP64:
1594   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1595   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1596     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1597       Inst.addOperand(MCOperand::createImm(Val));
1598       return;
1599     }
1600 
1601     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1602     return;
1603 
1604   case AMDGPU::OPERAND_REG_IMM_INT16:
1605   case AMDGPU::OPERAND_REG_IMM_FP16:
1606   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1607   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1608     if (isSafeTruncation(Val, 16) &&
1609         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1610                                      AsmParser->hasInv2PiInlineImm())) {
1611       Inst.addOperand(MCOperand::createImm(Val));
1612       return;
1613     }
1614 
1615     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1616     return;
1617 
1618   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1619   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1620     assert(isSafeTruncation(Val, 16));
1621     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1622                                         AsmParser->hasInv2PiInlineImm()));
1623 
1624     Inst.addOperand(MCOperand::createImm(Val));
1625     return;
1626   }
1627   default:
1628     llvm_unreachable("invalid operand size");
1629   }
1630 }
1631 
1632 template <unsigned Bitwidth>
1633 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1634   APInt Literal(64, Imm.Val);
1635 
1636   if (!Imm.IsFPImm) {
1637     // We got int literal token.
1638     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1639     return;
1640   }
1641 
1642   bool Lost;
1643   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1644   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1645                     APFloat::rmNearestTiesToEven, &Lost);
1646   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1647 }
1648 
1649 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1650   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1651 }
1652 
1653 static bool isInlineValue(unsigned Reg) {
1654   switch (Reg) {
1655   case AMDGPU::SRC_SHARED_BASE:
1656   case AMDGPU::SRC_SHARED_LIMIT:
1657   case AMDGPU::SRC_PRIVATE_BASE:
1658   case AMDGPU::SRC_PRIVATE_LIMIT:
1659   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1660     return true;
1661   default:
1662     return false;
1663   }
1664 }
1665 
1666 bool AMDGPUOperand::isInlineValue() const {
1667   return isRegKind() && ::isInlineValue(getReg());
1668 }
1669 
1670 //===----------------------------------------------------------------------===//
1671 // AsmParser
1672 //===----------------------------------------------------------------------===//
1673 
1674 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1675   if (Is == IS_VGPR) {
1676     switch (RegWidth) {
1677       default: return -1;
1678       case 1: return AMDGPU::VGPR_32RegClassID;
1679       case 2: return AMDGPU::VReg_64RegClassID;
1680       case 3: return AMDGPU::VReg_96RegClassID;
1681       case 4: return AMDGPU::VReg_128RegClassID;
1682       case 8: return AMDGPU::VReg_256RegClassID;
1683       case 16: return AMDGPU::VReg_512RegClassID;
1684     }
1685   } else if (Is == IS_TTMP) {
1686     switch (RegWidth) {
1687       default: return -1;
1688       case 1: return AMDGPU::TTMP_32RegClassID;
1689       case 2: return AMDGPU::TTMP_64RegClassID;
1690       case 4: return AMDGPU::TTMP_128RegClassID;
1691       case 8: return AMDGPU::TTMP_256RegClassID;
1692       case 16: return AMDGPU::TTMP_512RegClassID;
1693     }
1694   } else if (Is == IS_SGPR) {
1695     switch (RegWidth) {
1696       default: return -1;
1697       case 1: return AMDGPU::SGPR_32RegClassID;
1698       case 2: return AMDGPU::SGPR_64RegClassID;
1699       case 4: return AMDGPU::SGPR_128RegClassID;
1700       case 8: return AMDGPU::SGPR_256RegClassID;
1701       case 16: return AMDGPU::SGPR_512RegClassID;
1702     }
1703   }
1704   return -1;
1705 }
1706 
1707 static unsigned getSpecialRegForName(StringRef RegName) {
1708   return StringSwitch<unsigned>(RegName)
1709     .Case("exec", AMDGPU::EXEC)
1710     .Case("vcc", AMDGPU::VCC)
1711     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1712     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1713     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1714     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1715     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1716     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1717     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1718     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1719     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1720     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1721     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1722     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1723     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1724     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1725     .Case("m0", AMDGPU::M0)
1726     .Case("scc", AMDGPU::SCC)
1727     .Case("tba", AMDGPU::TBA)
1728     .Case("tma", AMDGPU::TMA)
1729     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1730     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1731     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1732     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1733     .Case("vcc_lo", AMDGPU::VCC_LO)
1734     .Case("vcc_hi", AMDGPU::VCC_HI)
1735     .Case("exec_lo", AMDGPU::EXEC_LO)
1736     .Case("exec_hi", AMDGPU::EXEC_HI)
1737     .Case("tma_lo", AMDGPU::TMA_LO)
1738     .Case("tma_hi", AMDGPU::TMA_HI)
1739     .Case("tba_lo", AMDGPU::TBA_LO)
1740     .Case("tba_hi", AMDGPU::TBA_HI)
1741     .Case("null", AMDGPU::SGPR_NULL)
1742     .Default(0);
1743 }
1744 
1745 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1746                                     SMLoc &EndLoc) {
1747   auto R = parseRegister();
1748   if (!R) return true;
1749   assert(R->isReg());
1750   RegNo = R->getReg();
1751   StartLoc = R->getStartLoc();
1752   EndLoc = R->getEndLoc();
1753   return false;
1754 }
1755 
1756 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1757                                             RegisterKind RegKind, unsigned Reg1,
1758                                             unsigned RegNum) {
1759   switch (RegKind) {
1760   case IS_SPECIAL:
1761     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1762       Reg = AMDGPU::EXEC;
1763       RegWidth = 2;
1764       return true;
1765     }
1766     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1767       Reg = AMDGPU::FLAT_SCR;
1768       RegWidth = 2;
1769       return true;
1770     }
1771     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1772       Reg = AMDGPU::XNACK_MASK;
1773       RegWidth = 2;
1774       return true;
1775     }
1776     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1777       Reg = AMDGPU::VCC;
1778       RegWidth = 2;
1779       return true;
1780     }
1781     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1782       Reg = AMDGPU::TBA;
1783       RegWidth = 2;
1784       return true;
1785     }
1786     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1787       Reg = AMDGPU::TMA;
1788       RegWidth = 2;
1789       return true;
1790     }
1791     return false;
1792   case IS_VGPR:
1793   case IS_SGPR:
1794   case IS_TTMP:
1795     if (Reg1 != Reg + RegWidth) {
1796       return false;
1797     }
1798     RegWidth++;
1799     return true;
1800   default:
1801     llvm_unreachable("unexpected register kind");
1802   }
1803 }
1804 
1805 static const StringRef Registers[] = {
1806   { "v" },
1807   { "s" },
1808   { "ttmp" },
1809 };
1810 
1811 bool
1812 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1813                             const AsmToken &NextToken) const {
1814 
1815   // A list of consecutive registers: [s0,s1,s2,s3]
1816   if (Token.is(AsmToken::LBrac))
1817     return true;
1818 
1819   if (!Token.is(AsmToken::Identifier))
1820     return false;
1821 
1822   // A single register like s0 or a range of registers like s[0:1]
1823 
1824   StringRef RegName = Token.getString();
1825 
1826   for (StringRef Reg : Registers) {
1827     if (RegName.startswith(Reg)) {
1828       if (Reg.size() < RegName.size()) {
1829         unsigned RegNum;
1830         // A single register with an index: rXX
1831         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1832           return true;
1833       } else {
1834         // A range of registers: r[XX:YY].
1835         if (NextToken.is(AsmToken::LBrac))
1836           return true;
1837       }
1838     }
1839   }
1840 
1841   return getSpecialRegForName(RegName);
1842 }
1843 
1844 bool
1845 AMDGPUAsmParser::isRegister()
1846 {
1847   return isRegister(getToken(), peekToken());
1848 }
1849 
1850 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1851                                           unsigned &RegNum, unsigned &RegWidth,
1852                                           unsigned *DwordRegIndex) {
1853   if (DwordRegIndex) { *DwordRegIndex = 0; }
1854   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1855   if (getLexer().is(AsmToken::Identifier)) {
1856     StringRef RegName = Parser.getTok().getString();
1857     if ((Reg = getSpecialRegForName(RegName))) {
1858       Parser.Lex();
1859       RegKind = IS_SPECIAL;
1860     } else {
1861       unsigned RegNumIndex = 0;
1862       if (RegName[0] == 'v') {
1863         RegNumIndex = 1;
1864         RegKind = IS_VGPR;
1865       } else if (RegName[0] == 's') {
1866         RegNumIndex = 1;
1867         RegKind = IS_SGPR;
1868       } else if (RegName.startswith("ttmp")) {
1869         RegNumIndex = strlen("ttmp");
1870         RegKind = IS_TTMP;
1871       } else {
1872         return false;
1873       }
1874       if (RegName.size() > RegNumIndex) {
1875         // Single 32-bit register: vXX.
1876         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1877           return false;
1878         Parser.Lex();
1879         RegWidth = 1;
1880       } else {
1881         // Range of registers: v[XX:YY]. ":YY" is optional.
1882         Parser.Lex();
1883         int64_t RegLo, RegHi;
1884         if (getLexer().isNot(AsmToken::LBrac))
1885           return false;
1886         Parser.Lex();
1887 
1888         if (getParser().parseAbsoluteExpression(RegLo))
1889           return false;
1890 
1891         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1892         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1893           return false;
1894         Parser.Lex();
1895 
1896         if (isRBrace) {
1897           RegHi = RegLo;
1898         } else {
1899           if (getParser().parseAbsoluteExpression(RegHi))
1900             return false;
1901 
1902           if (getLexer().isNot(AsmToken::RBrac))
1903             return false;
1904           Parser.Lex();
1905         }
1906         RegNum = (unsigned) RegLo;
1907         RegWidth = (RegHi - RegLo) + 1;
1908       }
1909     }
1910   } else if (getLexer().is(AsmToken::LBrac)) {
1911     // List of consecutive registers: [s0,s1,s2,s3]
1912     Parser.Lex();
1913     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1914       return false;
1915     if (RegWidth != 1)
1916       return false;
1917     RegisterKind RegKind1;
1918     unsigned Reg1, RegNum1, RegWidth1;
1919     do {
1920       if (getLexer().is(AsmToken::Comma)) {
1921         Parser.Lex();
1922       } else if (getLexer().is(AsmToken::RBrac)) {
1923         Parser.Lex();
1924         break;
1925       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1926         if (RegWidth1 != 1) {
1927           return false;
1928         }
1929         if (RegKind1 != RegKind) {
1930           return false;
1931         }
1932         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1933           return false;
1934         }
1935       } else {
1936         return false;
1937       }
1938     } while (true);
1939   } else {
1940     return false;
1941   }
1942   switch (RegKind) {
1943   case IS_SPECIAL:
1944     RegNum = 0;
1945     RegWidth = 1;
1946     break;
1947   case IS_VGPR:
1948   case IS_SGPR:
1949   case IS_TTMP:
1950   {
1951     unsigned Size = 1;
1952     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1953       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1954       Size = std::min(RegWidth, 4u);
1955     }
1956     if (RegNum % Size != 0)
1957       return false;
1958     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1959     RegNum = RegNum / Size;
1960     int RCID = getRegClass(RegKind, RegWidth);
1961     if (RCID == -1)
1962       return false;
1963     const MCRegisterClass RC = TRI->getRegClass(RCID);
1964     if (RegNum >= RC.getNumRegs())
1965       return false;
1966     Reg = RC.getRegister(RegNum);
1967     break;
1968   }
1969 
1970   default:
1971     llvm_unreachable("unexpected register kind");
1972   }
1973 
1974   if (!subtargetHasRegister(*TRI, Reg))
1975     return false;
1976   return true;
1977 }
1978 
1979 Optional<StringRef>
1980 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1981   switch (RegKind) {
1982   case IS_VGPR:
1983     return StringRef(".amdgcn.next_free_vgpr");
1984   case IS_SGPR:
1985     return StringRef(".amdgcn.next_free_sgpr");
1986   default:
1987     return None;
1988   }
1989 }
1990 
1991 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1992   auto SymbolName = getGprCountSymbolName(RegKind);
1993   assert(SymbolName && "initializing invalid register kind");
1994   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1995   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1996 }
1997 
1998 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1999                                             unsigned DwordRegIndex,
2000                                             unsigned RegWidth) {
2001   // Symbols are only defined for GCN targets
2002   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2003     return true;
2004 
2005   auto SymbolName = getGprCountSymbolName(RegKind);
2006   if (!SymbolName)
2007     return true;
2008   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2009 
2010   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2011   int64_t OldCount;
2012 
2013   if (!Sym->isVariable())
2014     return !Error(getParser().getTok().getLoc(),
2015                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2016   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2017     return !Error(
2018         getParser().getTok().getLoc(),
2019         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2020 
2021   if (OldCount <= NewMax)
2022     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2023 
2024   return true;
2025 }
2026 
2027 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2028   const auto &Tok = Parser.getTok();
2029   SMLoc StartLoc = Tok.getLoc();
2030   SMLoc EndLoc = Tok.getEndLoc();
2031   RegisterKind RegKind;
2032   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2033 
2034   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2035     //FIXME: improve error messages (bug 41303).
2036     Error(StartLoc, "not a valid operand.");
2037     return nullptr;
2038   }
2039   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2040     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2041       return nullptr;
2042   } else
2043     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2044   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2045 }
2046 
2047 OperandMatchResultTy
2048 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2049   // TODO: add syntactic sugar for 1/(2*PI)
2050 
2051   assert(!isRegister());
2052   assert(!isModifier());
2053 
2054   const auto& Tok = getToken();
2055   const auto& NextTok = peekToken();
2056   bool IsReal = Tok.is(AsmToken::Real);
2057   SMLoc S = getLoc();
2058   bool Negate = false;
2059 
2060   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2061     lex();
2062     IsReal = true;
2063     Negate = true;
2064   }
2065 
2066   if (IsReal) {
2067     // Floating-point expressions are not supported.
2068     // Can only allow floating-point literals with an
2069     // optional sign.
2070 
2071     StringRef Num = getTokenStr();
2072     lex();
2073 
2074     APFloat RealVal(APFloat::IEEEdouble());
2075     auto roundMode = APFloat::rmNearestTiesToEven;
2076     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2077       return MatchOperand_ParseFail;
2078     }
2079     if (Negate)
2080       RealVal.changeSign();
2081 
2082     Operands.push_back(
2083       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2084                                AMDGPUOperand::ImmTyNone, true));
2085 
2086     return MatchOperand_Success;
2087 
2088   } else {
2089     int64_t IntVal;
2090     const MCExpr *Expr;
2091     SMLoc S = getLoc();
2092 
2093     if (HasSP3AbsModifier) {
2094       // This is a workaround for handling expressions
2095       // as arguments of SP3 'abs' modifier, for example:
2096       //     |1.0|
2097       //     |-1|
2098       //     |1+x|
2099       // This syntax is not compatible with syntax of standard
2100       // MC expressions (due to the trailing '|').
2101       SMLoc EndLoc;
2102       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2103         return MatchOperand_ParseFail;
2104     } else {
2105       if (Parser.parseExpression(Expr))
2106         return MatchOperand_ParseFail;
2107     }
2108 
2109     if (Expr->evaluateAsAbsolute(IntVal)) {
2110       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2111     } else {
2112       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2113     }
2114 
2115     return MatchOperand_Success;
2116   }
2117 
2118   return MatchOperand_NoMatch;
2119 }
2120 
2121 OperandMatchResultTy
2122 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2123   if (!isRegister())
2124     return MatchOperand_NoMatch;
2125 
2126   if (auto R = parseRegister()) {
2127     assert(R->isReg());
2128     Operands.push_back(std::move(R));
2129     return MatchOperand_Success;
2130   }
2131   return MatchOperand_ParseFail;
2132 }
2133 
2134 OperandMatchResultTy
2135 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2136   auto res = parseReg(Operands);
2137   if (res != MatchOperand_NoMatch) {
2138     return res;
2139   } else if (isModifier()) {
2140     return MatchOperand_NoMatch;
2141   } else {
2142     return parseImm(Operands, HasSP3AbsMod);
2143   }
2144 }
2145 
2146 bool
2147 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2148   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2149     const auto &str = Token.getString();
2150     return str == "abs" || str == "neg" || str == "sext";
2151   }
2152   return false;
2153 }
2154 
2155 bool
2156 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2157   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2158 }
2159 
2160 bool
2161 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2162   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2163 }
2164 
2165 bool
2166 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2167   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2168 }
2169 
2170 // Check if this is an operand modifier or an opcode modifier
2171 // which may look like an expression but it is not. We should
2172 // avoid parsing these modifiers as expressions. Currently
2173 // recognized sequences are:
2174 //   |...|
2175 //   abs(...)
2176 //   neg(...)
2177 //   sext(...)
2178 //   -reg
2179 //   -|...|
2180 //   -abs(...)
2181 //   name:...
2182 // Note that simple opcode modifiers like 'gds' may be parsed as
2183 // expressions; this is a special case. See getExpressionAsToken.
2184 //
2185 bool
2186 AMDGPUAsmParser::isModifier() {
2187 
2188   AsmToken Tok = getToken();
2189   AsmToken NextToken[2];
2190   peekTokens(NextToken);
2191 
2192   return isOperandModifier(Tok, NextToken[0]) ||
2193          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2194          isOpcodeModifierWithVal(Tok, NextToken[0]);
2195 }
2196 
2197 // Check if the current token is an SP3 'neg' modifier.
2198 // Currently this modifier is allowed in the following context:
2199 //
2200 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2201 // 2. Before an 'abs' modifier: -abs(...)
2202 // 3. Before an SP3 'abs' modifier: -|...|
2203 //
2204 // In all other cases "-" is handled as a part
2205 // of an expression that follows the sign.
2206 //
2207 // Note: When "-" is followed by an integer literal,
2208 // this is interpreted as integer negation rather
2209 // than a floating-point NEG modifier applied to N.
2210 // Beside being contr-intuitive, such use of floating-point
2211 // NEG modifier would have resulted in different meaning
2212 // of integer literals used with VOP1/2/C and VOP3,
2213 // for example:
2214 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2215 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2216 // Negative fp literals with preceding "-" are
2217 // handled likewise for unifomtity
2218 //
2219 bool
2220 AMDGPUAsmParser::parseSP3NegModifier() {
2221 
2222   AsmToken NextToken[2];
2223   peekTokens(NextToken);
2224 
2225   if (isToken(AsmToken::Minus) &&
2226       (isRegister(NextToken[0], NextToken[1]) ||
2227        NextToken[0].is(AsmToken::Pipe) ||
2228        isId(NextToken[0], "abs"))) {
2229     lex();
2230     return true;
2231   }
2232 
2233   return false;
2234 }
2235 
2236 OperandMatchResultTy
2237 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2238                                               bool AllowImm) {
2239   bool Neg, SP3Neg;
2240   bool Abs, SP3Abs;
2241   SMLoc Loc;
2242 
2243   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2244   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2245     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2246     return MatchOperand_ParseFail;
2247   }
2248 
2249   SP3Neg = parseSP3NegModifier();
2250 
2251   Loc = getLoc();
2252   Neg = trySkipId("neg");
2253   if (Neg && SP3Neg) {
2254     Error(Loc, "expected register or immediate");
2255     return MatchOperand_ParseFail;
2256   }
2257   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2258     return MatchOperand_ParseFail;
2259 
2260   Abs = trySkipId("abs");
2261   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2262     return MatchOperand_ParseFail;
2263 
2264   Loc = getLoc();
2265   SP3Abs = trySkipToken(AsmToken::Pipe);
2266   if (Abs && SP3Abs) {
2267     Error(Loc, "expected register or immediate");
2268     return MatchOperand_ParseFail;
2269   }
2270 
2271   OperandMatchResultTy Res;
2272   if (AllowImm) {
2273     Res = parseRegOrImm(Operands, SP3Abs);
2274   } else {
2275     Res = parseReg(Operands);
2276   }
2277   if (Res != MatchOperand_Success) {
2278     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2279   }
2280 
2281   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2282     return MatchOperand_ParseFail;
2283   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2284     return MatchOperand_ParseFail;
2285   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2286     return MatchOperand_ParseFail;
2287 
2288   AMDGPUOperand::Modifiers Mods;
2289   Mods.Abs = Abs || SP3Abs;
2290   Mods.Neg = Neg || SP3Neg;
2291 
2292   if (Mods.hasFPModifiers()) {
2293     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2294     if (Op.isExpr()) {
2295       Error(Op.getStartLoc(), "expected an absolute expression");
2296       return MatchOperand_ParseFail;
2297     }
2298     Op.setModifiers(Mods);
2299   }
2300   return MatchOperand_Success;
2301 }
2302 
2303 OperandMatchResultTy
2304 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2305                                                bool AllowImm) {
2306   bool Sext = trySkipId("sext");
2307   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2308     return MatchOperand_ParseFail;
2309 
2310   OperandMatchResultTy Res;
2311   if (AllowImm) {
2312     Res = parseRegOrImm(Operands);
2313   } else {
2314     Res = parseReg(Operands);
2315   }
2316   if (Res != MatchOperand_Success) {
2317     return Sext? MatchOperand_ParseFail : Res;
2318   }
2319 
2320   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2321     return MatchOperand_ParseFail;
2322 
2323   AMDGPUOperand::Modifiers Mods;
2324   Mods.Sext = Sext;
2325 
2326   if (Mods.hasIntModifiers()) {
2327     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2328     if (Op.isExpr()) {
2329       Error(Op.getStartLoc(), "expected an absolute expression");
2330       return MatchOperand_ParseFail;
2331     }
2332     Op.setModifiers(Mods);
2333   }
2334 
2335   return MatchOperand_Success;
2336 }
2337 
2338 OperandMatchResultTy
2339 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2340   return parseRegOrImmWithFPInputMods(Operands, false);
2341 }
2342 
2343 OperandMatchResultTy
2344 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2345   return parseRegOrImmWithIntInputMods(Operands, false);
2346 }
2347 
2348 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2349   auto Loc = getLoc();
2350   if (trySkipId("off")) {
2351     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2352                                                 AMDGPUOperand::ImmTyOff, false));
2353     return MatchOperand_Success;
2354   }
2355 
2356   if (!isRegister())
2357     return MatchOperand_NoMatch;
2358 
2359   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2360   if (Reg) {
2361     Operands.push_back(std::move(Reg));
2362     return MatchOperand_Success;
2363   }
2364 
2365   return MatchOperand_ParseFail;
2366 
2367 }
2368 
2369 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2370   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2371 
2372   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2373       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2374       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2375       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2376     return Match_InvalidOperand;
2377 
2378   if ((TSFlags & SIInstrFlags::VOP3) &&
2379       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2380       getForcedEncodingSize() != 64)
2381     return Match_PreferE32;
2382 
2383   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2384       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2385     // v_mac_f32/16 allow only dst_sel == DWORD;
2386     auto OpNum =
2387         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2388     const auto &Op = Inst.getOperand(OpNum);
2389     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2390       return Match_InvalidOperand;
2391     }
2392   }
2393 
2394   if (TSFlags & SIInstrFlags::FLAT) {
2395     // FIXME: Produces error without correct column reported.
2396     auto Opcode = Inst.getOpcode();
2397     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2398 
2399     const auto &Op = Inst.getOperand(OpNum);
2400     if (!hasFlatOffsets() && Op.getImm() != 0)
2401       return Match_InvalidOperand;
2402 
2403     // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2404     // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2405     if (isGFX10()) {
2406       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2407         if (!isInt<12>(Op.getImm()))
2408           return Match_InvalidOperand;
2409       } else {
2410         if (!isUInt<11>(Op.getImm()))
2411           return Match_InvalidOperand;
2412       }
2413     }
2414   }
2415 
2416   return Match_Success;
2417 }
2418 
2419 // What asm variants we should check
2420 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2421   if (getForcedEncodingSize() == 32) {
2422     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2423     return makeArrayRef(Variants);
2424   }
2425 
2426   if (isForcedVOP3()) {
2427     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2428     return makeArrayRef(Variants);
2429   }
2430 
2431   if (isForcedSDWA()) {
2432     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2433                                         AMDGPUAsmVariants::SDWA9};
2434     return makeArrayRef(Variants);
2435   }
2436 
2437   if (isForcedDPP()) {
2438     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2439     return makeArrayRef(Variants);
2440   }
2441 
2442   static const unsigned Variants[] = {
2443     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2444     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2445   };
2446 
2447   return makeArrayRef(Variants);
2448 }
2449 
2450 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2451   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2452   const unsigned Num = Desc.getNumImplicitUses();
2453   for (unsigned i = 0; i < Num; ++i) {
2454     unsigned Reg = Desc.ImplicitUses[i];
2455     switch (Reg) {
2456     case AMDGPU::FLAT_SCR:
2457     case AMDGPU::VCC:
2458     case AMDGPU::VCC_LO:
2459     case AMDGPU::VCC_HI:
2460     case AMDGPU::M0:
2461     case AMDGPU::SGPR_NULL:
2462       return Reg;
2463     default:
2464       break;
2465     }
2466   }
2467   return AMDGPU::NoRegister;
2468 }
2469 
2470 // NB: This code is correct only when used to check constant
2471 // bus limitations because GFX7 support no f16 inline constants.
2472 // Note that there are no cases when a GFX7 opcode violates
2473 // constant bus limitations due to the use of an f16 constant.
2474 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2475                                        unsigned OpIdx) const {
2476   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2477 
2478   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2479     return false;
2480   }
2481 
2482   const MCOperand &MO = Inst.getOperand(OpIdx);
2483 
2484   int64_t Val = MO.getImm();
2485   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2486 
2487   switch (OpSize) { // expected operand size
2488   case 8:
2489     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2490   case 4:
2491     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2492   case 2: {
2493     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2494     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2495         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2496         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2497         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2498       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2499     } else {
2500       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2501     }
2502   }
2503   default:
2504     llvm_unreachable("invalid operand size");
2505   }
2506 }
2507 
2508 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2509   const MCOperand &MO = Inst.getOperand(OpIdx);
2510   if (MO.isImm()) {
2511     return !isInlineConstant(Inst, OpIdx);
2512   }
2513   return !MO.isReg() ||
2514          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2515 }
2516 
2517 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2518   const unsigned Opcode = Inst.getOpcode();
2519   const MCInstrDesc &Desc = MII.get(Opcode);
2520   unsigned ConstantBusUseCount = 0;
2521   unsigned NumLiterals = 0;
2522   unsigned LiteralSize;
2523 
2524   if (Desc.TSFlags &
2525       (SIInstrFlags::VOPC |
2526        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2527        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2528        SIInstrFlags::SDWA)) {
2529     // Check special imm operands (used by madmk, etc)
2530     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2531       ++ConstantBusUseCount;
2532     }
2533 
2534     SmallDenseSet<unsigned> SGPRsUsed;
2535     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2536     if (SGPRUsed != AMDGPU::NoRegister) {
2537       SGPRsUsed.insert(SGPRUsed);
2538       ++ConstantBusUseCount;
2539     }
2540 
2541     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2542     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2543     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2544 
2545     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2546 
2547     for (int OpIdx : OpIndices) {
2548       if (OpIdx == -1) break;
2549 
2550       const MCOperand &MO = Inst.getOperand(OpIdx);
2551       if (usesConstantBus(Inst, OpIdx)) {
2552         if (MO.isReg()) {
2553           const unsigned Reg = mc2PseudoReg(MO.getReg());
2554           // Pairs of registers with a partial intersections like these
2555           //   s0, s[0:1]
2556           //   flat_scratch_lo, flat_scratch
2557           //   flat_scratch_lo, flat_scratch_hi
2558           // are theoretically valid but they are disabled anyway.
2559           // Note that this code mimics SIInstrInfo::verifyInstruction
2560           if (!SGPRsUsed.count(Reg)) {
2561             SGPRsUsed.insert(Reg);
2562             ++ConstantBusUseCount;
2563           }
2564           SGPRUsed = Reg;
2565         } else { // Expression or a literal
2566 
2567           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2568             continue; // special operand like VINTERP attr_chan
2569 
2570           // An instruction may use only one literal.
2571           // This has been validated on the previous step.
2572           // See validateVOP3Literal.
2573           // This literal may be used as more than one operand.
2574           // If all these operands are of the same size,
2575           // this literal counts as one scalar value.
2576           // Otherwise it counts as 2 scalar values.
2577           // See "GFX10 Shader Programming", section 3.6.2.3.
2578 
2579           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2580           if (Size < 4) Size = 4;
2581 
2582           if (NumLiterals == 0) {
2583             NumLiterals = 1;
2584             LiteralSize = Size;
2585           } else if (LiteralSize != Size) {
2586             NumLiterals = 2;
2587           }
2588         }
2589       }
2590     }
2591   }
2592   ConstantBusUseCount += NumLiterals;
2593 
2594   if (isGFX10())
2595     return ConstantBusUseCount <= 2;
2596 
2597   return ConstantBusUseCount <= 1;
2598 }
2599 
2600 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2601   const unsigned Opcode = Inst.getOpcode();
2602   const MCInstrDesc &Desc = MII.get(Opcode);
2603 
2604   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2605   if (DstIdx == -1 ||
2606       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2607     return true;
2608   }
2609 
2610   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2611 
2612   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2613   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2614   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2615 
2616   assert(DstIdx != -1);
2617   const MCOperand &Dst = Inst.getOperand(DstIdx);
2618   assert(Dst.isReg());
2619   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2620 
2621   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2622 
2623   for (int SrcIdx : SrcIndices) {
2624     if (SrcIdx == -1) break;
2625     const MCOperand &Src = Inst.getOperand(SrcIdx);
2626     if (Src.isReg()) {
2627       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2628       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2629         return false;
2630       }
2631     }
2632   }
2633 
2634   return true;
2635 }
2636 
2637 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2638 
2639   const unsigned Opc = Inst.getOpcode();
2640   const MCInstrDesc &Desc = MII.get(Opc);
2641 
2642   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2643     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2644     assert(ClampIdx != -1);
2645     return Inst.getOperand(ClampIdx).getImm() == 0;
2646   }
2647 
2648   return true;
2649 }
2650 
2651 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2652 
2653   const unsigned Opc = Inst.getOpcode();
2654   const MCInstrDesc &Desc = MII.get(Opc);
2655 
2656   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2657     return true;
2658 
2659   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2660   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2661   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2662 
2663   assert(VDataIdx != -1);
2664   assert(DMaskIdx != -1);
2665   assert(TFEIdx != -1);
2666 
2667   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2668   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2669   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2670   if (DMask == 0)
2671     DMask = 1;
2672 
2673   unsigned DataSize =
2674     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2675   if (hasPackedD16()) {
2676     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2677     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2678       DataSize = (DataSize + 1) / 2;
2679   }
2680 
2681   return (VDataSize / 4) == DataSize + TFESize;
2682 }
2683 
2684 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2685   const unsigned Opc = Inst.getOpcode();
2686   const MCInstrDesc &Desc = MII.get(Opc);
2687 
2688   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2689     return true;
2690 
2691   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2692   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2693       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2694   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2695   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2696   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2697 
2698   assert(VAddr0Idx != -1);
2699   assert(SrsrcIdx != -1);
2700   assert(DimIdx != -1);
2701   assert(SrsrcIdx > VAddr0Idx);
2702 
2703   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2704   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2705   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2706   unsigned VAddrSize =
2707       IsNSA ? SrsrcIdx - VAddr0Idx
2708             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2709 
2710   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2711                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2712                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2713                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2714   if (!IsNSA) {
2715     if (AddrSize > 8)
2716       AddrSize = 16;
2717     else if (AddrSize > 4)
2718       AddrSize = 8;
2719   }
2720 
2721   return VAddrSize == AddrSize;
2722 }
2723 
2724 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2725 
2726   const unsigned Opc = Inst.getOpcode();
2727   const MCInstrDesc &Desc = MII.get(Opc);
2728 
2729   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2730     return true;
2731   if (!Desc.mayLoad() || !Desc.mayStore())
2732     return true; // Not atomic
2733 
2734   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2735   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2736 
2737   // This is an incomplete check because image_atomic_cmpswap
2738   // may only use 0x3 and 0xf while other atomic operations
2739   // may use 0x1 and 0x3. However these limitations are
2740   // verified when we check that dmask matches dst size.
2741   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2742 }
2743 
2744 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2745 
2746   const unsigned Opc = Inst.getOpcode();
2747   const MCInstrDesc &Desc = MII.get(Opc);
2748 
2749   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2750     return true;
2751 
2752   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2753   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2754 
2755   // GATHER4 instructions use dmask in a different fashion compared to
2756   // other MIMG instructions. The only useful DMASK values are
2757   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2758   // (red,red,red,red) etc.) The ISA document doesn't mention
2759   // this.
2760   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2761 }
2762 
2763 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2764 
2765   const unsigned Opc = Inst.getOpcode();
2766   const MCInstrDesc &Desc = MII.get(Opc);
2767 
2768   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2769     return true;
2770 
2771   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2772   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2773     if (isCI() || isSI())
2774       return false;
2775   }
2776 
2777   return true;
2778 }
2779 
2780 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2781   const unsigned Opc = Inst.getOpcode();
2782   const MCInstrDesc &Desc = MII.get(Opc);
2783 
2784   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2785     return true;
2786 
2787   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2788   if (DimIdx < 0)
2789     return true;
2790 
2791   long Imm = Inst.getOperand(DimIdx).getImm();
2792   if (Imm < 0 || Imm >= 8)
2793     return false;
2794 
2795   return true;
2796 }
2797 
2798 static bool IsRevOpcode(const unsigned Opcode)
2799 {
2800   switch (Opcode) {
2801   case AMDGPU::V_SUBREV_F32_e32:
2802   case AMDGPU::V_SUBREV_F32_e64:
2803   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2804   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2805   case AMDGPU::V_SUBREV_F32_e32_vi:
2806   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2807   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2808   case AMDGPU::V_SUBREV_F32_e64_vi:
2809 
2810   case AMDGPU::V_SUBREV_I32_e32:
2811   case AMDGPU::V_SUBREV_I32_e64:
2812   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2813   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2814 
2815   case AMDGPU::V_SUBBREV_U32_e32:
2816   case AMDGPU::V_SUBBREV_U32_e64:
2817   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2818   case AMDGPU::V_SUBBREV_U32_e32_vi:
2819   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2820   case AMDGPU::V_SUBBREV_U32_e64_vi:
2821 
2822   case AMDGPU::V_SUBREV_U32_e32:
2823   case AMDGPU::V_SUBREV_U32_e64:
2824   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2825   case AMDGPU::V_SUBREV_U32_e32_vi:
2826   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2827   case AMDGPU::V_SUBREV_U32_e64_vi:
2828 
2829   case AMDGPU::V_SUBREV_F16_e32:
2830   case AMDGPU::V_SUBREV_F16_e64:
2831   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2832   case AMDGPU::V_SUBREV_F16_e32_vi:
2833   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2834   case AMDGPU::V_SUBREV_F16_e64_vi:
2835 
2836   case AMDGPU::V_SUBREV_U16_e32:
2837   case AMDGPU::V_SUBREV_U16_e64:
2838   case AMDGPU::V_SUBREV_U16_e32_vi:
2839   case AMDGPU::V_SUBREV_U16_e64_vi:
2840 
2841   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2842   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2843   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2844 
2845   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2846   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2847 
2848   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2849   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2850 
2851   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2852   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2853 
2854   case AMDGPU::V_LSHRREV_B32_e32:
2855   case AMDGPU::V_LSHRREV_B32_e64:
2856   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2857   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2858   case AMDGPU::V_LSHRREV_B32_e32_vi:
2859   case AMDGPU::V_LSHRREV_B32_e64_vi:
2860   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2861   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2862 
2863   case AMDGPU::V_ASHRREV_I32_e32:
2864   case AMDGPU::V_ASHRREV_I32_e64:
2865   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2866   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2867   case AMDGPU::V_ASHRREV_I32_e32_vi:
2868   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2869   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2870   case AMDGPU::V_ASHRREV_I32_e64_vi:
2871 
2872   case AMDGPU::V_LSHLREV_B32_e32:
2873   case AMDGPU::V_LSHLREV_B32_e64:
2874   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2875   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2876   case AMDGPU::V_LSHLREV_B32_e32_vi:
2877   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2878   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2879   case AMDGPU::V_LSHLREV_B32_e64_vi:
2880 
2881   case AMDGPU::V_LSHLREV_B16_e32:
2882   case AMDGPU::V_LSHLREV_B16_e64:
2883   case AMDGPU::V_LSHLREV_B16_e32_vi:
2884   case AMDGPU::V_LSHLREV_B16_e64_vi:
2885   case AMDGPU::V_LSHLREV_B16_gfx10:
2886 
2887   case AMDGPU::V_LSHRREV_B16_e32:
2888   case AMDGPU::V_LSHRREV_B16_e64:
2889   case AMDGPU::V_LSHRREV_B16_e32_vi:
2890   case AMDGPU::V_LSHRREV_B16_e64_vi:
2891   case AMDGPU::V_LSHRREV_B16_gfx10:
2892 
2893   case AMDGPU::V_ASHRREV_I16_e32:
2894   case AMDGPU::V_ASHRREV_I16_e64:
2895   case AMDGPU::V_ASHRREV_I16_e32_vi:
2896   case AMDGPU::V_ASHRREV_I16_e64_vi:
2897   case AMDGPU::V_ASHRREV_I16_gfx10:
2898 
2899   case AMDGPU::V_LSHLREV_B64:
2900   case AMDGPU::V_LSHLREV_B64_gfx10:
2901   case AMDGPU::V_LSHLREV_B64_vi:
2902 
2903   case AMDGPU::V_LSHRREV_B64:
2904   case AMDGPU::V_LSHRREV_B64_gfx10:
2905   case AMDGPU::V_LSHRREV_B64_vi:
2906 
2907   case AMDGPU::V_ASHRREV_I64:
2908   case AMDGPU::V_ASHRREV_I64_gfx10:
2909   case AMDGPU::V_ASHRREV_I64_vi:
2910 
2911   case AMDGPU::V_PK_LSHLREV_B16:
2912   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2913   case AMDGPU::V_PK_LSHLREV_B16_vi:
2914 
2915   case AMDGPU::V_PK_LSHRREV_B16:
2916   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2917   case AMDGPU::V_PK_LSHRREV_B16_vi:
2918   case AMDGPU::V_PK_ASHRREV_I16:
2919   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2920   case AMDGPU::V_PK_ASHRREV_I16_vi:
2921     return true;
2922   default:
2923     return false;
2924   }
2925 }
2926 
2927 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2928 
2929   using namespace SIInstrFlags;
2930   const unsigned Opcode = Inst.getOpcode();
2931   const MCInstrDesc &Desc = MII.get(Opcode);
2932 
2933   // lds_direct register is defined so that it can be used
2934   // with 9-bit operands only. Ignore encodings which do not accept these.
2935   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2936     return true;
2937 
2938   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2939   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2940   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2941 
2942   const int SrcIndices[] = { Src1Idx, Src2Idx };
2943 
2944   // lds_direct cannot be specified as either src1 or src2.
2945   for (int SrcIdx : SrcIndices) {
2946     if (SrcIdx == -1) break;
2947     const MCOperand &Src = Inst.getOperand(SrcIdx);
2948     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2949       return false;
2950     }
2951   }
2952 
2953   if (Src0Idx == -1)
2954     return true;
2955 
2956   const MCOperand &Src = Inst.getOperand(Src0Idx);
2957   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2958     return true;
2959 
2960   // lds_direct is specified as src0. Check additional limitations.
2961   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2962 }
2963 
2964 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2965   unsigned Opcode = Inst.getOpcode();
2966   const MCInstrDesc &Desc = MII.get(Opcode);
2967   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2968     return true;
2969 
2970   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2971   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2972 
2973   const int OpIndices[] = { Src0Idx, Src1Idx };
2974 
2975   unsigned NumLiterals = 0;
2976   uint32_t LiteralValue;
2977 
2978   for (int OpIdx : OpIndices) {
2979     if (OpIdx == -1) break;
2980 
2981     const MCOperand &MO = Inst.getOperand(OpIdx);
2982     if (MO.isImm() &&
2983         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2984         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2985         !isInlineConstant(Inst, OpIdx)) {
2986       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2987       if (NumLiterals == 0 || LiteralValue != Value) {
2988         LiteralValue = Value;
2989         ++NumLiterals;
2990       }
2991     }
2992   }
2993 
2994   return NumLiterals <= 1;
2995 }
2996 
2997 // VOP3 literal is only allowed in GFX10+ and only one can be used
2998 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
2999   unsigned Opcode = Inst.getOpcode();
3000   const MCInstrDesc &Desc = MII.get(Opcode);
3001   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3002     return true;
3003 
3004   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3005   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3006   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3007 
3008   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3009 
3010   unsigned NumLiterals = 0;
3011   uint32_t LiteralValue;
3012 
3013   for (int OpIdx : OpIndices) {
3014     if (OpIdx == -1) break;
3015 
3016     const MCOperand &MO = Inst.getOperand(OpIdx);
3017     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3018       continue;
3019 
3020     if (!isInlineConstant(Inst, OpIdx)) {
3021       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3022       if (NumLiterals == 0 || LiteralValue != Value) {
3023         LiteralValue = Value;
3024         ++NumLiterals;
3025       }
3026     }
3027   }
3028 
3029   return !NumLiterals ||
3030          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3031 }
3032 
3033 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3034                                           const SMLoc &IDLoc) {
3035   if (!validateLdsDirect(Inst)) {
3036     Error(IDLoc,
3037       "invalid use of lds_direct");
3038     return false;
3039   }
3040   if (!validateSOPLiteral(Inst)) {
3041     Error(IDLoc,
3042       "only one literal operand is allowed");
3043     return false;
3044   }
3045   if (!validateVOP3Literal(Inst)) {
3046     Error(IDLoc,
3047       "invalid literal operand");
3048     return false;
3049   }
3050   if (!validateConstantBusLimitations(Inst)) {
3051     Error(IDLoc,
3052       "invalid operand (violates constant bus restrictions)");
3053     return false;
3054   }
3055   if (!validateEarlyClobberLimitations(Inst)) {
3056     Error(IDLoc,
3057       "destination must be different than all sources");
3058     return false;
3059   }
3060   if (!validateIntClampSupported(Inst)) {
3061     Error(IDLoc,
3062       "integer clamping is not supported on this GPU");
3063     return false;
3064   }
3065   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3066   if (!validateMIMGD16(Inst)) {
3067     Error(IDLoc,
3068       "d16 modifier is not supported on this GPU");
3069     return false;
3070   }
3071   if (!validateMIMGDim(Inst)) {
3072     Error(IDLoc, "dim modifier is required on this GPU");
3073     return false;
3074   }
3075   if (!validateMIMGDataSize(Inst)) {
3076     Error(IDLoc,
3077       "image data size does not match dmask and tfe");
3078     return false;
3079   }
3080   if (!validateMIMGAddrSize(Inst)) {
3081     Error(IDLoc,
3082       "image address size does not match dim and a16");
3083     return false;
3084   }
3085   if (!validateMIMGAtomicDMask(Inst)) {
3086     Error(IDLoc,
3087       "invalid atomic image dmask");
3088     return false;
3089   }
3090   if (!validateMIMGGatherDMask(Inst)) {
3091     Error(IDLoc,
3092       "invalid image_gather dmask: only one bit must be set");
3093     return false;
3094   }
3095 
3096   return true;
3097 }
3098 
3099 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3100                                             const FeatureBitset &FBS,
3101                                             unsigned VariantID = 0);
3102 
3103 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3104                                               OperandVector &Operands,
3105                                               MCStreamer &Out,
3106                                               uint64_t &ErrorInfo,
3107                                               bool MatchingInlineAsm) {
3108   MCInst Inst;
3109   unsigned Result = Match_Success;
3110   for (auto Variant : getMatchedVariants()) {
3111     uint64_t EI;
3112     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3113                                   Variant);
3114     // We order match statuses from least to most specific. We use most specific
3115     // status as resulting
3116     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3117     if ((R == Match_Success) ||
3118         (R == Match_PreferE32) ||
3119         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3120         (R == Match_InvalidOperand && Result != Match_MissingFeature
3121                                    && Result != Match_PreferE32) ||
3122         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3123                                    && Result != Match_MissingFeature
3124                                    && Result != Match_PreferE32)) {
3125       Result = R;
3126       ErrorInfo = EI;
3127     }
3128     if (R == Match_Success)
3129       break;
3130   }
3131 
3132   switch (Result) {
3133   default: break;
3134   case Match_Success:
3135     if (!validateInstruction(Inst, IDLoc)) {
3136       return true;
3137     }
3138     Inst.setLoc(IDLoc);
3139     Out.EmitInstruction(Inst, getSTI());
3140     return false;
3141 
3142   case Match_MissingFeature:
3143     return Error(IDLoc, "instruction not supported on this GPU");
3144 
3145   case Match_MnemonicFail: {
3146     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3147     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3148         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3149     return Error(IDLoc, "invalid instruction" + Suggestion,
3150                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3151   }
3152 
3153   case Match_InvalidOperand: {
3154     SMLoc ErrorLoc = IDLoc;
3155     if (ErrorInfo != ~0ULL) {
3156       if (ErrorInfo >= Operands.size()) {
3157         return Error(IDLoc, "too few operands for instruction");
3158       }
3159       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3160       if (ErrorLoc == SMLoc())
3161         ErrorLoc = IDLoc;
3162     }
3163     return Error(ErrorLoc, "invalid operand for instruction");
3164   }
3165 
3166   case Match_PreferE32:
3167     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3168                         "should be encoded as e32");
3169   }
3170   llvm_unreachable("Implement any new match types added!");
3171 }
3172 
3173 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3174   int64_t Tmp = -1;
3175   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3176     return true;
3177   }
3178   if (getParser().parseAbsoluteExpression(Tmp)) {
3179     return true;
3180   }
3181   Ret = static_cast<uint32_t>(Tmp);
3182   return false;
3183 }
3184 
3185 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3186                                                uint32_t &Minor) {
3187   if (ParseAsAbsoluteExpression(Major))
3188     return TokError("invalid major version");
3189 
3190   if (getLexer().isNot(AsmToken::Comma))
3191     return TokError("minor version number required, comma expected");
3192   Lex();
3193 
3194   if (ParseAsAbsoluteExpression(Minor))
3195     return TokError("invalid minor version");
3196 
3197   return false;
3198 }
3199 
3200 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3201   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3202     return TokError("directive only supported for amdgcn architecture");
3203 
3204   std::string Target;
3205 
3206   SMLoc TargetStart = getTok().getLoc();
3207   if (getParser().parseEscapedString(Target))
3208     return true;
3209   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3210 
3211   std::string ExpectedTarget;
3212   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3213   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3214 
3215   if (Target != ExpectedTargetOS.str())
3216     return getParser().Error(TargetRange.Start, "target must match options",
3217                              TargetRange);
3218 
3219   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3220   return false;
3221 }
3222 
3223 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3224   return getParser().Error(Range.Start, "value out of range", Range);
3225 }
3226 
3227 bool AMDGPUAsmParser::calculateGPRBlocks(
3228     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3229     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
3230     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
3231     unsigned &SGPRBlocks) {
3232   // TODO(scott.linder): These calculations are duplicated from
3233   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3234   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3235 
3236   unsigned NumVGPRs = NextFreeVGPR;
3237   unsigned NumSGPRs = NextFreeSGPR;
3238 
3239   if (Version.Major >= 10)
3240     NumSGPRs = 0;
3241   else {
3242     unsigned MaxAddressableNumSGPRs =
3243         IsaInfo::getAddressableNumSGPRs(&getSTI());
3244 
3245     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3246         NumSGPRs > MaxAddressableNumSGPRs)
3247       return OutOfRangeError(SGPRRange);
3248 
3249     NumSGPRs +=
3250         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3251 
3252     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3253         NumSGPRs > MaxAddressableNumSGPRs)
3254       return OutOfRangeError(SGPRRange);
3255 
3256     if (Features.test(FeatureSGPRInitBug))
3257       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3258   }
3259 
3260   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
3261   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3262 
3263   return false;
3264 }
3265 
3266 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3267   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3268     return TokError("directive only supported for amdgcn architecture");
3269 
3270   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3271     return TokError("directive only supported for amdhsa OS");
3272 
3273   StringRef KernelName;
3274   if (getParser().parseIdentifier(KernelName))
3275     return true;
3276 
3277   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3278 
3279   StringSet<> Seen;
3280 
3281   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3282 
3283   SMRange VGPRRange;
3284   uint64_t NextFreeVGPR = 0;
3285   SMRange SGPRRange;
3286   uint64_t NextFreeSGPR = 0;
3287   unsigned UserSGPRCount = 0;
3288   bool ReserveVCC = true;
3289   bool ReserveFlatScr = true;
3290   bool ReserveXNACK = hasXNACK();
3291 
3292   while (true) {
3293     while (getLexer().is(AsmToken::EndOfStatement))
3294       Lex();
3295 
3296     if (getLexer().isNot(AsmToken::Identifier))
3297       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3298 
3299     StringRef ID = getTok().getIdentifier();
3300     SMRange IDRange = getTok().getLocRange();
3301     Lex();
3302 
3303     if (ID == ".end_amdhsa_kernel")
3304       break;
3305 
3306     if (Seen.find(ID) != Seen.end())
3307       return TokError(".amdhsa_ directives cannot be repeated");
3308     Seen.insert(ID);
3309 
3310     SMLoc ValStart = getTok().getLoc();
3311     int64_t IVal;
3312     if (getParser().parseAbsoluteExpression(IVal))
3313       return true;
3314     SMLoc ValEnd = getTok().getLoc();
3315     SMRange ValRange = SMRange(ValStart, ValEnd);
3316 
3317     if (IVal < 0)
3318       return OutOfRangeError(ValRange);
3319 
3320     uint64_t Val = IVal;
3321 
3322 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3323   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3324     return OutOfRangeError(RANGE);                                             \
3325   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3326 
3327     if (ID == ".amdhsa_group_segment_fixed_size") {
3328       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3329         return OutOfRangeError(ValRange);
3330       KD.group_segment_fixed_size = Val;
3331     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3332       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3333         return OutOfRangeError(ValRange);
3334       KD.private_segment_fixed_size = Val;
3335     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3336       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3337                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3338                        Val, ValRange);
3339       UserSGPRCount += 4;
3340     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3341       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3342                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3343                        ValRange);
3344       UserSGPRCount += 2;
3345     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3346       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3347                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3348                        ValRange);
3349       UserSGPRCount += 2;
3350     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3351       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3352                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3353                        Val, ValRange);
3354       UserSGPRCount += 2;
3355     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3356       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3357                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3358                        ValRange);
3359       UserSGPRCount += 2;
3360     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3361       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3362                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3363                        ValRange);
3364       UserSGPRCount += 2;
3365     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3366       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3367                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3368                        Val, ValRange);
3369       UserSGPRCount += 1;
3370     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3371       PARSE_BITS_ENTRY(
3372           KD.compute_pgm_rsrc2,
3373           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3374           ValRange);
3375     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3376       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3377                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3378                        ValRange);
3379     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3380       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3381                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3382                        ValRange);
3383     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3384       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3385                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3386                        ValRange);
3387     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3388       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3389                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3390                        ValRange);
3391     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3392       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3393                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3394                        ValRange);
3395     } else if (ID == ".amdhsa_next_free_vgpr") {
3396       VGPRRange = ValRange;
3397       NextFreeVGPR = Val;
3398     } else if (ID == ".amdhsa_next_free_sgpr") {
3399       SGPRRange = ValRange;
3400       NextFreeSGPR = Val;
3401     } else if (ID == ".amdhsa_reserve_vcc") {
3402       if (!isUInt<1>(Val))
3403         return OutOfRangeError(ValRange);
3404       ReserveVCC = Val;
3405     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3406       if (IVersion.Major < 7)
3407         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3408                                  IDRange);
3409       if (!isUInt<1>(Val))
3410         return OutOfRangeError(ValRange);
3411       ReserveFlatScr = Val;
3412     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3413       if (IVersion.Major < 8)
3414         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3415                                  IDRange);
3416       if (!isUInt<1>(Val))
3417         return OutOfRangeError(ValRange);
3418       ReserveXNACK = Val;
3419     } else if (ID == ".amdhsa_float_round_mode_32") {
3420       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3421                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3422     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3423       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3424                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3425     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3426       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3427                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3428     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3429       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3430                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3431                        ValRange);
3432     } else if (ID == ".amdhsa_dx10_clamp") {
3433       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3434                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3435     } else if (ID == ".amdhsa_ieee_mode") {
3436       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3437                        Val, ValRange);
3438     } else if (ID == ".amdhsa_fp16_overflow") {
3439       if (IVersion.Major < 9)
3440         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3441                                  IDRange);
3442       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3443                        ValRange);
3444     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3445       if (IVersion.Major < 10)
3446         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3447                                  IDRange);
3448       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3449                        ValRange);
3450     } else if (ID == ".amdhsa_memory_ordered") {
3451       if (IVersion.Major < 10)
3452         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3453                                  IDRange);
3454       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3455                        ValRange);
3456     } else if (ID == ".amdhsa_forward_progress") {
3457       if (IVersion.Major < 10)
3458         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3459                                  IDRange);
3460       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3461                        ValRange);
3462     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3463       PARSE_BITS_ENTRY(
3464           KD.compute_pgm_rsrc2,
3465           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3466           ValRange);
3467     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3468       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3469                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3470                        Val, ValRange);
3471     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3472       PARSE_BITS_ENTRY(
3473           KD.compute_pgm_rsrc2,
3474           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3475           ValRange);
3476     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3477       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3478                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3479                        Val, ValRange);
3480     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3481       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3482                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3483                        Val, ValRange);
3484     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3485       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3486                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3487                        Val, ValRange);
3488     } else if (ID == ".amdhsa_exception_int_div_zero") {
3489       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3490                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3491                        Val, ValRange);
3492     } else {
3493       return getParser().Error(IDRange.Start,
3494                                "unknown .amdhsa_kernel directive", IDRange);
3495     }
3496 
3497 #undef PARSE_BITS_ENTRY
3498   }
3499 
3500   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3501     return TokError(".amdhsa_next_free_vgpr directive is required");
3502 
3503   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3504     return TokError(".amdhsa_next_free_sgpr directive is required");
3505 
3506   unsigned VGPRBlocks;
3507   unsigned SGPRBlocks;
3508   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3509                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3510                          SGPRRange, VGPRBlocks, SGPRBlocks))
3511     return true;
3512 
3513   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3514           VGPRBlocks))
3515     return OutOfRangeError(VGPRRange);
3516   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3517                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3518 
3519   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3520           SGPRBlocks))
3521     return OutOfRangeError(SGPRRange);
3522   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3523                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3524                   SGPRBlocks);
3525 
3526   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3527     return TokError("too many user SGPRs enabled");
3528   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3529                   UserSGPRCount);
3530 
3531   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3532       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3533       ReserveFlatScr, ReserveXNACK);
3534   return false;
3535 }
3536 
3537 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3538   uint32_t Major;
3539   uint32_t Minor;
3540 
3541   if (ParseDirectiveMajorMinor(Major, Minor))
3542     return true;
3543 
3544   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3545   return false;
3546 }
3547 
3548 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3549   uint32_t Major;
3550   uint32_t Minor;
3551   uint32_t Stepping;
3552   StringRef VendorName;
3553   StringRef ArchName;
3554 
3555   // If this directive has no arguments, then use the ISA version for the
3556   // targeted GPU.
3557   if (getLexer().is(AsmToken::EndOfStatement)) {
3558     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3559     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3560                                                       ISA.Stepping,
3561                                                       "AMD", "AMDGPU");
3562     return false;
3563   }
3564 
3565   if (ParseDirectiveMajorMinor(Major, Minor))
3566     return true;
3567 
3568   if (getLexer().isNot(AsmToken::Comma))
3569     return TokError("stepping version number required, comma expected");
3570   Lex();
3571 
3572   if (ParseAsAbsoluteExpression(Stepping))
3573     return TokError("invalid stepping version");
3574 
3575   if (getLexer().isNot(AsmToken::Comma))
3576     return TokError("vendor name required, comma expected");
3577   Lex();
3578 
3579   if (getLexer().isNot(AsmToken::String))
3580     return TokError("invalid vendor name");
3581 
3582   VendorName = getLexer().getTok().getStringContents();
3583   Lex();
3584 
3585   if (getLexer().isNot(AsmToken::Comma))
3586     return TokError("arch name required, comma expected");
3587   Lex();
3588 
3589   if (getLexer().isNot(AsmToken::String))
3590     return TokError("invalid arch name");
3591 
3592   ArchName = getLexer().getTok().getStringContents();
3593   Lex();
3594 
3595   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3596                                                     VendorName, ArchName);
3597   return false;
3598 }
3599 
3600 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3601                                                amd_kernel_code_t &Header) {
3602   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3603   // assembly for backwards compatibility.
3604   if (ID == "max_scratch_backing_memory_byte_size") {
3605     Parser.eatToEndOfStatement();
3606     return false;
3607   }
3608 
3609   SmallString<40> ErrStr;
3610   raw_svector_ostream Err(ErrStr);
3611   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3612     return TokError(Err.str());
3613   }
3614   Lex();
3615 
3616   if (ID == "enable_wgp_mode") {
3617     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3618       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3619   }
3620 
3621   if (ID == "enable_mem_ordered") {
3622     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3623       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3624   }
3625 
3626   if (ID == "enable_fwd_progress") {
3627     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3628       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3629   }
3630 
3631   return false;
3632 }
3633 
3634 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3635   amd_kernel_code_t Header;
3636   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3637 
3638   while (true) {
3639     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3640     // will set the current token to EndOfStatement.
3641     while(getLexer().is(AsmToken::EndOfStatement))
3642       Lex();
3643 
3644     if (getLexer().isNot(AsmToken::Identifier))
3645       return TokError("expected value identifier or .end_amd_kernel_code_t");
3646 
3647     StringRef ID = getLexer().getTok().getIdentifier();
3648     Lex();
3649 
3650     if (ID == ".end_amd_kernel_code_t")
3651       break;
3652 
3653     if (ParseAMDKernelCodeTValue(ID, Header))
3654       return true;
3655   }
3656 
3657   getTargetStreamer().EmitAMDKernelCodeT(Header);
3658 
3659   return false;
3660 }
3661 
3662 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3663   if (getLexer().isNot(AsmToken::Identifier))
3664     return TokError("expected symbol name");
3665 
3666   StringRef KernelName = Parser.getTok().getString();
3667 
3668   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3669                                            ELF::STT_AMDGPU_HSA_KERNEL);
3670   Lex();
3671   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3672     KernelScope.initialize(getContext());
3673   return false;
3674 }
3675 
3676 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3677   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3678     return Error(getParser().getTok().getLoc(),
3679                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3680                  "architectures");
3681   }
3682 
3683   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3684 
3685   std::string ISAVersionStringFromSTI;
3686   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3687   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3688 
3689   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3690     return Error(getParser().getTok().getLoc(),
3691                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3692                  "arguments specified through the command line");
3693   }
3694 
3695   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3696   Lex();
3697 
3698   return false;
3699 }
3700 
3701 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3702   const char *AssemblerDirectiveBegin;
3703   const char *AssemblerDirectiveEnd;
3704   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3705       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3706           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3707                             HSAMD::V3::AssemblerDirectiveEnd)
3708           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3709                             HSAMD::AssemblerDirectiveEnd);
3710 
3711   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3712     return Error(getParser().getTok().getLoc(),
3713                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3714                  "not available on non-amdhsa OSes")).str());
3715   }
3716 
3717   std::string HSAMetadataString;
3718   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3719                           HSAMetadataString))
3720     return true;
3721 
3722   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3723     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3724       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3725   } else {
3726     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3727       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3728   }
3729 
3730   return false;
3731 }
3732 
3733 /// Common code to parse out a block of text (typically YAML) between start and
3734 /// end directives.
3735 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3736                                           const char *AssemblerDirectiveEnd,
3737                                           std::string &CollectString) {
3738 
3739   raw_string_ostream CollectStream(CollectString);
3740 
3741   getLexer().setSkipSpace(false);
3742 
3743   bool FoundEnd = false;
3744   while (!getLexer().is(AsmToken::Eof)) {
3745     while (getLexer().is(AsmToken::Space)) {
3746       CollectStream << getLexer().getTok().getString();
3747       Lex();
3748     }
3749 
3750     if (getLexer().is(AsmToken::Identifier)) {
3751       StringRef ID = getLexer().getTok().getIdentifier();
3752       if (ID == AssemblerDirectiveEnd) {
3753         Lex();
3754         FoundEnd = true;
3755         break;
3756       }
3757     }
3758 
3759     CollectStream << Parser.parseStringToEndOfStatement()
3760                   << getContext().getAsmInfo()->getSeparatorString();
3761 
3762     Parser.eatToEndOfStatement();
3763   }
3764 
3765   getLexer().setSkipSpace(true);
3766 
3767   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3768     return TokError(Twine("expected directive ") +
3769                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3770   }
3771 
3772   CollectStream.flush();
3773   return false;
3774 }
3775 
3776 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3777 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3778   std::string String;
3779   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3780                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3781     return true;
3782 
3783   auto PALMetadata = getTargetStreamer().getPALMetadata();
3784   if (!PALMetadata->setFromString(String))
3785     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3786   return false;
3787 }
3788 
3789 /// Parse the assembler directive for old linear-format PAL metadata.
3790 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3791   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3792     return Error(getParser().getTok().getLoc(),
3793                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3794                  "not available on non-amdpal OSes")).str());
3795   }
3796 
3797   auto PALMetadata = getTargetStreamer().getPALMetadata();
3798   PALMetadata->setLegacy();
3799   for (;;) {
3800     uint32_t Key, Value;
3801     if (ParseAsAbsoluteExpression(Key)) {
3802       return TokError(Twine("invalid value in ") +
3803                       Twine(PALMD::AssemblerDirective));
3804     }
3805     if (getLexer().isNot(AsmToken::Comma)) {
3806       return TokError(Twine("expected an even number of values in ") +
3807                       Twine(PALMD::AssemblerDirective));
3808     }
3809     Lex();
3810     if (ParseAsAbsoluteExpression(Value)) {
3811       return TokError(Twine("invalid value in ") +
3812                       Twine(PALMD::AssemblerDirective));
3813     }
3814     PALMetadata->setRegister(Key, Value);
3815     if (getLexer().isNot(AsmToken::Comma))
3816       break;
3817     Lex();
3818   }
3819   return false;
3820 }
3821 
3822 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3823   StringRef IDVal = DirectiveID.getString();
3824 
3825   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3826     if (IDVal == ".amdgcn_target")
3827       return ParseDirectiveAMDGCNTarget();
3828 
3829     if (IDVal == ".amdhsa_kernel")
3830       return ParseDirectiveAMDHSAKernel();
3831 
3832     // TODO: Restructure/combine with PAL metadata directive.
3833     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3834       return ParseDirectiveHSAMetadata();
3835   } else {
3836     if (IDVal == ".hsa_code_object_version")
3837       return ParseDirectiveHSACodeObjectVersion();
3838 
3839     if (IDVal == ".hsa_code_object_isa")
3840       return ParseDirectiveHSACodeObjectISA();
3841 
3842     if (IDVal == ".amd_kernel_code_t")
3843       return ParseDirectiveAMDKernelCodeT();
3844 
3845     if (IDVal == ".amdgpu_hsa_kernel")
3846       return ParseDirectiveAMDGPUHsaKernel();
3847 
3848     if (IDVal == ".amd_amdgpu_isa")
3849       return ParseDirectiveISAVersion();
3850 
3851     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3852       return ParseDirectiveHSAMetadata();
3853   }
3854 
3855   if (IDVal == PALMD::AssemblerDirectiveBegin)
3856     return ParseDirectivePALMetadataBegin();
3857 
3858   if (IDVal == PALMD::AssemblerDirective)
3859     return ParseDirectivePALMetadata();
3860 
3861   return true;
3862 }
3863 
3864 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3865                                            unsigned RegNo) const {
3866 
3867   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3868        R.isValid(); ++R) {
3869     if (*R == RegNo)
3870       return isGFX9() || isGFX10();
3871   }
3872 
3873   // GFX10 has 2 more SGPRs 104 and 105.
3874   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3875        R.isValid(); ++R) {
3876     if (*R == RegNo)
3877       return hasSGPR104_SGPR105();
3878   }
3879 
3880   switch (RegNo) {
3881   case AMDGPU::TBA:
3882   case AMDGPU::TBA_LO:
3883   case AMDGPU::TBA_HI:
3884   case AMDGPU::TMA:
3885   case AMDGPU::TMA_LO:
3886   case AMDGPU::TMA_HI:
3887     return !isGFX9() && !isGFX10();
3888   case AMDGPU::XNACK_MASK:
3889   case AMDGPU::XNACK_MASK_LO:
3890   case AMDGPU::XNACK_MASK_HI:
3891     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3892   case AMDGPU::SGPR_NULL:
3893     return isGFX10();
3894   default:
3895     break;
3896   }
3897 
3898   if (isInlineValue(RegNo))
3899     return !isCI() && !isSI() && !isVI();
3900 
3901   if (isCI())
3902     return true;
3903 
3904   if (isSI() || isGFX10()) {
3905     // No flat_scr on SI.
3906     // On GFX10 flat scratch is not a valid register operand and can only be
3907     // accessed with s_setreg/s_getreg.
3908     switch (RegNo) {
3909     case AMDGPU::FLAT_SCR:
3910     case AMDGPU::FLAT_SCR_LO:
3911     case AMDGPU::FLAT_SCR_HI:
3912       return false;
3913     default:
3914       return true;
3915     }
3916   }
3917 
3918   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3919   // SI/CI have.
3920   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3921        R.isValid(); ++R) {
3922     if (*R == RegNo)
3923       return hasSGPR102_SGPR103();
3924   }
3925 
3926   return true;
3927 }
3928 
3929 OperandMatchResultTy
3930 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
3931                               OperandMode Mode) {
3932   // Try to parse with a custom parser
3933   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3934 
3935   // If we successfully parsed the operand or if there as an error parsing,
3936   // we are done.
3937   //
3938   // If we are parsing after we reach EndOfStatement then this means we
3939   // are appending default values to the Operands list.  This is only done
3940   // by custom parser, so we shouldn't continue on to the generic parsing.
3941   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3942       getLexer().is(AsmToken::EndOfStatement))
3943     return ResTy;
3944 
3945   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
3946     unsigned Prefix = Operands.size();
3947     SMLoc LBraceLoc = getTok().getLoc();
3948     Parser.Lex(); // eat the '['
3949 
3950     for (;;) {
3951       ResTy = parseReg(Operands);
3952       if (ResTy != MatchOperand_Success)
3953         return ResTy;
3954 
3955       if (getLexer().is(AsmToken::RBrac))
3956         break;
3957 
3958       if (getLexer().isNot(AsmToken::Comma))
3959         return MatchOperand_ParseFail;
3960       Parser.Lex();
3961     }
3962 
3963     if (Operands.size() - Prefix > 1) {
3964       Operands.insert(Operands.begin() + Prefix,
3965                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
3966       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
3967                                                     getTok().getLoc()));
3968     }
3969 
3970     Parser.Lex(); // eat the ']'
3971     return MatchOperand_Success;
3972   }
3973 
3974   return parseRegOrImm(Operands);
3975 }
3976 
3977 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3978   // Clear any forced encodings from the previous instruction.
3979   setForcedEncodingSize(0);
3980   setForcedDPP(false);
3981   setForcedSDWA(false);
3982 
3983   if (Name.endswith("_e64")) {
3984     setForcedEncodingSize(64);
3985     return Name.substr(0, Name.size() - 4);
3986   } else if (Name.endswith("_e32")) {
3987     setForcedEncodingSize(32);
3988     return Name.substr(0, Name.size() - 4);
3989   } else if (Name.endswith("_dpp")) {
3990     setForcedDPP(true);
3991     return Name.substr(0, Name.size() - 4);
3992   } else if (Name.endswith("_sdwa")) {
3993     setForcedSDWA(true);
3994     return Name.substr(0, Name.size() - 5);
3995   }
3996   return Name;
3997 }
3998 
3999 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4000                                        StringRef Name,
4001                                        SMLoc NameLoc, OperandVector &Operands) {
4002   // Add the instruction mnemonic
4003   Name = parseMnemonicSuffix(Name);
4004   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4005 
4006   bool IsMIMG = Name.startswith("image_");
4007 
4008   while (!getLexer().is(AsmToken::EndOfStatement)) {
4009     OperandMode Mode = OperandMode_Default;
4010     if (IsMIMG && isGFX10() && Operands.size() == 2)
4011       Mode = OperandMode_NSA;
4012     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4013 
4014     // Eat the comma or space if there is one.
4015     if (getLexer().is(AsmToken::Comma))
4016       Parser.Lex();
4017 
4018     switch (Res) {
4019       case MatchOperand_Success: break;
4020       case MatchOperand_ParseFail:
4021         // FIXME: use real operand location rather than the current location.
4022         Error(getLexer().getLoc(), "failed parsing operand.");
4023         while (!getLexer().is(AsmToken::EndOfStatement)) {
4024           Parser.Lex();
4025         }
4026         return true;
4027       case MatchOperand_NoMatch:
4028         // FIXME: use real operand location rather than the current location.
4029         Error(getLexer().getLoc(), "not a valid operand.");
4030         while (!getLexer().is(AsmToken::EndOfStatement)) {
4031           Parser.Lex();
4032         }
4033         return true;
4034     }
4035   }
4036 
4037   return false;
4038 }
4039 
4040 //===----------------------------------------------------------------------===//
4041 // Utility functions
4042 //===----------------------------------------------------------------------===//
4043 
4044 OperandMatchResultTy
4045 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4046 
4047   if (!trySkipId(Prefix, AsmToken::Colon))
4048     return MatchOperand_NoMatch;
4049 
4050   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4051 }
4052 
4053 OperandMatchResultTy
4054 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4055                                     AMDGPUOperand::ImmTy ImmTy,
4056                                     bool (*ConvertResult)(int64_t&)) {
4057   SMLoc S = getLoc();
4058   int64_t Value = 0;
4059 
4060   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4061   if (Res != MatchOperand_Success)
4062     return Res;
4063 
4064   if (ConvertResult && !ConvertResult(Value)) {
4065     Error(S, "invalid " + StringRef(Prefix) + " value.");
4066   }
4067 
4068   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4069   return MatchOperand_Success;
4070 }
4071 
4072 OperandMatchResultTy
4073 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4074                                              OperandVector &Operands,
4075                                              AMDGPUOperand::ImmTy ImmTy,
4076                                              bool (*ConvertResult)(int64_t&)) {
4077   SMLoc S = getLoc();
4078   if (!trySkipId(Prefix, AsmToken::Colon))
4079     return MatchOperand_NoMatch;
4080 
4081   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4082     return MatchOperand_ParseFail;
4083 
4084   unsigned Val = 0;
4085   const unsigned MaxSize = 4;
4086 
4087   // FIXME: How to verify the number of elements matches the number of src
4088   // operands?
4089   for (int I = 0; ; ++I) {
4090     int64_t Op;
4091     SMLoc Loc = getLoc();
4092     if (!parseExpr(Op))
4093       return MatchOperand_ParseFail;
4094 
4095     if (Op != 0 && Op != 1) {
4096       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4097       return MatchOperand_ParseFail;
4098     }
4099 
4100     Val |= (Op << I);
4101 
4102     if (trySkipToken(AsmToken::RBrac))
4103       break;
4104 
4105     if (I + 1 == MaxSize) {
4106       Error(getLoc(), "expected a closing square bracket");
4107       return MatchOperand_ParseFail;
4108     }
4109 
4110     if (!skipToken(AsmToken::Comma, "expected a comma"))
4111       return MatchOperand_ParseFail;
4112   }
4113 
4114   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4115   return MatchOperand_Success;
4116 }
4117 
4118 OperandMatchResultTy
4119 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4120                                AMDGPUOperand::ImmTy ImmTy) {
4121   int64_t Bit = 0;
4122   SMLoc S = Parser.getTok().getLoc();
4123 
4124   // We are at the end of the statement, and this is a default argument, so
4125   // use a default value.
4126   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4127     switch(getLexer().getKind()) {
4128       case AsmToken::Identifier: {
4129         StringRef Tok = Parser.getTok().getString();
4130         if (Tok == Name) {
4131           if (Tok == "r128" && isGFX9())
4132             Error(S, "r128 modifier is not supported on this GPU");
4133           if (Tok == "a16" && !isGFX9())
4134             Error(S, "a16 modifier is not supported on this GPU");
4135           Bit = 1;
4136           Parser.Lex();
4137         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4138           Bit = 0;
4139           Parser.Lex();
4140         } else {
4141           return MatchOperand_NoMatch;
4142         }
4143         break;
4144       }
4145       default:
4146         return MatchOperand_NoMatch;
4147     }
4148   }
4149 
4150   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4151     return MatchOperand_ParseFail;
4152 
4153   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4154   return MatchOperand_Success;
4155 }
4156 
4157 static void addOptionalImmOperand(
4158   MCInst& Inst, const OperandVector& Operands,
4159   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4160   AMDGPUOperand::ImmTy ImmT,
4161   int64_t Default = 0) {
4162   auto i = OptionalIdx.find(ImmT);
4163   if (i != OptionalIdx.end()) {
4164     unsigned Idx = i->second;
4165     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4166   } else {
4167     Inst.addOperand(MCOperand::createImm(Default));
4168   }
4169 }
4170 
4171 OperandMatchResultTy
4172 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4173   if (getLexer().isNot(AsmToken::Identifier)) {
4174     return MatchOperand_NoMatch;
4175   }
4176   StringRef Tok = Parser.getTok().getString();
4177   if (Tok != Prefix) {
4178     return MatchOperand_NoMatch;
4179   }
4180 
4181   Parser.Lex();
4182   if (getLexer().isNot(AsmToken::Colon)) {
4183     return MatchOperand_ParseFail;
4184   }
4185 
4186   Parser.Lex();
4187   if (getLexer().isNot(AsmToken::Identifier)) {
4188     return MatchOperand_ParseFail;
4189   }
4190 
4191   Value = Parser.getTok().getString();
4192   return MatchOperand_Success;
4193 }
4194 
4195 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4196 // values to live in a joint format operand in the MCInst encoding.
4197 OperandMatchResultTy
4198 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4199   SMLoc S = Parser.getTok().getLoc();
4200   int64_t Dfmt = 0, Nfmt = 0;
4201   // dfmt and nfmt can appear in either order, and each is optional.
4202   bool GotDfmt = false, GotNfmt = false;
4203   while (!GotDfmt || !GotNfmt) {
4204     if (!GotDfmt) {
4205       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4206       if (Res != MatchOperand_NoMatch) {
4207         if (Res != MatchOperand_Success)
4208           return Res;
4209         if (Dfmt >= 16) {
4210           Error(Parser.getTok().getLoc(), "out of range dfmt");
4211           return MatchOperand_ParseFail;
4212         }
4213         GotDfmt = true;
4214         Parser.Lex();
4215         continue;
4216       }
4217     }
4218     if (!GotNfmt) {
4219       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4220       if (Res != MatchOperand_NoMatch) {
4221         if (Res != MatchOperand_Success)
4222           return Res;
4223         if (Nfmt >= 8) {
4224           Error(Parser.getTok().getLoc(), "out of range nfmt");
4225           return MatchOperand_ParseFail;
4226         }
4227         GotNfmt = true;
4228         Parser.Lex();
4229         continue;
4230       }
4231     }
4232     break;
4233   }
4234   if (!GotDfmt && !GotNfmt)
4235     return MatchOperand_NoMatch;
4236   auto Format = Dfmt | Nfmt << 4;
4237   Operands.push_back(
4238       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4239   return MatchOperand_Success;
4240 }
4241 
4242 //===----------------------------------------------------------------------===//
4243 // ds
4244 //===----------------------------------------------------------------------===//
4245 
4246 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4247                                     const OperandVector &Operands) {
4248   OptionalImmIndexMap OptionalIdx;
4249 
4250   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4251     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4252 
4253     // Add the register arguments
4254     if (Op.isReg()) {
4255       Op.addRegOperands(Inst, 1);
4256       continue;
4257     }
4258 
4259     // Handle optional arguments
4260     OptionalIdx[Op.getImmTy()] = i;
4261   }
4262 
4263   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4264   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4265   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4266 
4267   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4268 }
4269 
4270 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4271                                 bool IsGdsHardcoded) {
4272   OptionalImmIndexMap OptionalIdx;
4273 
4274   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4275     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4276 
4277     // Add the register arguments
4278     if (Op.isReg()) {
4279       Op.addRegOperands(Inst, 1);
4280       continue;
4281     }
4282 
4283     if (Op.isToken() && Op.getToken() == "gds") {
4284       IsGdsHardcoded = true;
4285       continue;
4286     }
4287 
4288     // Handle optional arguments
4289     OptionalIdx[Op.getImmTy()] = i;
4290   }
4291 
4292   AMDGPUOperand::ImmTy OffsetType =
4293     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4294      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4295      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4296                                                       AMDGPUOperand::ImmTyOffset;
4297 
4298   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4299 
4300   if (!IsGdsHardcoded) {
4301     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4302   }
4303   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4304 }
4305 
4306 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4307   OptionalImmIndexMap OptionalIdx;
4308 
4309   unsigned OperandIdx[4];
4310   unsigned EnMask = 0;
4311   int SrcIdx = 0;
4312 
4313   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4314     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4315 
4316     // Add the register arguments
4317     if (Op.isReg()) {
4318       assert(SrcIdx < 4);
4319       OperandIdx[SrcIdx] = Inst.size();
4320       Op.addRegOperands(Inst, 1);
4321       ++SrcIdx;
4322       continue;
4323     }
4324 
4325     if (Op.isOff()) {
4326       assert(SrcIdx < 4);
4327       OperandIdx[SrcIdx] = Inst.size();
4328       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4329       ++SrcIdx;
4330       continue;
4331     }
4332 
4333     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4334       Op.addImmOperands(Inst, 1);
4335       continue;
4336     }
4337 
4338     if (Op.isToken() && Op.getToken() == "done")
4339       continue;
4340 
4341     // Handle optional arguments
4342     OptionalIdx[Op.getImmTy()] = i;
4343   }
4344 
4345   assert(SrcIdx == 4);
4346 
4347   bool Compr = false;
4348   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4349     Compr = true;
4350     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4351     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4352     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4353   }
4354 
4355   for (auto i = 0; i < SrcIdx; ++i) {
4356     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4357       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4358     }
4359   }
4360 
4361   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4362   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4363 
4364   Inst.addOperand(MCOperand::createImm(EnMask));
4365 }
4366 
4367 //===----------------------------------------------------------------------===//
4368 // s_waitcnt
4369 //===----------------------------------------------------------------------===//
4370 
4371 static bool
4372 encodeCnt(
4373   const AMDGPU::IsaVersion ISA,
4374   int64_t &IntVal,
4375   int64_t CntVal,
4376   bool Saturate,
4377   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4378   unsigned (*decode)(const IsaVersion &Version, unsigned))
4379 {
4380   bool Failed = false;
4381 
4382   IntVal = encode(ISA, IntVal, CntVal);
4383   if (CntVal != decode(ISA, IntVal)) {
4384     if (Saturate) {
4385       IntVal = encode(ISA, IntVal, -1);
4386     } else {
4387       Failed = true;
4388     }
4389   }
4390   return Failed;
4391 }
4392 
4393 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4394 
4395   SMLoc CntLoc = getLoc();
4396   StringRef CntName = getTokenStr();
4397 
4398   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4399       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4400     return false;
4401 
4402   int64_t CntVal;
4403   SMLoc ValLoc = getLoc();
4404   if (!parseExpr(CntVal))
4405     return false;
4406 
4407   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4408 
4409   bool Failed = true;
4410   bool Sat = CntName.endswith("_sat");
4411 
4412   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4413     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4414   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4415     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4416   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4417     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4418   } else {
4419     Error(CntLoc, "invalid counter name " + CntName);
4420     return false;
4421   }
4422 
4423   if (Failed) {
4424     Error(ValLoc, "too large value for " + CntName);
4425     return false;
4426   }
4427 
4428   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4429     return false;
4430 
4431   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4432     if (isToken(AsmToken::EndOfStatement)) {
4433       Error(getLoc(), "expected a counter name");
4434       return false;
4435     }
4436   }
4437 
4438   return true;
4439 }
4440 
4441 OperandMatchResultTy
4442 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4443   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4444   int64_t Waitcnt = getWaitcntBitMask(ISA);
4445   SMLoc S = getLoc();
4446 
4447   // If parse failed, do not return error code
4448   // to avoid excessive error messages.
4449   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4450     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4451   } else {
4452     parseExpr(Waitcnt);
4453   }
4454 
4455   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4456   return MatchOperand_Success;
4457 }
4458 
4459 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4460                                           int64_t &Width) {
4461   using namespace llvm::AMDGPU::Hwreg;
4462 
4463   if (Parser.getTok().getString() != "hwreg")
4464     return true;
4465   Parser.Lex();
4466 
4467   if (getLexer().isNot(AsmToken::LParen))
4468     return true;
4469   Parser.Lex();
4470 
4471   if (getLexer().is(AsmToken::Identifier)) {
4472     HwReg.IsSymbolic = true;
4473     HwReg.Id = ID_UNKNOWN_;
4474     const StringRef tok = Parser.getTok().getString();
4475     int Last = ID_SYMBOLIC_LAST_;
4476     if (isSI() || isCI() || isVI())
4477       Last = ID_SYMBOLIC_FIRST_GFX9_;
4478     else if (isGFX9())
4479       Last = ID_SYMBOLIC_FIRST_GFX10_;
4480     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4481       if (tok == IdSymbolic[i]) {
4482         HwReg.Id = i;
4483         break;
4484       }
4485     }
4486     Parser.Lex();
4487   } else {
4488     HwReg.IsSymbolic = false;
4489     if (getLexer().isNot(AsmToken::Integer))
4490       return true;
4491     if (getParser().parseAbsoluteExpression(HwReg.Id))
4492       return true;
4493   }
4494 
4495   if (getLexer().is(AsmToken::RParen)) {
4496     Parser.Lex();
4497     return false;
4498   }
4499 
4500   // optional params
4501   if (getLexer().isNot(AsmToken::Comma))
4502     return true;
4503   Parser.Lex();
4504 
4505   if (getLexer().isNot(AsmToken::Integer))
4506     return true;
4507   if (getParser().parseAbsoluteExpression(Offset))
4508     return true;
4509 
4510   if (getLexer().isNot(AsmToken::Comma))
4511     return true;
4512   Parser.Lex();
4513 
4514   if (getLexer().isNot(AsmToken::Integer))
4515     return true;
4516   if (getParser().parseAbsoluteExpression(Width))
4517     return true;
4518 
4519   if (getLexer().isNot(AsmToken::RParen))
4520     return true;
4521   Parser.Lex();
4522 
4523   return false;
4524 }
4525 
4526 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4527   using namespace llvm::AMDGPU::Hwreg;
4528 
4529   int64_t Imm16Val = 0;
4530   SMLoc S = Parser.getTok().getLoc();
4531 
4532   switch(getLexer().getKind()) {
4533     default: return MatchOperand_NoMatch;
4534     case AsmToken::Integer:
4535       // The operand can be an integer value.
4536       if (getParser().parseAbsoluteExpression(Imm16Val))
4537         return MatchOperand_NoMatch;
4538       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4539         Error(S, "invalid immediate: only 16-bit values are legal");
4540         // Do not return error code, but create an imm operand anyway and proceed
4541         // to the next operand, if any. That avoids unneccessary error messages.
4542       }
4543       break;
4544 
4545     case AsmToken::Identifier: {
4546         OperandInfoTy HwReg(ID_UNKNOWN_);
4547         int64_t Offset = OFFSET_DEFAULT_;
4548         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4549         if (parseHwregConstruct(HwReg, Offset, Width))
4550           return MatchOperand_ParseFail;
4551         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4552           if (HwReg.IsSymbolic)
4553             Error(S, "invalid symbolic name of hardware register");
4554           else
4555             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4556         }
4557         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4558           Error(S, "invalid bit offset: only 5-bit values are legal");
4559         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4560           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4561         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4562       }
4563       break;
4564   }
4565   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4566   return MatchOperand_Success;
4567 }
4568 
4569 bool AMDGPUOperand::isSWaitCnt() const {
4570   return isImm();
4571 }
4572 
4573 bool AMDGPUOperand::isHwreg() const {
4574   return isImmTy(ImmTyHwreg);
4575 }
4576 
4577 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4578   using namespace llvm::AMDGPU::SendMsg;
4579 
4580   if (Parser.getTok().getString() != "sendmsg")
4581     return true;
4582   Parser.Lex();
4583 
4584   if (getLexer().isNot(AsmToken::LParen))
4585     return true;
4586   Parser.Lex();
4587 
4588   if (getLexer().is(AsmToken::Identifier)) {
4589     Msg.IsSymbolic = true;
4590     Msg.Id = ID_UNKNOWN_;
4591     const std::string tok = Parser.getTok().getString();
4592     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4593       switch(i) {
4594         default: continue; // Omit gaps.
4595         case ID_GS_ALLOC_REQ:
4596           if (isSI() || isCI() || isVI())
4597             continue;
4598           break;
4599         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4600         case ID_SYSMSG: break;
4601       }
4602       if (tok == IdSymbolic[i]) {
4603         Msg.Id = i;
4604         break;
4605       }
4606     }
4607     Parser.Lex();
4608   } else {
4609     Msg.IsSymbolic = false;
4610     if (getLexer().isNot(AsmToken::Integer))
4611       return true;
4612     if (getParser().parseAbsoluteExpression(Msg.Id))
4613       return true;
4614     if (getLexer().is(AsmToken::Integer))
4615       if (getParser().parseAbsoluteExpression(Msg.Id))
4616         Msg.Id = ID_UNKNOWN_;
4617   }
4618   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4619     return false;
4620 
4621   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4622     if (getLexer().isNot(AsmToken::RParen))
4623       return true;
4624     Parser.Lex();
4625     return false;
4626   }
4627 
4628   if (getLexer().isNot(AsmToken::Comma))
4629     return true;
4630   Parser.Lex();
4631 
4632   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4633   Operation.Id = ID_UNKNOWN_;
4634   if (getLexer().is(AsmToken::Identifier)) {
4635     Operation.IsSymbolic = true;
4636     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4637     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4638     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4639     const StringRef Tok = Parser.getTok().getString();
4640     for (int i = F; i < L; ++i) {
4641       if (Tok == S[i]) {
4642         Operation.Id = i;
4643         break;
4644       }
4645     }
4646     Parser.Lex();
4647   } else {
4648     Operation.IsSymbolic = false;
4649     if (getLexer().isNot(AsmToken::Integer))
4650       return true;
4651     if (getParser().parseAbsoluteExpression(Operation.Id))
4652       return true;
4653   }
4654 
4655   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4656     // Stream id is optional.
4657     if (getLexer().is(AsmToken::RParen)) {
4658       Parser.Lex();
4659       return false;
4660     }
4661 
4662     if (getLexer().isNot(AsmToken::Comma))
4663       return true;
4664     Parser.Lex();
4665 
4666     if (getLexer().isNot(AsmToken::Integer))
4667       return true;
4668     if (getParser().parseAbsoluteExpression(StreamId))
4669       return true;
4670   }
4671 
4672   if (getLexer().isNot(AsmToken::RParen))
4673     return true;
4674   Parser.Lex();
4675   return false;
4676 }
4677 
4678 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4679   if (getLexer().getKind() != AsmToken::Identifier)
4680     return MatchOperand_NoMatch;
4681 
4682   StringRef Str = Parser.getTok().getString();
4683   int Slot = StringSwitch<int>(Str)
4684     .Case("p10", 0)
4685     .Case("p20", 1)
4686     .Case("p0", 2)
4687     .Default(-1);
4688 
4689   SMLoc S = Parser.getTok().getLoc();
4690   if (Slot == -1)
4691     return MatchOperand_ParseFail;
4692 
4693   Parser.Lex();
4694   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4695                                               AMDGPUOperand::ImmTyInterpSlot));
4696   return MatchOperand_Success;
4697 }
4698 
4699 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4700   if (getLexer().getKind() != AsmToken::Identifier)
4701     return MatchOperand_NoMatch;
4702 
4703   StringRef Str = Parser.getTok().getString();
4704   if (!Str.startswith("attr"))
4705     return MatchOperand_NoMatch;
4706 
4707   StringRef Chan = Str.take_back(2);
4708   int AttrChan = StringSwitch<int>(Chan)
4709     .Case(".x", 0)
4710     .Case(".y", 1)
4711     .Case(".z", 2)
4712     .Case(".w", 3)
4713     .Default(-1);
4714   if (AttrChan == -1)
4715     return MatchOperand_ParseFail;
4716 
4717   Str = Str.drop_back(2).drop_front(4);
4718 
4719   uint8_t Attr;
4720   if (Str.getAsInteger(10, Attr))
4721     return MatchOperand_ParseFail;
4722 
4723   SMLoc S = Parser.getTok().getLoc();
4724   Parser.Lex();
4725   if (Attr > 63) {
4726     Error(S, "out of bounds attr");
4727     return MatchOperand_Success;
4728   }
4729 
4730   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4731 
4732   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4733                                               AMDGPUOperand::ImmTyInterpAttr));
4734   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4735                                               AMDGPUOperand::ImmTyAttrChan));
4736   return MatchOperand_Success;
4737 }
4738 
4739 void AMDGPUAsmParser::errorExpTgt() {
4740   Error(Parser.getTok().getLoc(), "invalid exp target");
4741 }
4742 
4743 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4744                                                       uint8_t &Val) {
4745   if (Str == "null") {
4746     Val = 9;
4747     return MatchOperand_Success;
4748   }
4749 
4750   if (Str.startswith("mrt")) {
4751     Str = Str.drop_front(3);
4752     if (Str == "z") { // == mrtz
4753       Val = 8;
4754       return MatchOperand_Success;
4755     }
4756 
4757     if (Str.getAsInteger(10, Val))
4758       return MatchOperand_ParseFail;
4759 
4760     if (Val > 7)
4761       errorExpTgt();
4762 
4763     return MatchOperand_Success;
4764   }
4765 
4766   if (Str.startswith("pos")) {
4767     Str = Str.drop_front(3);
4768     if (Str.getAsInteger(10, Val))
4769       return MatchOperand_ParseFail;
4770 
4771     if (Val > 4 || (Val == 4 && !isGFX10()))
4772       errorExpTgt();
4773 
4774     Val += 12;
4775     return MatchOperand_Success;
4776   }
4777 
4778   if (isGFX10() && Str == "prim") {
4779     Val = 20;
4780     return MatchOperand_Success;
4781   }
4782 
4783   if (Str.startswith("param")) {
4784     Str = Str.drop_front(5);
4785     if (Str.getAsInteger(10, Val))
4786       return MatchOperand_ParseFail;
4787 
4788     if (Val >= 32)
4789       errorExpTgt();
4790 
4791     Val += 32;
4792     return MatchOperand_Success;
4793   }
4794 
4795   if (Str.startswith("invalid_target_")) {
4796     Str = Str.drop_front(15);
4797     if (Str.getAsInteger(10, Val))
4798       return MatchOperand_ParseFail;
4799 
4800     errorExpTgt();
4801     return MatchOperand_Success;
4802   }
4803 
4804   return MatchOperand_NoMatch;
4805 }
4806 
4807 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4808   uint8_t Val;
4809   StringRef Str = Parser.getTok().getString();
4810 
4811   auto Res = parseExpTgtImpl(Str, Val);
4812   if (Res != MatchOperand_Success)
4813     return Res;
4814 
4815   SMLoc S = Parser.getTok().getLoc();
4816   Parser.Lex();
4817 
4818   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4819                                               AMDGPUOperand::ImmTyExpTgt));
4820   return MatchOperand_Success;
4821 }
4822 
4823 OperandMatchResultTy
4824 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4825   using namespace llvm::AMDGPU::SendMsg;
4826 
4827   int64_t Imm16Val = 0;
4828   SMLoc S = Parser.getTok().getLoc();
4829 
4830   switch(getLexer().getKind()) {
4831   default:
4832     return MatchOperand_NoMatch;
4833   case AsmToken::Integer:
4834     // The operand can be an integer value.
4835     if (getParser().parseAbsoluteExpression(Imm16Val))
4836       return MatchOperand_NoMatch;
4837     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4838       Error(S, "invalid immediate: only 16-bit values are legal");
4839       // Do not return error code, but create an imm operand anyway and proceed
4840       // to the next operand, if any. That avoids unneccessary error messages.
4841     }
4842     break;
4843   case AsmToken::Identifier: {
4844       OperandInfoTy Msg(ID_UNKNOWN_);
4845       OperandInfoTy Operation(OP_UNKNOWN_);
4846       int64_t StreamId = STREAM_ID_DEFAULT_;
4847       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4848         return MatchOperand_ParseFail;
4849       do {
4850         // Validate and encode message ID.
4851         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4852                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4853                 || Msg.Id == ID_SYSMSG)) {
4854           if (Msg.IsSymbolic)
4855             Error(S, "invalid/unsupported symbolic name of message");
4856           else
4857             Error(S, "invalid/unsupported code of message");
4858           break;
4859         }
4860         Imm16Val = (Msg.Id << ID_SHIFT_);
4861         // Validate and encode operation ID.
4862         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4863           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4864             if (Operation.IsSymbolic)
4865               Error(S, "invalid symbolic name of GS_OP");
4866             else
4867               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4868             break;
4869           }
4870           if (Operation.Id == OP_GS_NOP
4871               && Msg.Id != ID_GS_DONE) {
4872             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4873             break;
4874           }
4875           Imm16Val |= (Operation.Id << OP_SHIFT_);
4876         }
4877         if (Msg.Id == ID_SYSMSG) {
4878           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4879             if (Operation.IsSymbolic)
4880               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4881             else
4882               Error(S, "invalid/unsupported code of SYSMSG_OP");
4883             break;
4884           }
4885           Imm16Val |= (Operation.Id << OP_SHIFT_);
4886         }
4887         // Validate and encode stream ID.
4888         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4889           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4890             Error(S, "invalid stream id: only 2-bit values are legal");
4891             break;
4892           }
4893           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4894         }
4895       } while (false);
4896     }
4897     break;
4898   }
4899   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4900   return MatchOperand_Success;
4901 }
4902 
4903 bool AMDGPUOperand::isSendMsg() const {
4904   return isImmTy(ImmTySendMsg);
4905 }
4906 
4907 //===----------------------------------------------------------------------===//
4908 // parser helpers
4909 //===----------------------------------------------------------------------===//
4910 
4911 bool
4912 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4913   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4914 }
4915 
4916 bool
4917 AMDGPUAsmParser::isId(const StringRef Id) const {
4918   return isId(getToken(), Id);
4919 }
4920 
4921 bool
4922 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4923   return getTokenKind() == Kind;
4924 }
4925 
4926 bool
4927 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4928   if (isId(Id)) {
4929     lex();
4930     return true;
4931   }
4932   return false;
4933 }
4934 
4935 bool
4936 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
4937   if (isId(Id) && peekToken().is(Kind)) {
4938     lex();
4939     lex();
4940     return true;
4941   }
4942   return false;
4943 }
4944 
4945 bool
4946 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4947   if (isToken(Kind)) {
4948     lex();
4949     return true;
4950   }
4951   return false;
4952 }
4953 
4954 bool
4955 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4956                            const StringRef ErrMsg) {
4957   if (!trySkipToken(Kind)) {
4958     Error(getLoc(), ErrMsg);
4959     return false;
4960   }
4961   return true;
4962 }
4963 
4964 bool
4965 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4966   return !getParser().parseAbsoluteExpression(Imm);
4967 }
4968 
4969 bool
4970 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4971   if (isToken(AsmToken::String)) {
4972     Val = getToken().getStringContents();
4973     lex();
4974     return true;
4975   } else {
4976     Error(getLoc(), ErrMsg);
4977     return false;
4978   }
4979 }
4980 
4981 AsmToken
4982 AMDGPUAsmParser::getToken() const {
4983   return Parser.getTok();
4984 }
4985 
4986 AsmToken
4987 AMDGPUAsmParser::peekToken() {
4988   return getLexer().peekTok();
4989 }
4990 
4991 void
4992 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4993   auto TokCount = getLexer().peekTokens(Tokens);
4994 
4995   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4996     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4997 }
4998 
4999 AsmToken::TokenKind
5000 AMDGPUAsmParser::getTokenKind() const {
5001   return getLexer().getKind();
5002 }
5003 
5004 SMLoc
5005 AMDGPUAsmParser::getLoc() const {
5006   return getToken().getLoc();
5007 }
5008 
5009 StringRef
5010 AMDGPUAsmParser::getTokenStr() const {
5011   return getToken().getString();
5012 }
5013 
5014 void
5015 AMDGPUAsmParser::lex() {
5016   Parser.Lex();
5017 }
5018 
5019 //===----------------------------------------------------------------------===//
5020 // swizzle
5021 //===----------------------------------------------------------------------===//
5022 
5023 LLVM_READNONE
5024 static unsigned
5025 encodeBitmaskPerm(const unsigned AndMask,
5026                   const unsigned OrMask,
5027                   const unsigned XorMask) {
5028   using namespace llvm::AMDGPU::Swizzle;
5029 
5030   return BITMASK_PERM_ENC |
5031          (AndMask << BITMASK_AND_SHIFT) |
5032          (OrMask  << BITMASK_OR_SHIFT)  |
5033          (XorMask << BITMASK_XOR_SHIFT);
5034 }
5035 
5036 bool
5037 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5038                                       const unsigned MinVal,
5039                                       const unsigned MaxVal,
5040                                       const StringRef ErrMsg) {
5041   for (unsigned i = 0; i < OpNum; ++i) {
5042     if (!skipToken(AsmToken::Comma, "expected a comma")){
5043       return false;
5044     }
5045     SMLoc ExprLoc = Parser.getTok().getLoc();
5046     if (!parseExpr(Op[i])) {
5047       return false;
5048     }
5049     if (Op[i] < MinVal || Op[i] > MaxVal) {
5050       Error(ExprLoc, ErrMsg);
5051       return false;
5052     }
5053   }
5054 
5055   return true;
5056 }
5057 
5058 bool
5059 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5060   using namespace llvm::AMDGPU::Swizzle;
5061 
5062   int64_t Lane[LANE_NUM];
5063   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5064                            "expected a 2-bit lane id")) {
5065     Imm = QUAD_PERM_ENC;
5066     for (unsigned I = 0; I < LANE_NUM; ++I) {
5067       Imm |= Lane[I] << (LANE_SHIFT * I);
5068     }
5069     return true;
5070   }
5071   return false;
5072 }
5073 
5074 bool
5075 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5076   using namespace llvm::AMDGPU::Swizzle;
5077 
5078   SMLoc S = Parser.getTok().getLoc();
5079   int64_t GroupSize;
5080   int64_t LaneIdx;
5081 
5082   if (!parseSwizzleOperands(1, &GroupSize,
5083                             2, 32,
5084                             "group size must be in the interval [2,32]")) {
5085     return false;
5086   }
5087   if (!isPowerOf2_64(GroupSize)) {
5088     Error(S, "group size must be a power of two");
5089     return false;
5090   }
5091   if (parseSwizzleOperands(1, &LaneIdx,
5092                            0, GroupSize - 1,
5093                            "lane id must be in the interval [0,group size - 1]")) {
5094     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5095     return true;
5096   }
5097   return false;
5098 }
5099 
5100 bool
5101 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5102   using namespace llvm::AMDGPU::Swizzle;
5103 
5104   SMLoc S = Parser.getTok().getLoc();
5105   int64_t GroupSize;
5106 
5107   if (!parseSwizzleOperands(1, &GroupSize,
5108       2, 32, "group size must be in the interval [2,32]")) {
5109     return false;
5110   }
5111   if (!isPowerOf2_64(GroupSize)) {
5112     Error(S, "group size must be a power of two");
5113     return false;
5114   }
5115 
5116   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5117   return true;
5118 }
5119 
5120 bool
5121 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5122   using namespace llvm::AMDGPU::Swizzle;
5123 
5124   SMLoc S = Parser.getTok().getLoc();
5125   int64_t GroupSize;
5126 
5127   if (!parseSwizzleOperands(1, &GroupSize,
5128       1, 16, "group size must be in the interval [1,16]")) {
5129     return false;
5130   }
5131   if (!isPowerOf2_64(GroupSize)) {
5132     Error(S, "group size must be a power of two");
5133     return false;
5134   }
5135 
5136   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5137   return true;
5138 }
5139 
5140 bool
5141 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5142   using namespace llvm::AMDGPU::Swizzle;
5143 
5144   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5145     return false;
5146   }
5147 
5148   StringRef Ctl;
5149   SMLoc StrLoc = Parser.getTok().getLoc();
5150   if (!parseString(Ctl)) {
5151     return false;
5152   }
5153   if (Ctl.size() != BITMASK_WIDTH) {
5154     Error(StrLoc, "expected a 5-character mask");
5155     return false;
5156   }
5157 
5158   unsigned AndMask = 0;
5159   unsigned OrMask = 0;
5160   unsigned XorMask = 0;
5161 
5162   for (size_t i = 0; i < Ctl.size(); ++i) {
5163     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5164     switch(Ctl[i]) {
5165     default:
5166       Error(StrLoc, "invalid mask");
5167       return false;
5168     case '0':
5169       break;
5170     case '1':
5171       OrMask |= Mask;
5172       break;
5173     case 'p':
5174       AndMask |= Mask;
5175       break;
5176     case 'i':
5177       AndMask |= Mask;
5178       XorMask |= Mask;
5179       break;
5180     }
5181   }
5182 
5183   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5184   return true;
5185 }
5186 
5187 bool
5188 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5189 
5190   SMLoc OffsetLoc = Parser.getTok().getLoc();
5191 
5192   if (!parseExpr(Imm)) {
5193     return false;
5194   }
5195   if (!isUInt<16>(Imm)) {
5196     Error(OffsetLoc, "expected a 16-bit offset");
5197     return false;
5198   }
5199   return true;
5200 }
5201 
5202 bool
5203 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5204   using namespace llvm::AMDGPU::Swizzle;
5205 
5206   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5207 
5208     SMLoc ModeLoc = Parser.getTok().getLoc();
5209     bool Ok = false;
5210 
5211     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5212       Ok = parseSwizzleQuadPerm(Imm);
5213     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5214       Ok = parseSwizzleBitmaskPerm(Imm);
5215     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5216       Ok = parseSwizzleBroadcast(Imm);
5217     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5218       Ok = parseSwizzleSwap(Imm);
5219     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5220       Ok = parseSwizzleReverse(Imm);
5221     } else {
5222       Error(ModeLoc, "expected a swizzle mode");
5223     }
5224 
5225     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5226   }
5227 
5228   return false;
5229 }
5230 
5231 OperandMatchResultTy
5232 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5233   SMLoc S = Parser.getTok().getLoc();
5234   int64_t Imm = 0;
5235 
5236   if (trySkipId("offset")) {
5237 
5238     bool Ok = false;
5239     if (skipToken(AsmToken::Colon, "expected a colon")) {
5240       if (trySkipId("swizzle")) {
5241         Ok = parseSwizzleMacro(Imm);
5242       } else {
5243         Ok = parseSwizzleOffset(Imm);
5244       }
5245     }
5246 
5247     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5248 
5249     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5250   } else {
5251     // Swizzle "offset" operand is optional.
5252     // If it is omitted, try parsing other optional operands.
5253     return parseOptionalOpr(Operands);
5254   }
5255 }
5256 
5257 bool
5258 AMDGPUOperand::isSwizzle() const {
5259   return isImmTy(ImmTySwizzle);
5260 }
5261 
5262 //===----------------------------------------------------------------------===//
5263 // VGPR Index Mode
5264 //===----------------------------------------------------------------------===//
5265 
5266 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5267 
5268   using namespace llvm::AMDGPU::VGPRIndexMode;
5269 
5270   if (trySkipToken(AsmToken::RParen)) {
5271     return OFF;
5272   }
5273 
5274   int64_t Imm = 0;
5275 
5276   while (true) {
5277     unsigned Mode = 0;
5278     SMLoc S = Parser.getTok().getLoc();
5279 
5280     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5281       if (trySkipId(IdSymbolic[ModeId])) {
5282         Mode = 1 << ModeId;
5283         break;
5284       }
5285     }
5286 
5287     if (Mode == 0) {
5288       Error(S, (Imm == 0)?
5289                "expected a VGPR index mode or a closing parenthesis" :
5290                "expected a VGPR index mode");
5291       break;
5292     }
5293 
5294     if (Imm & Mode) {
5295       Error(S, "duplicate VGPR index mode");
5296       break;
5297     }
5298     Imm |= Mode;
5299 
5300     if (trySkipToken(AsmToken::RParen))
5301       break;
5302     if (!skipToken(AsmToken::Comma,
5303                    "expected a comma or a closing parenthesis"))
5304       break;
5305   }
5306 
5307   return Imm;
5308 }
5309 
5310 OperandMatchResultTy
5311 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5312 
5313   int64_t Imm = 0;
5314   SMLoc S = Parser.getTok().getLoc();
5315 
5316   if (getLexer().getKind() == AsmToken::Identifier &&
5317       Parser.getTok().getString() == "gpr_idx" &&
5318       getLexer().peekTok().is(AsmToken::LParen)) {
5319 
5320     Parser.Lex();
5321     Parser.Lex();
5322 
5323     // If parse failed, trigger an error but do not return error code
5324     // to avoid excessive error messages.
5325     Imm = parseGPRIdxMacro();
5326 
5327   } else {
5328     if (getParser().parseAbsoluteExpression(Imm))
5329       return MatchOperand_NoMatch;
5330     if (Imm < 0 || !isUInt<4>(Imm)) {
5331       Error(S, "invalid immediate: only 4-bit values are legal");
5332     }
5333   }
5334 
5335   Operands.push_back(
5336       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5337   return MatchOperand_Success;
5338 }
5339 
5340 bool AMDGPUOperand::isGPRIdxMode() const {
5341   return isImmTy(ImmTyGprIdxMode);
5342 }
5343 
5344 //===----------------------------------------------------------------------===//
5345 // sopp branch targets
5346 //===----------------------------------------------------------------------===//
5347 
5348 OperandMatchResultTy
5349 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5350   SMLoc S = Parser.getTok().getLoc();
5351 
5352   switch (getLexer().getKind()) {
5353     default: return MatchOperand_ParseFail;
5354     case AsmToken::Integer: {
5355       int64_t Imm;
5356       if (getParser().parseAbsoluteExpression(Imm))
5357         return MatchOperand_ParseFail;
5358       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5359       return MatchOperand_Success;
5360     }
5361 
5362     case AsmToken::Identifier:
5363       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5364           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5365                                   Parser.getTok().getString()), getContext()), S));
5366       Parser.Lex();
5367       return MatchOperand_Success;
5368   }
5369 }
5370 
5371 //===----------------------------------------------------------------------===//
5372 // mubuf
5373 //===----------------------------------------------------------------------===//
5374 
5375 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5376   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5377 }
5378 
5379 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5380   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5381 }
5382 
5383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5384   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5385 }
5386 
5387 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5388                                const OperandVector &Operands,
5389                                bool IsAtomic,
5390                                bool IsAtomicReturn,
5391                                bool IsLds) {
5392   bool IsLdsOpcode = IsLds;
5393   bool HasLdsModifier = false;
5394   OptionalImmIndexMap OptionalIdx;
5395   assert(IsAtomicReturn ? IsAtomic : true);
5396   unsigned FirstOperandIdx = 1;
5397 
5398   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5399     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5400 
5401     // Add the register arguments
5402     if (Op.isReg()) {
5403       Op.addRegOperands(Inst, 1);
5404       // Insert a tied src for atomic return dst.
5405       // This cannot be postponed as subsequent calls to
5406       // addImmOperands rely on correct number of MC operands.
5407       if (IsAtomicReturn && i == FirstOperandIdx)
5408         Op.addRegOperands(Inst, 1);
5409       continue;
5410     }
5411 
5412     // Handle the case where soffset is an immediate
5413     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5414       Op.addImmOperands(Inst, 1);
5415       continue;
5416     }
5417 
5418     HasLdsModifier |= Op.isLDS();
5419 
5420     // Handle tokens like 'offen' which are sometimes hard-coded into the
5421     // asm string.  There are no MCInst operands for these.
5422     if (Op.isToken()) {
5423       continue;
5424     }
5425     assert(Op.isImm());
5426 
5427     // Handle optional arguments
5428     OptionalIdx[Op.getImmTy()] = i;
5429   }
5430 
5431   // This is a workaround for an llvm quirk which may result in an
5432   // incorrect instruction selection. Lds and non-lds versions of
5433   // MUBUF instructions are identical except that lds versions
5434   // have mandatory 'lds' modifier. However this modifier follows
5435   // optional modifiers and llvm asm matcher regards this 'lds'
5436   // modifier as an optional one. As a result, an lds version
5437   // of opcode may be selected even if it has no 'lds' modifier.
5438   if (IsLdsOpcode && !HasLdsModifier) {
5439     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5440     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5441       Inst.setOpcode(NoLdsOpcode);
5442       IsLdsOpcode = false;
5443     }
5444   }
5445 
5446   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5447   if (!IsAtomic) { // glc is hard-coded.
5448     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5449   }
5450   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5451 
5452   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5453     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5454   }
5455 
5456   if (isGFX10())
5457     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5458 }
5459 
5460 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5461   OptionalImmIndexMap OptionalIdx;
5462 
5463   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5464     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5465 
5466     // Add the register arguments
5467     if (Op.isReg()) {
5468       Op.addRegOperands(Inst, 1);
5469       continue;
5470     }
5471 
5472     // Handle the case where soffset is an immediate
5473     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5474       Op.addImmOperands(Inst, 1);
5475       continue;
5476     }
5477 
5478     // Handle tokens like 'offen' which are sometimes hard-coded into the
5479     // asm string.  There are no MCInst operands for these.
5480     if (Op.isToken()) {
5481       continue;
5482     }
5483     assert(Op.isImm());
5484 
5485     // Handle optional arguments
5486     OptionalIdx[Op.getImmTy()] = i;
5487   }
5488 
5489   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5490                         AMDGPUOperand::ImmTyOffset);
5491   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5492   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5493   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5494   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5495 
5496   if (isGFX10())
5497     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5498 }
5499 
5500 //===----------------------------------------------------------------------===//
5501 // mimg
5502 //===----------------------------------------------------------------------===//
5503 
5504 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5505                               bool IsAtomic) {
5506   unsigned I = 1;
5507   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5508   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5509     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5510   }
5511 
5512   if (IsAtomic) {
5513     // Add src, same as dst
5514     assert(Desc.getNumDefs() == 1);
5515     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5516   }
5517 
5518   OptionalImmIndexMap OptionalIdx;
5519 
5520   for (unsigned E = Operands.size(); I != E; ++I) {
5521     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5522 
5523     // Add the register arguments
5524     if (Op.isReg()) {
5525       Op.addRegOperands(Inst, 1);
5526     } else if (Op.isImmModifier()) {
5527       OptionalIdx[Op.getImmTy()] = I;
5528     } else if (!Op.isToken()) {
5529       llvm_unreachable("unexpected operand type");
5530     }
5531   }
5532 
5533   bool IsGFX10 = isGFX10();
5534 
5535   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5536   if (IsGFX10)
5537     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5538   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5539   if (IsGFX10)
5540     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5541   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5542   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5543   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5544   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5545   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5546   if (!IsGFX10)
5547     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5548   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5549 }
5550 
5551 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5552   cvtMIMG(Inst, Operands, true);
5553 }
5554 
5555 //===----------------------------------------------------------------------===//
5556 // smrd
5557 //===----------------------------------------------------------------------===//
5558 
5559 bool AMDGPUOperand::isSMRDOffset8() const {
5560   return isImm() && isUInt<8>(getImm());
5561 }
5562 
5563 bool AMDGPUOperand::isSMRDOffset20() const {
5564   return isImm() && isUInt<20>(getImm());
5565 }
5566 
5567 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5568   // 32-bit literals are only supported on CI and we only want to use them
5569   // when the offset is > 8-bits.
5570   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5571 }
5572 
5573 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5574   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5575 }
5576 
5577 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5578   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5579 }
5580 
5581 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5582   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5583 }
5584 
5585 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5586   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5587 }
5588 
5589 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5590   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5591 }
5592 
5593 //===----------------------------------------------------------------------===//
5594 // vop3
5595 //===----------------------------------------------------------------------===//
5596 
5597 static bool ConvertOmodMul(int64_t &Mul) {
5598   if (Mul != 1 && Mul != 2 && Mul != 4)
5599     return false;
5600 
5601   Mul >>= 1;
5602   return true;
5603 }
5604 
5605 static bool ConvertOmodDiv(int64_t &Div) {
5606   if (Div == 1) {
5607     Div = 0;
5608     return true;
5609   }
5610 
5611   if (Div == 2) {
5612     Div = 3;
5613     return true;
5614   }
5615 
5616   return false;
5617 }
5618 
5619 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5620   if (BoundCtrl == 0) {
5621     BoundCtrl = 1;
5622     return true;
5623   }
5624 
5625   if (BoundCtrl == -1) {
5626     BoundCtrl = 0;
5627     return true;
5628   }
5629 
5630   return false;
5631 }
5632 
5633 // Note: the order in this table matches the order of operands in AsmString.
5634 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5635   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5636   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5637   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5638   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5639   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5640   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5641   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5642   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5643   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5644   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5645   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5646   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5647   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5648   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5649   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5650   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5651   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5652   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5653   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5654   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5655   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5656   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5657   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5658   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5659   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5660   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5661   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5662   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5663   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5664   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5665   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5666   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5667   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5668   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5669   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5670   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5671   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5672   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5673   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5674 };
5675 
5676 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5677   unsigned size = Operands.size();
5678   assert(size > 0);
5679 
5680   OperandMatchResultTy res = parseOptionalOpr(Operands);
5681 
5682   // This is a hack to enable hardcoded mandatory operands which follow
5683   // optional operands.
5684   //
5685   // Current design assumes that all operands after the first optional operand
5686   // are also optional. However implementation of some instructions violates
5687   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5688   //
5689   // To alleviate this problem, we have to (implicitly) parse extra operands
5690   // to make sure autogenerated parser of custom operands never hit hardcoded
5691   // mandatory operands.
5692 
5693   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5694 
5695     // We have parsed the first optional operand.
5696     // Parse as many operands as necessary to skip all mandatory operands.
5697 
5698     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5699       if (res != MatchOperand_Success ||
5700           getLexer().is(AsmToken::EndOfStatement)) break;
5701       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5702       res = parseOptionalOpr(Operands);
5703     }
5704   }
5705 
5706   return res;
5707 }
5708 
5709 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5710   OperandMatchResultTy res;
5711   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5712     // try to parse any optional operand here
5713     if (Op.IsBit) {
5714       res = parseNamedBit(Op.Name, Operands, Op.Type);
5715     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5716       res = parseOModOperand(Operands);
5717     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5718                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5719                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5720       res = parseSDWASel(Operands, Op.Name, Op.Type);
5721     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5722       res = parseSDWADstUnused(Operands);
5723     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5724                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5725                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5726                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5727       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5728                                         Op.ConvertResult);
5729     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5730       res = parseDim(Operands);
5731     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5732       res = parseDfmtNfmt(Operands);
5733     } else {
5734       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5735     }
5736     if (res != MatchOperand_NoMatch) {
5737       return res;
5738     }
5739   }
5740   return MatchOperand_NoMatch;
5741 }
5742 
5743 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5744   StringRef Name = Parser.getTok().getString();
5745   if (Name == "mul") {
5746     return parseIntWithPrefix("mul", Operands,
5747                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5748   }
5749 
5750   if (Name == "div") {
5751     return parseIntWithPrefix("div", Operands,
5752                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5753   }
5754 
5755   return MatchOperand_NoMatch;
5756 }
5757 
5758 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5759   cvtVOP3P(Inst, Operands);
5760 
5761   int Opc = Inst.getOpcode();
5762 
5763   int SrcNum;
5764   const int Ops[] = { AMDGPU::OpName::src0,
5765                       AMDGPU::OpName::src1,
5766                       AMDGPU::OpName::src2 };
5767   for (SrcNum = 0;
5768        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5769        ++SrcNum);
5770   assert(SrcNum > 0);
5771 
5772   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5773   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5774 
5775   if ((OpSel & (1 << SrcNum)) != 0) {
5776     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5777     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5778     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5779   }
5780 }
5781 
5782 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5783       // 1. This operand is input modifiers
5784   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5785       // 2. This is not last operand
5786       && Desc.NumOperands > (OpNum + 1)
5787       // 3. Next operand is register class
5788       && Desc.OpInfo[OpNum + 1].RegClass != -1
5789       // 4. Next register is not tied to any other operand
5790       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5791 }
5792 
5793 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5794 {
5795   OptionalImmIndexMap OptionalIdx;
5796   unsigned Opc = Inst.getOpcode();
5797 
5798   unsigned I = 1;
5799   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5800   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5801     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5802   }
5803 
5804   for (unsigned E = Operands.size(); I != E; ++I) {
5805     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5806     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5807       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5808     } else if (Op.isInterpSlot() ||
5809                Op.isInterpAttr() ||
5810                Op.isAttrChan()) {
5811       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5812     } else if (Op.isImmModifier()) {
5813       OptionalIdx[Op.getImmTy()] = I;
5814     } else {
5815       llvm_unreachable("unhandled operand type");
5816     }
5817   }
5818 
5819   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5820     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5821   }
5822 
5823   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5824     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5825   }
5826 
5827   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5828     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5829   }
5830 }
5831 
5832 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5833                               OptionalImmIndexMap &OptionalIdx) {
5834   unsigned Opc = Inst.getOpcode();
5835 
5836   unsigned I = 1;
5837   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5838   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5839     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5840   }
5841 
5842   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5843     // This instruction has src modifiers
5844     for (unsigned E = Operands.size(); I != E; ++I) {
5845       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5846       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5847         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5848       } else if (Op.isImmModifier()) {
5849         OptionalIdx[Op.getImmTy()] = I;
5850       } else if (Op.isRegOrImm()) {
5851         Op.addRegOrImmOperands(Inst, 1);
5852       } else {
5853         llvm_unreachable("unhandled operand type");
5854       }
5855     }
5856   } else {
5857     // No src modifiers
5858     for (unsigned E = Operands.size(); I != E; ++I) {
5859       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5860       if (Op.isMod()) {
5861         OptionalIdx[Op.getImmTy()] = I;
5862       } else {
5863         Op.addRegOrImmOperands(Inst, 1);
5864       }
5865     }
5866   }
5867 
5868   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5869     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5870   }
5871 
5872   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5873     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5874   }
5875 
5876   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
5877   // it has src2 register operand that is tied to dst operand
5878   // we don't allow modifiers for this operand in assembler so src2_modifiers
5879   // should be 0.
5880   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5881       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5882       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5883       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5884       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5885       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5886       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5887     auto it = Inst.begin();
5888     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5889     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5890     ++it;
5891     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5892   }
5893 }
5894 
5895 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5896   OptionalImmIndexMap OptionalIdx;
5897   cvtVOP3(Inst, Operands, OptionalIdx);
5898 }
5899 
5900 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5901                                const OperandVector &Operands) {
5902   OptionalImmIndexMap OptIdx;
5903   const int Opc = Inst.getOpcode();
5904   const MCInstrDesc &Desc = MII.get(Opc);
5905 
5906   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5907 
5908   cvtVOP3(Inst, Operands, OptIdx);
5909 
5910   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5911     assert(!IsPacked);
5912     Inst.addOperand(Inst.getOperand(0));
5913   }
5914 
5915   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5916   // instruction, and then figure out where to actually put the modifiers
5917 
5918   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5919 
5920   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5921   if (OpSelHiIdx != -1) {
5922     int DefaultVal = IsPacked ? -1 : 0;
5923     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5924                           DefaultVal);
5925   }
5926 
5927   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5928   if (NegLoIdx != -1) {
5929     assert(IsPacked);
5930     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5931     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5932   }
5933 
5934   const int Ops[] = { AMDGPU::OpName::src0,
5935                       AMDGPU::OpName::src1,
5936                       AMDGPU::OpName::src2 };
5937   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5938                          AMDGPU::OpName::src1_modifiers,
5939                          AMDGPU::OpName::src2_modifiers };
5940 
5941   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5942 
5943   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5944   unsigned OpSelHi = 0;
5945   unsigned NegLo = 0;
5946   unsigned NegHi = 0;
5947 
5948   if (OpSelHiIdx != -1) {
5949     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5950   }
5951 
5952   if (NegLoIdx != -1) {
5953     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5954     NegLo = Inst.getOperand(NegLoIdx).getImm();
5955     NegHi = Inst.getOperand(NegHiIdx).getImm();
5956   }
5957 
5958   for (int J = 0; J < 3; ++J) {
5959     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5960     if (OpIdx == -1)
5961       break;
5962 
5963     uint32_t ModVal = 0;
5964 
5965     if ((OpSel & (1 << J)) != 0)
5966       ModVal |= SISrcMods::OP_SEL_0;
5967 
5968     if ((OpSelHi & (1 << J)) != 0)
5969       ModVal |= SISrcMods::OP_SEL_1;
5970 
5971     if ((NegLo & (1 << J)) != 0)
5972       ModVal |= SISrcMods::NEG;
5973 
5974     if ((NegHi & (1 << J)) != 0)
5975       ModVal |= SISrcMods::NEG_HI;
5976 
5977     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5978 
5979     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5980   }
5981 }
5982 
5983 //===----------------------------------------------------------------------===//
5984 // dpp
5985 //===----------------------------------------------------------------------===//
5986 
5987 bool AMDGPUOperand::isDPPCtrl() const {
5988   using namespace AMDGPU::DPP;
5989 
5990   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5991   if (result) {
5992     int64_t Imm = getImm();
5993     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5994            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5995            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5996            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5997            (Imm == DppCtrl::WAVE_SHL1) ||
5998            (Imm == DppCtrl::WAVE_ROL1) ||
5999            (Imm == DppCtrl::WAVE_SHR1) ||
6000            (Imm == DppCtrl::WAVE_ROR1) ||
6001            (Imm == DppCtrl::ROW_MIRROR) ||
6002            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6003            (Imm == DppCtrl::BCAST15) ||
6004            (Imm == DppCtrl::BCAST31);
6005   }
6006   return false;
6007 }
6008 
6009 bool AMDGPUOperand::isS16Imm() const {
6010   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6011 }
6012 
6013 bool AMDGPUOperand::isU16Imm() const {
6014   return isImm() && isUInt<16>(getImm());
6015 }
6016 
6017 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6018   if (!isGFX10())
6019     return MatchOperand_NoMatch;
6020 
6021   SMLoc S = Parser.getTok().getLoc();
6022 
6023   if (getLexer().isNot(AsmToken::Identifier))
6024     return MatchOperand_NoMatch;
6025   if (getLexer().getTok().getString() != "dim")
6026     return MatchOperand_NoMatch;
6027 
6028   Parser.Lex();
6029   if (getLexer().isNot(AsmToken::Colon))
6030     return MatchOperand_ParseFail;
6031 
6032   Parser.Lex();
6033 
6034   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6035   // integer.
6036   std::string Token;
6037   if (getLexer().is(AsmToken::Integer)) {
6038     SMLoc Loc = getLexer().getTok().getEndLoc();
6039     Token = getLexer().getTok().getString();
6040     Parser.Lex();
6041     if (getLexer().getTok().getLoc() != Loc)
6042       return MatchOperand_ParseFail;
6043   }
6044   if (getLexer().isNot(AsmToken::Identifier))
6045     return MatchOperand_ParseFail;
6046   Token += getLexer().getTok().getString();
6047 
6048   StringRef DimId = Token;
6049   if (DimId.startswith("SQ_RSRC_IMG_"))
6050     DimId = DimId.substr(12);
6051 
6052   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6053   if (!DimInfo)
6054     return MatchOperand_ParseFail;
6055 
6056   Parser.Lex();
6057 
6058   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6059                                               AMDGPUOperand::ImmTyDim));
6060   return MatchOperand_Success;
6061 }
6062 
6063 OperandMatchResultTy
6064 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6065   using namespace AMDGPU::DPP;
6066 
6067   SMLoc S = Parser.getTok().getLoc();
6068   StringRef Prefix;
6069   int64_t Int;
6070 
6071   if (getLexer().getKind() == AsmToken::Identifier) {
6072     Prefix = Parser.getTok().getString();
6073   } else {
6074     return MatchOperand_NoMatch;
6075   }
6076 
6077   if (Prefix == "row_mirror") {
6078     Int = DppCtrl::ROW_MIRROR;
6079     Parser.Lex();
6080   } else if (Prefix == "row_half_mirror") {
6081     Int = DppCtrl::ROW_HALF_MIRROR;
6082     Parser.Lex();
6083   } else {
6084     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6085     if (Prefix != "quad_perm"
6086         && Prefix != "row_shl"
6087         && Prefix != "row_shr"
6088         && Prefix != "row_ror"
6089         && Prefix != "wave_shl"
6090         && Prefix != "wave_rol"
6091         && Prefix != "wave_shr"
6092         && Prefix != "wave_ror"
6093         && Prefix != "row_bcast") {
6094       return MatchOperand_NoMatch;
6095     }
6096 
6097     Parser.Lex();
6098     if (getLexer().isNot(AsmToken::Colon))
6099       return MatchOperand_ParseFail;
6100 
6101     if (Prefix == "quad_perm") {
6102       // quad_perm:[%d,%d,%d,%d]
6103       Parser.Lex();
6104       if (getLexer().isNot(AsmToken::LBrac))
6105         return MatchOperand_ParseFail;
6106       Parser.Lex();
6107 
6108       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6109         return MatchOperand_ParseFail;
6110 
6111       for (int i = 0; i < 3; ++i) {
6112         if (getLexer().isNot(AsmToken::Comma))
6113           return MatchOperand_ParseFail;
6114         Parser.Lex();
6115 
6116         int64_t Temp;
6117         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6118           return MatchOperand_ParseFail;
6119         const int shift = i*2 + 2;
6120         Int += (Temp << shift);
6121       }
6122 
6123       if (getLexer().isNot(AsmToken::RBrac))
6124         return MatchOperand_ParseFail;
6125       Parser.Lex();
6126     } else {
6127       // sel:%d
6128       Parser.Lex();
6129       if (getParser().parseAbsoluteExpression(Int))
6130         return MatchOperand_ParseFail;
6131 
6132       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6133         Int |= DppCtrl::ROW_SHL0;
6134       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6135         Int |= DppCtrl::ROW_SHR0;
6136       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6137         Int |= DppCtrl::ROW_ROR0;
6138       } else if (Prefix == "wave_shl" && 1 == Int) {
6139         Int = DppCtrl::WAVE_SHL1;
6140       } else if (Prefix == "wave_rol" && 1 == Int) {
6141         Int = DppCtrl::WAVE_ROL1;
6142       } else if (Prefix == "wave_shr" && 1 == Int) {
6143         Int = DppCtrl::WAVE_SHR1;
6144       } else if (Prefix == "wave_ror" && 1 == Int) {
6145         Int = DppCtrl::WAVE_ROR1;
6146       } else if (Prefix == "row_bcast") {
6147         if (Int == 15) {
6148           Int = DppCtrl::BCAST15;
6149         } else if (Int == 31) {
6150           Int = DppCtrl::BCAST31;
6151         } else {
6152           return MatchOperand_ParseFail;
6153         }
6154       } else {
6155         return MatchOperand_ParseFail;
6156       }
6157     }
6158   }
6159 
6160   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6161   return MatchOperand_Success;
6162 }
6163 
6164 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6165   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6166 }
6167 
6168 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6169   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6170 }
6171 
6172 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6173   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6174 }
6175 
6176 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6177   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6178 }
6179 
6180 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
6181   OptionalImmIndexMap OptionalIdx;
6182 
6183   unsigned I = 1;
6184   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6185   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6186     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6187   }
6188 
6189   for (unsigned E = Operands.size(); I != E; ++I) {
6190     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6191                                             MCOI::TIED_TO);
6192     if (TiedTo != -1) {
6193       assert((unsigned)TiedTo < Inst.getNumOperands());
6194       // handle tied old or src2 for MAC instructions
6195       Inst.addOperand(Inst.getOperand(TiedTo));
6196     }
6197     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6198     // Add the register arguments
6199     if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6200       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6201       // Skip it.
6202       continue;
6203     }
6204     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6205       Op.addRegWithFPInputModsOperands(Inst, 2);
6206     } else if (Op.isDPPCtrl()) {
6207       Op.addImmOperands(Inst, 1);
6208     } else if (Op.isImm()) {
6209       // Handle optional arguments
6210       OptionalIdx[Op.getImmTy()] = I;
6211     } else {
6212       llvm_unreachable("Invalid operand type");
6213     }
6214   }
6215 
6216   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6217   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6218   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6219 }
6220 
6221 //===----------------------------------------------------------------------===//
6222 // sdwa
6223 //===----------------------------------------------------------------------===//
6224 
6225 OperandMatchResultTy
6226 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6227                               AMDGPUOperand::ImmTy Type) {
6228   using namespace llvm::AMDGPU::SDWA;
6229 
6230   SMLoc S = Parser.getTok().getLoc();
6231   StringRef Value;
6232   OperandMatchResultTy res;
6233 
6234   res = parseStringWithPrefix(Prefix, Value);
6235   if (res != MatchOperand_Success) {
6236     return res;
6237   }
6238 
6239   int64_t Int;
6240   Int = StringSwitch<int64_t>(Value)
6241         .Case("BYTE_0", SdwaSel::BYTE_0)
6242         .Case("BYTE_1", SdwaSel::BYTE_1)
6243         .Case("BYTE_2", SdwaSel::BYTE_2)
6244         .Case("BYTE_3", SdwaSel::BYTE_3)
6245         .Case("WORD_0", SdwaSel::WORD_0)
6246         .Case("WORD_1", SdwaSel::WORD_1)
6247         .Case("DWORD", SdwaSel::DWORD)
6248         .Default(0xffffffff);
6249   Parser.Lex(); // eat last token
6250 
6251   if (Int == 0xffffffff) {
6252     return MatchOperand_ParseFail;
6253   }
6254 
6255   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6256   return MatchOperand_Success;
6257 }
6258 
6259 OperandMatchResultTy
6260 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6261   using namespace llvm::AMDGPU::SDWA;
6262 
6263   SMLoc S = Parser.getTok().getLoc();
6264   StringRef Value;
6265   OperandMatchResultTy res;
6266 
6267   res = parseStringWithPrefix("dst_unused", Value);
6268   if (res != MatchOperand_Success) {
6269     return res;
6270   }
6271 
6272   int64_t Int;
6273   Int = StringSwitch<int64_t>(Value)
6274         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6275         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6276         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6277         .Default(0xffffffff);
6278   Parser.Lex(); // eat last token
6279 
6280   if (Int == 0xffffffff) {
6281     return MatchOperand_ParseFail;
6282   }
6283 
6284   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6285   return MatchOperand_Success;
6286 }
6287 
6288 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6289   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6290 }
6291 
6292 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6293   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6294 }
6295 
6296 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6297   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6298 }
6299 
6300 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6301   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6302 }
6303 
6304 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6305                               uint64_t BasicInstType, bool skipVcc) {
6306   using namespace llvm::AMDGPU::SDWA;
6307 
6308   OptionalImmIndexMap OptionalIdx;
6309   bool skippedVcc = false;
6310 
6311   unsigned I = 1;
6312   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6313   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6314     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6315   }
6316 
6317   for (unsigned E = Operands.size(); I != E; ++I) {
6318     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6319     if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6320       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
6321       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6322       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6323       // Skip VCC only if we didn't skip it on previous iteration.
6324       if (BasicInstType == SIInstrFlags::VOP2 &&
6325           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6326         skippedVcc = true;
6327         continue;
6328       } else if (BasicInstType == SIInstrFlags::VOPC &&
6329                  Inst.getNumOperands() == 0) {
6330         skippedVcc = true;
6331         continue;
6332       }
6333     }
6334     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6335       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6336     } else if (Op.isImm()) {
6337       // Handle optional arguments
6338       OptionalIdx[Op.getImmTy()] = I;
6339     } else {
6340       llvm_unreachable("Invalid operand type");
6341     }
6342     skippedVcc = false;
6343   }
6344 
6345   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6346       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6347       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6348     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
6349     switch (BasicInstType) {
6350     case SIInstrFlags::VOP1:
6351       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6352       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6353         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6354       }
6355       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6356       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6357       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6358       break;
6359 
6360     case SIInstrFlags::VOP2:
6361       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6362       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6363         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6364       }
6365       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6366       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6367       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6368       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6369       break;
6370 
6371     case SIInstrFlags::VOPC:
6372       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6373         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6374       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6375       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6376       break;
6377 
6378     default:
6379       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6380     }
6381   }
6382 
6383   // special case v_mac_{f16, f32}:
6384   // it has src2 register operand that is tied to dst operand
6385   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6386       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6387     auto it = Inst.begin();
6388     std::advance(
6389       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6390     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6391   }
6392 }
6393 
6394 /// Force static initialization.
6395 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6396   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6397   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6398 }
6399 
6400 #define GET_REGISTER_MATCHER
6401 #define GET_MATCHER_IMPLEMENTATION
6402 #define GET_MNEMONIC_SPELL_CHECKER
6403 #include "AMDGPUGenAsmMatcher.inc"
6404 
6405 // This fuction should be defined after auto-generated include so that we have
6406 // MatchClassKind enum defined
6407 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6408                                                      unsigned Kind) {
6409   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6410   // But MatchInstructionImpl() expects to meet token and fails to validate
6411   // operand. This method checks if we are given immediate operand but expect to
6412   // get corresponding token.
6413   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6414   switch (Kind) {
6415   case MCK_addr64:
6416     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6417   case MCK_gds:
6418     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6419   case MCK_lds:
6420     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6421   case MCK_glc:
6422     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6423   case MCK_idxen:
6424     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6425   case MCK_offen:
6426     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6427   case MCK_SSrcB32:
6428     // When operands have expression values, they will return true for isToken,
6429     // because it is not possible to distinguish between a token and an
6430     // expression at parse time. MatchInstructionImpl() will always try to
6431     // match an operand as a token, when isToken returns true, and when the
6432     // name of the expression is not a valid token, the match will fail,
6433     // so we need to handle it here.
6434     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6435   case MCK_SSrcF32:
6436     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6437   case MCK_SoppBrTarget:
6438     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6439   case MCK_VReg32OrOff:
6440     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6441   case MCK_InterpSlot:
6442     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6443   case MCK_Attr:
6444     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6445   case MCK_AttrChan:
6446     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6447   default:
6448     return Match_InvalidOperand;
6449   }
6450 }
6451 
6452 //===----------------------------------------------------------------------===//
6453 // endpgm
6454 //===----------------------------------------------------------------------===//
6455 
6456 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6457   SMLoc S = Parser.getTok().getLoc();
6458   int64_t Imm = 0;
6459 
6460   if (!parseExpr(Imm)) {
6461     // The operand is optional, if not present default to 0
6462     Imm = 0;
6463   }
6464 
6465   if (!isUInt<16>(Imm)) {
6466     Error(S, "expected a 16-bit value");
6467     return MatchOperand_ParseFail;
6468   }
6469 
6470   Operands.push_back(
6471       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6472   return MatchOperand_Success;
6473 }
6474 
6475 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6476