1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDppCtrl,
151     ImmTyDppRowMask,
152     ImmTyDppBankMask,
153     ImmTyDppBoundCtrl,
154     ImmTySdwaDstSel,
155     ImmTySdwaSrc0Sel,
156     ImmTySdwaSrc1Sel,
157     ImmTySdwaDstUnused,
158     ImmTyDMask,
159     ImmTyDim,
160     ImmTyUNorm,
161     ImmTyDA,
162     ImmTyR128A16,
163     ImmTyLWE,
164     ImmTyExpTgt,
165     ImmTyExpCompr,
166     ImmTyExpVM,
167     ImmTyFORMAT,
168     ImmTyHwreg,
169     ImmTyOff,
170     ImmTySendMsg,
171     ImmTyInterpSlot,
172     ImmTyInterpAttr,
173     ImmTyAttrChan,
174     ImmTyOpSel,
175     ImmTyOpSelHi,
176     ImmTyNegLo,
177     ImmTyNegHi,
178     ImmTySwizzle,
179     ImmTyGprIdxMode,
180     ImmTyEndpgm,
181     ImmTyHigh
182   };
183 
184 private:
185   struct TokOp {
186     const char *Data;
187     unsigned Length;
188   };
189 
190   struct ImmOp {
191     int64_t Val;
192     ImmTy Type;
193     bool IsFPImm;
194     Modifiers Mods;
195   };
196 
197   struct RegOp {
198     unsigned RegNo;
199     Modifiers Mods;
200   };
201 
202   union {
203     TokOp Tok;
204     ImmOp Imm;
205     RegOp Reg;
206     const MCExpr *Expr;
207   };
208 
209 public:
210   bool isToken() const override {
211     if (Kind == Token)
212       return true;
213 
214     if (Kind != Expression || !Expr)
215       return false;
216 
217     // When parsing operands, we can't always tell if something was meant to be
218     // a token, like 'gds', or an expression that references a global variable.
219     // In this case, we assume the string is an expression, and if we need to
220     // interpret is a token, then we treat the symbol name as the token.
221     return isa<MCSymbolRefExpr>(Expr);
222   }
223 
224   bool isImm() const override {
225     return Kind == Immediate;
226   }
227 
228   bool isInlinableImm(MVT type) const;
229   bool isLiteralImm(MVT type) const;
230 
231   bool isRegKind() const {
232     return Kind == Register;
233   }
234 
235   bool isReg() const override {
236     return isRegKind() && !hasModifiers();
237   }
238 
239   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
240     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
241   }
242 
243   bool isRegOrImmWithInt16InputMods() const {
244     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
245   }
246 
247   bool isRegOrImmWithInt32InputMods() const {
248     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
249   }
250 
251   bool isRegOrImmWithInt64InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
253   }
254 
255   bool isRegOrImmWithFP16InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
257   }
258 
259   bool isRegOrImmWithFP32InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
261   }
262 
263   bool isRegOrImmWithFP64InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
265   }
266 
267   bool isVReg() const {
268     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
269            isRegClass(AMDGPU::VReg_64RegClassID) ||
270            isRegClass(AMDGPU::VReg_96RegClassID) ||
271            isRegClass(AMDGPU::VReg_128RegClassID) ||
272            isRegClass(AMDGPU::VReg_256RegClassID) ||
273            isRegClass(AMDGPU::VReg_512RegClassID);
274   }
275 
276   bool isVReg32() const {
277     return isRegClass(AMDGPU::VGPR_32RegClassID);
278   }
279 
280   bool isVReg32OrOff() const {
281     return isOff() || isVReg32();
282   }
283 
284   bool isSDWAOperand(MVT type) const;
285   bool isSDWAFP16Operand() const;
286   bool isSDWAFP32Operand() const;
287   bool isSDWAInt16Operand() const;
288   bool isSDWAInt32Operand() const;
289 
290   bool isImmTy(ImmTy ImmT) const {
291     return isImm() && Imm.Type == ImmT;
292   }
293 
294   bool isImmModifier() const {
295     return isImm() && Imm.Type != ImmTyNone;
296   }
297 
298   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
299   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
300   bool isDMask() const { return isImmTy(ImmTyDMask); }
301   bool isDim() const { return isImmTy(ImmTyDim); }
302   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
303   bool isDA() const { return isImmTy(ImmTyDA); }
304   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
305   bool isLWE() const { return isImmTy(ImmTyLWE); }
306   bool isOff() const { return isImmTy(ImmTyOff); }
307   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
308   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
309   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
310   bool isOffen() const { return isImmTy(ImmTyOffen); }
311   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
312   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
313   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
314   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
315   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
316 
317   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
318   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
319   bool isGDS() const { return isImmTy(ImmTyGDS); }
320   bool isLDS() const { return isImmTy(ImmTyLDS); }
321   bool isDLC() const { return isImmTy(ImmTyDLC); }
322   bool isGLC() const { return isImmTy(ImmTyGLC); }
323   bool isSLC() const { return isImmTy(ImmTySLC); }
324   bool isTFE() const { return isImmTy(ImmTyTFE); }
325   bool isD16() const { return isImmTy(ImmTyD16); }
326   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
327   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
328   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
329   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
330   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
331   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
332   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
333   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
334   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
335   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
336   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
337   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
338   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
339   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
340   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
341   bool isHigh() const { return isImmTy(ImmTyHigh); }
342 
343   bool isMod() const {
344     return isClampSI() || isOModSI();
345   }
346 
347   bool isRegOrImm() const {
348     return isReg() || isImm();
349   }
350 
351   bool isRegClass(unsigned RCID) const;
352 
353   bool isInlineValue() const;
354 
355   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
356     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
357   }
358 
359   bool isSCSrcB16() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
361   }
362 
363   bool isSCSrcV2B16() const {
364     return isSCSrcB16();
365   }
366 
367   bool isSCSrcB32() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
369   }
370 
371   bool isSCSrcB64() const {
372     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
373   }
374 
375   bool isSCSrcF16() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
377   }
378 
379   bool isSCSrcV2F16() const {
380     return isSCSrcF16();
381   }
382 
383   bool isSCSrcF32() const {
384     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
385   }
386 
387   bool isSCSrcF64() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
389   }
390 
391   bool isSSrcB32() const {
392     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
393   }
394 
395   bool isSSrcB16() const {
396     return isSCSrcB16() || isLiteralImm(MVT::i16);
397   }
398 
399   bool isSSrcV2B16() const {
400     llvm_unreachable("cannot happen");
401     return isSSrcB16();
402   }
403 
404   bool isSSrcB64() const {
405     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
406     // See isVSrc64().
407     return isSCSrcB64() || isLiteralImm(MVT::i64);
408   }
409 
410   bool isSSrcF32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
412   }
413 
414   bool isSSrcF64() const {
415     return isSCSrcB64() || isLiteralImm(MVT::f64);
416   }
417 
418   bool isSSrcF16() const {
419     return isSCSrcB16() || isLiteralImm(MVT::f16);
420   }
421 
422   bool isSSrcV2F16() const {
423     llvm_unreachable("cannot happen");
424     return isSSrcF16();
425   }
426 
427   bool isSSrcOrLdsB32() const {
428     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
429            isLiteralImm(MVT::i32) || isExpr();
430   }
431 
432   bool isVCSrcB32() const {
433     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
434   }
435 
436   bool isVCSrcB64() const {
437     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
438   }
439 
440   bool isVCSrcB16() const {
441     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
442   }
443 
444   bool isVCSrcV2B16() const {
445     return isVCSrcB16();
446   }
447 
448   bool isVCSrcF32() const {
449     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
450   }
451 
452   bool isVCSrcF64() const {
453     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
454   }
455 
456   bool isVCSrcF16() const {
457     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
458   }
459 
460   bool isVCSrcV2F16() const {
461     return isVCSrcF16();
462   }
463 
464   bool isVSrcB32() const {
465     return isVCSrcF32() || isLiteralImm(MVT::i32);
466   }
467 
468   bool isVSrcB64() const {
469     return isVCSrcF64() || isLiteralImm(MVT::i64);
470   }
471 
472   bool isVSrcB16() const {
473     return isVCSrcF16() || isLiteralImm(MVT::i16);
474   }
475 
476   bool isVSrcV2B16() const {
477     return isVSrcB16() || isLiteralImm(MVT::v2i16);
478   }
479 
480   bool isVSrcF32() const {
481     return isVCSrcF32() || isLiteralImm(MVT::f32);
482   }
483 
484   bool isVSrcF64() const {
485     return isVCSrcF64() || isLiteralImm(MVT::f64);
486   }
487 
488   bool isVSrcF16() const {
489     return isVCSrcF16() || isLiteralImm(MVT::f16);
490   }
491 
492   bool isVSrcV2F16() const {
493     return isVSrcF16() || isLiteralImm(MVT::v2f16);
494   }
495 
496   bool isKImmFP32() const {
497     return isLiteralImm(MVT::f32);
498   }
499 
500   bool isKImmFP16() const {
501     return isLiteralImm(MVT::f16);
502   }
503 
504   bool isMem() const override {
505     return false;
506   }
507 
508   bool isExpr() const {
509     return Kind == Expression;
510   }
511 
512   bool isSoppBrTarget() const {
513     return isExpr() || isImm();
514   }
515 
516   bool isSWaitCnt() const;
517   bool isHwreg() const;
518   bool isSendMsg() const;
519   bool isSwizzle() const;
520   bool isSMRDOffset8() const;
521   bool isSMRDOffset20() const;
522   bool isSMRDLiteralOffset() const;
523   bool isDPPCtrl() const;
524   bool isGPRIdxMode() const;
525   bool isS16Imm() const;
526   bool isU16Imm() const;
527   bool isEndpgm() const;
528 
529   StringRef getExpressionAsToken() const {
530     assert(isExpr());
531     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
532     return S->getSymbol().getName();
533   }
534 
535   StringRef getToken() const {
536     assert(isToken());
537 
538     if (Kind == Expression)
539       return getExpressionAsToken();
540 
541     return StringRef(Tok.Data, Tok.Length);
542   }
543 
544   int64_t getImm() const {
545     assert(isImm());
546     return Imm.Val;
547   }
548 
549   ImmTy getImmTy() const {
550     assert(isImm());
551     return Imm.Type;
552   }
553 
554   unsigned getReg() const override {
555     assert(isRegKind());
556     return Reg.RegNo;
557   }
558 
559   SMLoc getStartLoc() const override {
560     return StartLoc;
561   }
562 
563   SMLoc getEndLoc() const override {
564     return EndLoc;
565   }
566 
567   SMRange getLocRange() const {
568     return SMRange(StartLoc, EndLoc);
569   }
570 
571   Modifiers getModifiers() const {
572     assert(isRegKind() || isImmTy(ImmTyNone));
573     return isRegKind() ? Reg.Mods : Imm.Mods;
574   }
575 
576   void setModifiers(Modifiers Mods) {
577     assert(isRegKind() || isImmTy(ImmTyNone));
578     if (isRegKind())
579       Reg.Mods = Mods;
580     else
581       Imm.Mods = Mods;
582   }
583 
584   bool hasModifiers() const {
585     return getModifiers().hasModifiers();
586   }
587 
588   bool hasFPModifiers() const {
589     return getModifiers().hasFPModifiers();
590   }
591 
592   bool hasIntModifiers() const {
593     return getModifiers().hasIntModifiers();
594   }
595 
596   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
597 
598   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
599 
600   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
601 
602   template <unsigned Bitwidth>
603   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
604 
605   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
606     addKImmFPOperands<16>(Inst, N);
607   }
608 
609   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
610     addKImmFPOperands<32>(Inst, N);
611   }
612 
613   void addRegOperands(MCInst &Inst, unsigned N) const;
614 
615   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
616     if (isRegKind())
617       addRegOperands(Inst, N);
618     else if (isExpr())
619       Inst.addOperand(MCOperand::createExpr(Expr));
620     else
621       addImmOperands(Inst, N);
622   }
623 
624   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
625     Modifiers Mods = getModifiers();
626     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
627     if (isRegKind()) {
628       addRegOperands(Inst, N);
629     } else {
630       addImmOperands(Inst, N, false);
631     }
632   }
633 
634   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
635     assert(!hasIntModifiers());
636     addRegOrImmWithInputModsOperands(Inst, N);
637   }
638 
639   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
640     assert(!hasFPModifiers());
641     addRegOrImmWithInputModsOperands(Inst, N);
642   }
643 
644   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
645     Modifiers Mods = getModifiers();
646     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
647     assert(isRegKind());
648     addRegOperands(Inst, N);
649   }
650 
651   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
652     assert(!hasIntModifiers());
653     addRegWithInputModsOperands(Inst, N);
654   }
655 
656   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
657     assert(!hasFPModifiers());
658     addRegWithInputModsOperands(Inst, N);
659   }
660 
661   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
662     if (isImm())
663       addImmOperands(Inst, N);
664     else {
665       assert(isExpr());
666       Inst.addOperand(MCOperand::createExpr(Expr));
667     }
668   }
669 
670   static void printImmTy(raw_ostream& OS, ImmTy Type) {
671     switch (Type) {
672     case ImmTyNone: OS << "None"; break;
673     case ImmTyGDS: OS << "GDS"; break;
674     case ImmTyLDS: OS << "LDS"; break;
675     case ImmTyOffen: OS << "Offen"; break;
676     case ImmTyIdxen: OS << "Idxen"; break;
677     case ImmTyAddr64: OS << "Addr64"; break;
678     case ImmTyOffset: OS << "Offset"; break;
679     case ImmTyInstOffset: OS << "InstOffset"; break;
680     case ImmTyOffset0: OS << "Offset0"; break;
681     case ImmTyOffset1: OS << "Offset1"; break;
682     case ImmTyDLC: OS << "DLC"; break;
683     case ImmTyGLC: OS << "GLC"; break;
684     case ImmTySLC: OS << "SLC"; break;
685     case ImmTyTFE: OS << "TFE"; break;
686     case ImmTyD16: OS << "D16"; break;
687     case ImmTyFORMAT: OS << "FORMAT"; break;
688     case ImmTyClampSI: OS << "ClampSI"; break;
689     case ImmTyOModSI: OS << "OModSI"; break;
690     case ImmTyDppCtrl: OS << "DppCtrl"; break;
691     case ImmTyDppRowMask: OS << "DppRowMask"; break;
692     case ImmTyDppBankMask: OS << "DppBankMask"; break;
693     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
694     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
695     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
696     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
697     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
698     case ImmTyDMask: OS << "DMask"; break;
699     case ImmTyDim: OS << "Dim"; break;
700     case ImmTyUNorm: OS << "UNorm"; break;
701     case ImmTyDA: OS << "DA"; break;
702     case ImmTyR128A16: OS << "R128A16"; break;
703     case ImmTyLWE: OS << "LWE"; break;
704     case ImmTyOff: OS << "Off"; break;
705     case ImmTyExpTgt: OS << "ExpTgt"; break;
706     case ImmTyExpCompr: OS << "ExpCompr"; break;
707     case ImmTyExpVM: OS << "ExpVM"; break;
708     case ImmTyHwreg: OS << "Hwreg"; break;
709     case ImmTySendMsg: OS << "SendMsg"; break;
710     case ImmTyInterpSlot: OS << "InterpSlot"; break;
711     case ImmTyInterpAttr: OS << "InterpAttr"; break;
712     case ImmTyAttrChan: OS << "AttrChan"; break;
713     case ImmTyOpSel: OS << "OpSel"; break;
714     case ImmTyOpSelHi: OS << "OpSelHi"; break;
715     case ImmTyNegLo: OS << "NegLo"; break;
716     case ImmTyNegHi: OS << "NegHi"; break;
717     case ImmTySwizzle: OS << "Swizzle"; break;
718     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
719     case ImmTyHigh: OS << "High"; break;
720     case ImmTyEndpgm:
721       OS << "Endpgm";
722       break;
723     }
724   }
725 
726   void print(raw_ostream &OS) const override {
727     switch (Kind) {
728     case Register:
729       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
730       break;
731     case Immediate:
732       OS << '<' << getImm();
733       if (getImmTy() != ImmTyNone) {
734         OS << " type: "; printImmTy(OS, getImmTy());
735       }
736       OS << " mods: " << Imm.Mods << '>';
737       break;
738     case Token:
739       OS << '\'' << getToken() << '\'';
740       break;
741     case Expression:
742       OS << "<expr " << *Expr << '>';
743       break;
744     }
745   }
746 
747   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
748                                       int64_t Val, SMLoc Loc,
749                                       ImmTy Type = ImmTyNone,
750                                       bool IsFPImm = false) {
751     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
752     Op->Imm.Val = Val;
753     Op->Imm.IsFPImm = IsFPImm;
754     Op->Imm.Type = Type;
755     Op->Imm.Mods = Modifiers();
756     Op->StartLoc = Loc;
757     Op->EndLoc = Loc;
758     return Op;
759   }
760 
761   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
762                                         StringRef Str, SMLoc Loc,
763                                         bool HasExplicitEncodingSize = true) {
764     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
765     Res->Tok.Data = Str.data();
766     Res->Tok.Length = Str.size();
767     Res->StartLoc = Loc;
768     Res->EndLoc = Loc;
769     return Res;
770   }
771 
772   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
773                                       unsigned RegNo, SMLoc S,
774                                       SMLoc E) {
775     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
776     Op->Reg.RegNo = RegNo;
777     Op->Reg.Mods = Modifiers();
778     Op->StartLoc = S;
779     Op->EndLoc = E;
780     return Op;
781   }
782 
783   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
784                                        const class MCExpr *Expr, SMLoc S) {
785     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
786     Op->Expr = Expr;
787     Op->StartLoc = S;
788     Op->EndLoc = S;
789     return Op;
790   }
791 };
792 
793 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
794   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
795   return OS;
796 }
797 
798 //===----------------------------------------------------------------------===//
799 // AsmParser
800 //===----------------------------------------------------------------------===//
801 
802 // Holds info related to the current kernel, e.g. count of SGPRs used.
803 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
804 // .amdgpu_hsa_kernel or at EOF.
805 class KernelScopeInfo {
806   int SgprIndexUnusedMin = -1;
807   int VgprIndexUnusedMin = -1;
808   MCContext *Ctx = nullptr;
809 
810   void usesSgprAt(int i) {
811     if (i >= SgprIndexUnusedMin) {
812       SgprIndexUnusedMin = ++i;
813       if (Ctx) {
814         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
815         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
816       }
817     }
818   }
819 
820   void usesVgprAt(int i) {
821     if (i >= VgprIndexUnusedMin) {
822       VgprIndexUnusedMin = ++i;
823       if (Ctx) {
824         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
825         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
826       }
827     }
828   }
829 
830 public:
831   KernelScopeInfo() = default;
832 
833   void initialize(MCContext &Context) {
834     Ctx = &Context;
835     usesSgprAt(SgprIndexUnusedMin = -1);
836     usesVgprAt(VgprIndexUnusedMin = -1);
837   }
838 
839   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
840     switch (RegKind) {
841       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
842       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
843       default: break;
844     }
845   }
846 };
847 
848 class AMDGPUAsmParser : public MCTargetAsmParser {
849   MCAsmParser &Parser;
850 
851   // Number of extra operands parsed after the first optional operand.
852   // This may be necessary to skip hardcoded mandatory operands.
853   static const unsigned MAX_OPR_LOOKAHEAD = 8;
854 
855   unsigned ForcedEncodingSize = 0;
856   bool ForcedDPP = false;
857   bool ForcedSDWA = false;
858   KernelScopeInfo KernelScope;
859 
860   /// @name Auto-generated Match Functions
861   /// {
862 
863 #define GET_ASSEMBLER_HEADER
864 #include "AMDGPUGenAsmMatcher.inc"
865 
866   /// }
867 
868 private:
869   bool ParseAsAbsoluteExpression(uint32_t &Ret);
870   bool OutOfRangeError(SMRange Range);
871   /// Calculate VGPR/SGPR blocks required for given target, reserved
872   /// registers, and user-specified NextFreeXGPR values.
873   ///
874   /// \param Features [in] Target features, used for bug corrections.
875   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
876   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
877   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
878   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
879   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
880   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
881   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
882   /// \param VGPRBlocks [out] Result VGPR block count.
883   /// \param SGPRBlocks [out] Result SGPR block count.
884   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
885                           bool FlatScrUsed, bool XNACKUsed,
886                           unsigned NextFreeVGPR, SMRange VGPRRange,
887                           unsigned NextFreeSGPR, SMRange SGPRRange,
888                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
889   bool ParseDirectiveAMDGCNTarget();
890   bool ParseDirectiveAMDHSAKernel();
891   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
892   bool ParseDirectiveHSACodeObjectVersion();
893   bool ParseDirectiveHSACodeObjectISA();
894   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
895   bool ParseDirectiveAMDKernelCodeT();
896   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
897   bool ParseDirectiveAMDGPUHsaKernel();
898 
899   bool ParseDirectiveISAVersion();
900   bool ParseDirectiveHSAMetadata();
901   bool ParseDirectivePALMetadataBegin();
902   bool ParseDirectivePALMetadata();
903 
904   /// Common code to parse out a block of text (typically YAML) between start and
905   /// end directives.
906   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
907                            const char *AssemblerDirectiveEnd,
908                            std::string &CollectString);
909 
910   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
911                              RegisterKind RegKind, unsigned Reg1,
912                              unsigned RegNum);
913   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
914                            unsigned& RegNum, unsigned& RegWidth,
915                            unsigned *DwordRegIndex);
916   bool isRegister();
917   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
918   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
919   void initializeGprCountSymbol(RegisterKind RegKind);
920   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
921                              unsigned RegWidth);
922   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
923                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
924   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
925                  bool IsGdsHardcoded);
926 
927 public:
928   enum AMDGPUMatchResultTy {
929     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
930   };
931   enum OperandMode {
932     OperandMode_Default,
933     OperandMode_NSA,
934   };
935 
936   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
937 
938   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
939                const MCInstrInfo &MII,
940                const MCTargetOptions &Options)
941       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
942     MCAsmParserExtension::Initialize(Parser);
943 
944     if (getFeatureBits().none()) {
945       // Set default features.
946       copySTI().ToggleFeature("southern-islands");
947     }
948 
949     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
950 
951     {
952       // TODO: make those pre-defined variables read-only.
953       // Currently there is none suitable machinery in the core llvm-mc for this.
954       // MCSymbol::isRedefinable is intended for another purpose, and
955       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
956       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
957       MCContext &Ctx = getContext();
958       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
959         MCSymbol *Sym =
960             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
961         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
962         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
963         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
964         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
965         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
966       } else {
967         MCSymbol *Sym =
968             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
969         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
970         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
971         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
972         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
973         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
974       }
975       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
976         initializeGprCountSymbol(IS_VGPR);
977         initializeGprCountSymbol(IS_SGPR);
978       } else
979         KernelScope.initialize(getContext());
980     }
981   }
982 
983   bool hasXNACK() const {
984     return AMDGPU::hasXNACK(getSTI());
985   }
986 
987   bool hasMIMG_R128() const {
988     return AMDGPU::hasMIMG_R128(getSTI());
989   }
990 
991   bool hasPackedD16() const {
992     return AMDGPU::hasPackedD16(getSTI());
993   }
994 
995   bool isSI() const {
996     return AMDGPU::isSI(getSTI());
997   }
998 
999   bool isCI() const {
1000     return AMDGPU::isCI(getSTI());
1001   }
1002 
1003   bool isVI() const {
1004     return AMDGPU::isVI(getSTI());
1005   }
1006 
1007   bool isGFX9() const {
1008     return AMDGPU::isGFX9(getSTI());
1009   }
1010 
1011   bool isGFX10() const {
1012     return AMDGPU::isGFX10(getSTI());
1013   }
1014 
1015   bool hasInv2PiInlineImm() const {
1016     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1017   }
1018 
1019   bool hasFlatOffsets() const {
1020     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1021   }
1022 
1023   bool hasSGPR102_SGPR103() const {
1024     return !isVI() && !isGFX9();
1025   }
1026 
1027   bool hasSGPR104_SGPR105() const {
1028     return isGFX10();
1029   }
1030 
1031   bool hasIntClamp() const {
1032     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1033   }
1034 
1035   AMDGPUTargetStreamer &getTargetStreamer() {
1036     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1037     return static_cast<AMDGPUTargetStreamer &>(TS);
1038   }
1039 
1040   const MCRegisterInfo *getMRI() const {
1041     // We need this const_cast because for some reason getContext() is not const
1042     // in MCAsmParser.
1043     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1044   }
1045 
1046   const MCInstrInfo *getMII() const {
1047     return &MII;
1048   }
1049 
1050   const FeatureBitset &getFeatureBits() const {
1051     return getSTI().getFeatureBits();
1052   }
1053 
1054   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1055   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1056   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1057 
1058   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1059   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1060   bool isForcedDPP() const { return ForcedDPP; }
1061   bool isForcedSDWA() const { return ForcedSDWA; }
1062   ArrayRef<unsigned> getMatchedVariants() const;
1063 
1064   std::unique_ptr<AMDGPUOperand> parseRegister();
1065   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1066   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1067   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1068                                       unsigned Kind) override;
1069   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1070                                OperandVector &Operands, MCStreamer &Out,
1071                                uint64_t &ErrorInfo,
1072                                bool MatchingInlineAsm) override;
1073   bool ParseDirective(AsmToken DirectiveID) override;
1074   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1075                                     OperandMode Mode = OperandMode_Default);
1076   StringRef parseMnemonicSuffix(StringRef Name);
1077   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1078                         SMLoc NameLoc, OperandVector &Operands) override;
1079   //bool ProcessInstruction(MCInst &Inst);
1080 
1081   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1082 
1083   OperandMatchResultTy
1084   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1085                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1086                      bool (*ConvertResult)(int64_t &) = nullptr);
1087 
1088   OperandMatchResultTy parseOperandArrayWithPrefix(
1089     const char *Prefix,
1090     OperandVector &Operands,
1091     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1092     bool (*ConvertResult)(int64_t&) = nullptr);
1093 
1094   OperandMatchResultTy
1095   parseNamedBit(const char *Name, OperandVector &Operands,
1096                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1097   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1098                                              StringRef &Value);
1099 
1100   bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
1101   bool parseSP3NegModifier();
1102   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1103   OperandMatchResultTy parseReg(OperandVector &Operands);
1104   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1105   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1106   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1107   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1108   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1109   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1110   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1111 
1112   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1113   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1114   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1115   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1116 
1117   bool parseCnt(int64_t &IntVal);
1118   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1119   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1120 
1121 private:
1122   struct OperandInfoTy {
1123     int64_t Id;
1124     bool IsSymbolic = false;
1125 
1126     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1127   };
1128 
1129   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1130   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1131 
1132   void errorExpTgt();
1133   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1134 
1135   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1136   bool validateSOPLiteral(const MCInst &Inst) const;
1137   bool validateConstantBusLimitations(const MCInst &Inst);
1138   bool validateEarlyClobberLimitations(const MCInst &Inst);
1139   bool validateIntClampSupported(const MCInst &Inst);
1140   bool validateMIMGAtomicDMask(const MCInst &Inst);
1141   bool validateMIMGGatherDMask(const MCInst &Inst);
1142   bool validateMIMGDataSize(const MCInst &Inst);
1143   bool validateMIMGAddrSize(const MCInst &Inst);
1144   bool validateMIMGD16(const MCInst &Inst);
1145   bool validateMIMGDim(const MCInst &Inst);
1146   bool validateLdsDirect(const MCInst &Inst);
1147   bool validateVOP3Literal(const MCInst &Inst) const;
1148   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1149   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1150   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1151 
1152   bool isId(const StringRef Id) const;
1153   bool isId(const AsmToken &Token, const StringRef Id) const;
1154   bool isToken(const AsmToken::TokenKind Kind) const;
1155   bool trySkipId(const StringRef Id);
1156   bool trySkipToken(const AsmToken::TokenKind Kind);
1157   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1158   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1159   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1160   AsmToken::TokenKind getTokenKind() const;
1161   bool parseExpr(int64_t &Imm);
1162   StringRef getTokenStr() const;
1163   AsmToken peekToken();
1164   AsmToken getToken() const;
1165   SMLoc getLoc() const;
1166   void lex();
1167 
1168 public:
1169   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1170   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1171 
1172   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1173   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1174   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1175   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1176   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1177 
1178   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1179                             const unsigned MinVal,
1180                             const unsigned MaxVal,
1181                             const StringRef ErrMsg);
1182   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1183   bool parseSwizzleOffset(int64_t &Imm);
1184   bool parseSwizzleMacro(int64_t &Imm);
1185   bool parseSwizzleQuadPerm(int64_t &Imm);
1186   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1187   bool parseSwizzleBroadcast(int64_t &Imm);
1188   bool parseSwizzleSwap(int64_t &Imm);
1189   bool parseSwizzleReverse(int64_t &Imm);
1190 
1191   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1192   int64_t parseGPRIdxMacro();
1193 
1194   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1195   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1196   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1197   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1198   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1199 
1200   AMDGPUOperand::Ptr defaultDLC() const;
1201   AMDGPUOperand::Ptr defaultGLC() const;
1202   AMDGPUOperand::Ptr defaultSLC() const;
1203 
1204   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1205   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1206   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1207   AMDGPUOperand::Ptr defaultOffsetU12() const;
1208   AMDGPUOperand::Ptr defaultOffsetS13() const;
1209 
1210   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1211 
1212   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1213                OptionalImmIndexMap &OptionalIdx);
1214   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1215   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1216   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1217 
1218   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1219 
1220   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1221                bool IsAtomic = false);
1222   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1223 
1224   OperandMatchResultTy parseDim(OperandVector &Operands);
1225   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1226   AMDGPUOperand::Ptr defaultRowMask() const;
1227   AMDGPUOperand::Ptr defaultBankMask() const;
1228   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1229   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1230 
1231   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1232                                     AMDGPUOperand::ImmTy Type);
1233   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1234   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1235   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1236   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1237   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1238   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1239                 uint64_t BasicInstType, bool skipVcc = false);
1240 
1241   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1242   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1243 };
1244 
1245 struct OptionalOperand {
1246   const char *Name;
1247   AMDGPUOperand::ImmTy Type;
1248   bool IsBit;
1249   bool (*ConvertResult)(int64_t&);
1250 };
1251 
1252 } // end anonymous namespace
1253 
1254 // May be called with integer type with equivalent bitwidth.
1255 static const fltSemantics *getFltSemantics(unsigned Size) {
1256   switch (Size) {
1257   case 4:
1258     return &APFloat::IEEEsingle();
1259   case 8:
1260     return &APFloat::IEEEdouble();
1261   case 2:
1262     return &APFloat::IEEEhalf();
1263   default:
1264     llvm_unreachable("unsupported fp type");
1265   }
1266 }
1267 
1268 static const fltSemantics *getFltSemantics(MVT VT) {
1269   return getFltSemantics(VT.getSizeInBits() / 8);
1270 }
1271 
1272 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1273   switch (OperandType) {
1274   case AMDGPU::OPERAND_REG_IMM_INT32:
1275   case AMDGPU::OPERAND_REG_IMM_FP32:
1276   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1277   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1278     return &APFloat::IEEEsingle();
1279   case AMDGPU::OPERAND_REG_IMM_INT64:
1280   case AMDGPU::OPERAND_REG_IMM_FP64:
1281   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1282   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1283     return &APFloat::IEEEdouble();
1284   case AMDGPU::OPERAND_REG_IMM_INT16:
1285   case AMDGPU::OPERAND_REG_IMM_FP16:
1286   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1287   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1288   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1289   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1290   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1291   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1292     return &APFloat::IEEEhalf();
1293   default:
1294     llvm_unreachable("unsupported fp type");
1295   }
1296 }
1297 
1298 //===----------------------------------------------------------------------===//
1299 // Operand
1300 //===----------------------------------------------------------------------===//
1301 
1302 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1303   bool Lost;
1304 
1305   // Convert literal to single precision
1306   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1307                                                APFloat::rmNearestTiesToEven,
1308                                                &Lost);
1309   // We allow precision lost but not overflow or underflow
1310   if (Status != APFloat::opOK &&
1311       Lost &&
1312       ((Status & APFloat::opOverflow)  != 0 ||
1313        (Status & APFloat::opUnderflow) != 0)) {
1314     return false;
1315   }
1316 
1317   return true;
1318 }
1319 
1320 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1321   return isUIntN(Size, Val) || isIntN(Size, Val);
1322 }
1323 
1324 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1325 
1326   // This is a hack to enable named inline values like
1327   // shared_base with both 32-bit and 64-bit operands.
1328   // Note that these values are defined as
1329   // 32-bit operands only.
1330   if (isInlineValue()) {
1331     return true;
1332   }
1333 
1334   if (!isImmTy(ImmTyNone)) {
1335     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1336     return false;
1337   }
1338   // TODO: We should avoid using host float here. It would be better to
1339   // check the float bit values which is what a few other places do.
1340   // We've had bot failures before due to weird NaN support on mips hosts.
1341 
1342   APInt Literal(64, Imm.Val);
1343 
1344   if (Imm.IsFPImm) { // We got fp literal token
1345     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1346       return AMDGPU::isInlinableLiteral64(Imm.Val,
1347                                           AsmParser->hasInv2PiInlineImm());
1348     }
1349 
1350     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1351     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1352       return false;
1353 
1354     if (type.getScalarSizeInBits() == 16) {
1355       return AMDGPU::isInlinableLiteral16(
1356         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1357         AsmParser->hasInv2PiInlineImm());
1358     }
1359 
1360     // Check if single precision literal is inlinable
1361     return AMDGPU::isInlinableLiteral32(
1362       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1363       AsmParser->hasInv2PiInlineImm());
1364   }
1365 
1366   // We got int literal token.
1367   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1368     return AMDGPU::isInlinableLiteral64(Imm.Val,
1369                                         AsmParser->hasInv2PiInlineImm());
1370   }
1371 
1372   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1373     return false;
1374   }
1375 
1376   if (type.getScalarSizeInBits() == 16) {
1377     return AMDGPU::isInlinableLiteral16(
1378       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1379       AsmParser->hasInv2PiInlineImm());
1380   }
1381 
1382   return AMDGPU::isInlinableLiteral32(
1383     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1384     AsmParser->hasInv2PiInlineImm());
1385 }
1386 
1387 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1388   // Check that this immediate can be added as literal
1389   if (!isImmTy(ImmTyNone)) {
1390     return false;
1391   }
1392 
1393   if (!Imm.IsFPImm) {
1394     // We got int literal token.
1395 
1396     if (type == MVT::f64 && hasFPModifiers()) {
1397       // Cannot apply fp modifiers to int literals preserving the same semantics
1398       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1399       // disable these cases.
1400       return false;
1401     }
1402 
1403     unsigned Size = type.getSizeInBits();
1404     if (Size == 64)
1405       Size = 32;
1406 
1407     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1408     // types.
1409     return isSafeTruncation(Imm.Val, Size);
1410   }
1411 
1412   // We got fp literal token
1413   if (type == MVT::f64) { // Expected 64-bit fp operand
1414     // We would set low 64-bits of literal to zeroes but we accept this literals
1415     return true;
1416   }
1417 
1418   if (type == MVT::i64) { // Expected 64-bit int operand
1419     // We don't allow fp literals in 64-bit integer instructions. It is
1420     // unclear how we should encode them.
1421     return false;
1422   }
1423 
1424   // We allow fp literals with f16x2 operands assuming that the specified
1425   // literal goes into the lower half and the upper half is zero. We also
1426   // require that the literal may be losslesly converted to f16.
1427   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1428                      (type == MVT::v2i16)? MVT::i16 : type;
1429 
1430   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1431   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1432 }
1433 
1434 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1435   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1436 }
1437 
1438 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1439   if (AsmParser->isVI())
1440     return isVReg32();
1441   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1442     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1443   else
1444     return false;
1445 }
1446 
1447 bool AMDGPUOperand::isSDWAFP16Operand() const {
1448   return isSDWAOperand(MVT::f16);
1449 }
1450 
1451 bool AMDGPUOperand::isSDWAFP32Operand() const {
1452   return isSDWAOperand(MVT::f32);
1453 }
1454 
1455 bool AMDGPUOperand::isSDWAInt16Operand() const {
1456   return isSDWAOperand(MVT::i16);
1457 }
1458 
1459 bool AMDGPUOperand::isSDWAInt32Operand() const {
1460   return isSDWAOperand(MVT::i32);
1461 }
1462 
1463 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1464 {
1465   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1466   assert(Size == 2 || Size == 4 || Size == 8);
1467 
1468   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1469 
1470   if (Imm.Mods.Abs) {
1471     Val &= ~FpSignMask;
1472   }
1473   if (Imm.Mods.Neg) {
1474     Val ^= FpSignMask;
1475   }
1476 
1477   return Val;
1478 }
1479 
1480 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1481   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1482                              Inst.getNumOperands())) {
1483     addLiteralImmOperand(Inst, Imm.Val,
1484                          ApplyModifiers &
1485                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1486   } else {
1487     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1488     Inst.addOperand(MCOperand::createImm(Imm.Val));
1489   }
1490 }
1491 
1492 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1493   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1494   auto OpNum = Inst.getNumOperands();
1495   // Check that this operand accepts literals
1496   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1497 
1498   if (ApplyModifiers) {
1499     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1500     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1501     Val = applyInputFPModifiers(Val, Size);
1502   }
1503 
1504   APInt Literal(64, Val);
1505   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1506 
1507   if (Imm.IsFPImm) { // We got fp literal token
1508     switch (OpTy) {
1509     case AMDGPU::OPERAND_REG_IMM_INT64:
1510     case AMDGPU::OPERAND_REG_IMM_FP64:
1511     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1512     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1513       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1514                                        AsmParser->hasInv2PiInlineImm())) {
1515         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1516         return;
1517       }
1518 
1519       // Non-inlineable
1520       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1521         // For fp operands we check if low 32 bits are zeros
1522         if (Literal.getLoBits(32) != 0) {
1523           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1524           "Can't encode literal as exact 64-bit floating-point operand. "
1525           "Low 32-bits will be set to zero");
1526         }
1527 
1528         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1529         return;
1530       }
1531 
1532       // We don't allow fp literals in 64-bit integer instructions. It is
1533       // unclear how we should encode them. This case should be checked earlier
1534       // in predicate methods (isLiteralImm())
1535       llvm_unreachable("fp literal in 64-bit integer instruction.");
1536 
1537     case AMDGPU::OPERAND_REG_IMM_INT32:
1538     case AMDGPU::OPERAND_REG_IMM_FP32:
1539     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1540     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1541     case AMDGPU::OPERAND_REG_IMM_INT16:
1542     case AMDGPU::OPERAND_REG_IMM_FP16:
1543     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1544     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1545     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1546     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1547     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1548     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1549       bool lost;
1550       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1551       // Convert literal to single precision
1552       FPLiteral.convert(*getOpFltSemantics(OpTy),
1553                         APFloat::rmNearestTiesToEven, &lost);
1554       // We allow precision lost but not overflow or underflow. This should be
1555       // checked earlier in isLiteralImm()
1556 
1557       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1558       Inst.addOperand(MCOperand::createImm(ImmVal));
1559       return;
1560     }
1561     default:
1562       llvm_unreachable("invalid operand size");
1563     }
1564 
1565     return;
1566   }
1567 
1568   // We got int literal token.
1569   // Only sign extend inline immediates.
1570   switch (OpTy) {
1571   case AMDGPU::OPERAND_REG_IMM_INT32:
1572   case AMDGPU::OPERAND_REG_IMM_FP32:
1573   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1574   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1575   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1576   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1577     if (isSafeTruncation(Val, 32) &&
1578         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1579                                      AsmParser->hasInv2PiInlineImm())) {
1580       Inst.addOperand(MCOperand::createImm(Val));
1581       return;
1582     }
1583 
1584     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1585     return;
1586 
1587   case AMDGPU::OPERAND_REG_IMM_INT64:
1588   case AMDGPU::OPERAND_REG_IMM_FP64:
1589   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1590   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1591     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1592       Inst.addOperand(MCOperand::createImm(Val));
1593       return;
1594     }
1595 
1596     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1597     return;
1598 
1599   case AMDGPU::OPERAND_REG_IMM_INT16:
1600   case AMDGPU::OPERAND_REG_IMM_FP16:
1601   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1602   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1603     if (isSafeTruncation(Val, 16) &&
1604         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1605                                      AsmParser->hasInv2PiInlineImm())) {
1606       Inst.addOperand(MCOperand::createImm(Val));
1607       return;
1608     }
1609 
1610     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1611     return;
1612 
1613   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1614   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1615     assert(isSafeTruncation(Val, 16));
1616     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1617                                         AsmParser->hasInv2PiInlineImm()));
1618 
1619     Inst.addOperand(MCOperand::createImm(Val));
1620     return;
1621   }
1622   default:
1623     llvm_unreachable("invalid operand size");
1624   }
1625 }
1626 
1627 template <unsigned Bitwidth>
1628 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1629   APInt Literal(64, Imm.Val);
1630 
1631   if (!Imm.IsFPImm) {
1632     // We got int literal token.
1633     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1634     return;
1635   }
1636 
1637   bool Lost;
1638   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1639   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1640                     APFloat::rmNearestTiesToEven, &Lost);
1641   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1642 }
1643 
1644 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1645   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1646 }
1647 
1648 static bool isInlineValue(unsigned Reg) {
1649   switch (Reg) {
1650   case AMDGPU::SRC_SHARED_BASE:
1651   case AMDGPU::SRC_SHARED_LIMIT:
1652   case AMDGPU::SRC_PRIVATE_BASE:
1653   case AMDGPU::SRC_PRIVATE_LIMIT:
1654   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1655     return true;
1656   default:
1657     return false;
1658   }
1659 }
1660 
1661 bool AMDGPUOperand::isInlineValue() const {
1662   return isRegKind() && ::isInlineValue(getReg());
1663 }
1664 
1665 //===----------------------------------------------------------------------===//
1666 // AsmParser
1667 //===----------------------------------------------------------------------===//
1668 
1669 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1670   if (Is == IS_VGPR) {
1671     switch (RegWidth) {
1672       default: return -1;
1673       case 1: return AMDGPU::VGPR_32RegClassID;
1674       case 2: return AMDGPU::VReg_64RegClassID;
1675       case 3: return AMDGPU::VReg_96RegClassID;
1676       case 4: return AMDGPU::VReg_128RegClassID;
1677       case 8: return AMDGPU::VReg_256RegClassID;
1678       case 16: return AMDGPU::VReg_512RegClassID;
1679     }
1680   } else if (Is == IS_TTMP) {
1681     switch (RegWidth) {
1682       default: return -1;
1683       case 1: return AMDGPU::TTMP_32RegClassID;
1684       case 2: return AMDGPU::TTMP_64RegClassID;
1685       case 4: return AMDGPU::TTMP_128RegClassID;
1686       case 8: return AMDGPU::TTMP_256RegClassID;
1687       case 16: return AMDGPU::TTMP_512RegClassID;
1688     }
1689   } else if (Is == IS_SGPR) {
1690     switch (RegWidth) {
1691       default: return -1;
1692       case 1: return AMDGPU::SGPR_32RegClassID;
1693       case 2: return AMDGPU::SGPR_64RegClassID;
1694       case 4: return AMDGPU::SGPR_128RegClassID;
1695       case 8: return AMDGPU::SGPR_256RegClassID;
1696       case 16: return AMDGPU::SGPR_512RegClassID;
1697     }
1698   }
1699   return -1;
1700 }
1701 
1702 static unsigned getSpecialRegForName(StringRef RegName) {
1703   return StringSwitch<unsigned>(RegName)
1704     .Case("exec", AMDGPU::EXEC)
1705     .Case("vcc", AMDGPU::VCC)
1706     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1707     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1708     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1709     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1710     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1711     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1712     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1713     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1714     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1715     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1716     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1717     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1718     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1719     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1720     .Case("m0", AMDGPU::M0)
1721     .Case("scc", AMDGPU::SCC)
1722     .Case("tba", AMDGPU::TBA)
1723     .Case("tma", AMDGPU::TMA)
1724     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1725     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1726     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1727     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1728     .Case("vcc_lo", AMDGPU::VCC_LO)
1729     .Case("vcc_hi", AMDGPU::VCC_HI)
1730     .Case("exec_lo", AMDGPU::EXEC_LO)
1731     .Case("exec_hi", AMDGPU::EXEC_HI)
1732     .Case("tma_lo", AMDGPU::TMA_LO)
1733     .Case("tma_hi", AMDGPU::TMA_HI)
1734     .Case("tba_lo", AMDGPU::TBA_LO)
1735     .Case("tba_hi", AMDGPU::TBA_HI)
1736     .Case("null", AMDGPU::SGPR_NULL)
1737     .Default(0);
1738 }
1739 
1740 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1741                                     SMLoc &EndLoc) {
1742   auto R = parseRegister();
1743   if (!R) return true;
1744   assert(R->isReg());
1745   RegNo = R->getReg();
1746   StartLoc = R->getStartLoc();
1747   EndLoc = R->getEndLoc();
1748   return false;
1749 }
1750 
1751 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1752                                             RegisterKind RegKind, unsigned Reg1,
1753                                             unsigned RegNum) {
1754   switch (RegKind) {
1755   case IS_SPECIAL:
1756     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1757       Reg = AMDGPU::EXEC;
1758       RegWidth = 2;
1759       return true;
1760     }
1761     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1762       Reg = AMDGPU::FLAT_SCR;
1763       RegWidth = 2;
1764       return true;
1765     }
1766     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1767       Reg = AMDGPU::XNACK_MASK;
1768       RegWidth = 2;
1769       return true;
1770     }
1771     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1772       Reg = AMDGPU::VCC;
1773       RegWidth = 2;
1774       return true;
1775     }
1776     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1777       Reg = AMDGPU::TBA;
1778       RegWidth = 2;
1779       return true;
1780     }
1781     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1782       Reg = AMDGPU::TMA;
1783       RegWidth = 2;
1784       return true;
1785     }
1786     return false;
1787   case IS_VGPR:
1788   case IS_SGPR:
1789   case IS_TTMP:
1790     if (Reg1 != Reg + RegWidth) {
1791       return false;
1792     }
1793     RegWidth++;
1794     return true;
1795   default:
1796     llvm_unreachable("unexpected register kind");
1797   }
1798 }
1799 
1800 static const StringRef Registers[] = {
1801   { "v" },
1802   { "s" },
1803   { "ttmp" },
1804 };
1805 
1806 bool
1807 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1808                             const AsmToken &NextToken) const {
1809 
1810   // A list of consecutive registers: [s0,s1,s2,s3]
1811   if (Token.is(AsmToken::LBrac))
1812     return true;
1813 
1814   if (!Token.is(AsmToken::Identifier))
1815     return false;
1816 
1817   // A single register like s0 or a range of registers like s[0:1]
1818 
1819   StringRef RegName = Token.getString();
1820 
1821   for (StringRef Reg : Registers) {
1822     if (RegName.startswith(Reg)) {
1823       if (Reg.size() < RegName.size()) {
1824         unsigned RegNum;
1825         // A single register with an index: rXX
1826         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1827           return true;
1828       } else {
1829         // A range of registers: r[XX:YY].
1830         if (NextToken.is(AsmToken::LBrac))
1831           return true;
1832       }
1833     }
1834   }
1835 
1836   return getSpecialRegForName(RegName);
1837 }
1838 
1839 bool
1840 AMDGPUAsmParser::isRegister()
1841 {
1842   return isRegister(getToken(), peekToken());
1843 }
1844 
1845 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1846                                           unsigned &RegNum, unsigned &RegWidth,
1847                                           unsigned *DwordRegIndex) {
1848   if (DwordRegIndex) { *DwordRegIndex = 0; }
1849   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1850   if (getLexer().is(AsmToken::Identifier)) {
1851     StringRef RegName = Parser.getTok().getString();
1852     if ((Reg = getSpecialRegForName(RegName))) {
1853       Parser.Lex();
1854       RegKind = IS_SPECIAL;
1855     } else {
1856       unsigned RegNumIndex = 0;
1857       if (RegName[0] == 'v') {
1858         RegNumIndex = 1;
1859         RegKind = IS_VGPR;
1860       } else if (RegName[0] == 's') {
1861         RegNumIndex = 1;
1862         RegKind = IS_SGPR;
1863       } else if (RegName.startswith("ttmp")) {
1864         RegNumIndex = strlen("ttmp");
1865         RegKind = IS_TTMP;
1866       } else {
1867         return false;
1868       }
1869       if (RegName.size() > RegNumIndex) {
1870         // Single 32-bit register: vXX.
1871         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1872           return false;
1873         Parser.Lex();
1874         RegWidth = 1;
1875       } else {
1876         // Range of registers: v[XX:YY]. ":YY" is optional.
1877         Parser.Lex();
1878         int64_t RegLo, RegHi;
1879         if (getLexer().isNot(AsmToken::LBrac))
1880           return false;
1881         Parser.Lex();
1882 
1883         if (getParser().parseAbsoluteExpression(RegLo))
1884           return false;
1885 
1886         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1887         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1888           return false;
1889         Parser.Lex();
1890 
1891         if (isRBrace) {
1892           RegHi = RegLo;
1893         } else {
1894           if (getParser().parseAbsoluteExpression(RegHi))
1895             return false;
1896 
1897           if (getLexer().isNot(AsmToken::RBrac))
1898             return false;
1899           Parser.Lex();
1900         }
1901         RegNum = (unsigned) RegLo;
1902         RegWidth = (RegHi - RegLo) + 1;
1903       }
1904     }
1905   } else if (getLexer().is(AsmToken::LBrac)) {
1906     // List of consecutive registers: [s0,s1,s2,s3]
1907     Parser.Lex();
1908     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1909       return false;
1910     if (RegWidth != 1)
1911       return false;
1912     RegisterKind RegKind1;
1913     unsigned Reg1, RegNum1, RegWidth1;
1914     do {
1915       if (getLexer().is(AsmToken::Comma)) {
1916         Parser.Lex();
1917       } else if (getLexer().is(AsmToken::RBrac)) {
1918         Parser.Lex();
1919         break;
1920       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1921         if (RegWidth1 != 1) {
1922           return false;
1923         }
1924         if (RegKind1 != RegKind) {
1925           return false;
1926         }
1927         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1928           return false;
1929         }
1930       } else {
1931         return false;
1932       }
1933     } while (true);
1934   } else {
1935     return false;
1936   }
1937   switch (RegKind) {
1938   case IS_SPECIAL:
1939     RegNum = 0;
1940     RegWidth = 1;
1941     break;
1942   case IS_VGPR:
1943   case IS_SGPR:
1944   case IS_TTMP:
1945   {
1946     unsigned Size = 1;
1947     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1948       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1949       Size = std::min(RegWidth, 4u);
1950     }
1951     if (RegNum % Size != 0)
1952       return false;
1953     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1954     RegNum = RegNum / Size;
1955     int RCID = getRegClass(RegKind, RegWidth);
1956     if (RCID == -1)
1957       return false;
1958     const MCRegisterClass RC = TRI->getRegClass(RCID);
1959     if (RegNum >= RC.getNumRegs())
1960       return false;
1961     Reg = RC.getRegister(RegNum);
1962     break;
1963   }
1964 
1965   default:
1966     llvm_unreachable("unexpected register kind");
1967   }
1968 
1969   if (!subtargetHasRegister(*TRI, Reg))
1970     return false;
1971   return true;
1972 }
1973 
1974 Optional<StringRef>
1975 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1976   switch (RegKind) {
1977   case IS_VGPR:
1978     return StringRef(".amdgcn.next_free_vgpr");
1979   case IS_SGPR:
1980     return StringRef(".amdgcn.next_free_sgpr");
1981   default:
1982     return None;
1983   }
1984 }
1985 
1986 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1987   auto SymbolName = getGprCountSymbolName(RegKind);
1988   assert(SymbolName && "initializing invalid register kind");
1989   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1990   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1991 }
1992 
1993 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1994                                             unsigned DwordRegIndex,
1995                                             unsigned RegWidth) {
1996   // Symbols are only defined for GCN targets
1997   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1998     return true;
1999 
2000   auto SymbolName = getGprCountSymbolName(RegKind);
2001   if (!SymbolName)
2002     return true;
2003   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2004 
2005   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2006   int64_t OldCount;
2007 
2008   if (!Sym->isVariable())
2009     return !Error(getParser().getTok().getLoc(),
2010                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2011   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2012     return !Error(
2013         getParser().getTok().getLoc(),
2014         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2015 
2016   if (OldCount <= NewMax)
2017     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2018 
2019   return true;
2020 }
2021 
2022 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2023   const auto &Tok = Parser.getTok();
2024   SMLoc StartLoc = Tok.getLoc();
2025   SMLoc EndLoc = Tok.getEndLoc();
2026   RegisterKind RegKind;
2027   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2028 
2029   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2030     //FIXME: improve error messages (bug 41303).
2031     Error(StartLoc, "not a valid operand.");
2032     return nullptr;
2033   }
2034   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2035     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2036       return nullptr;
2037   } else
2038     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2039   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2040 }
2041 
2042 bool
2043 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2044   if (HasSP3AbsModifier) {
2045     // This is a workaround for handling expressions
2046     // as arguments of SP3 'abs' modifier, for example:
2047     //     |1.0|
2048     //     |-1|
2049     //     |1+x|
2050     // This syntax is not compatible with syntax of standard
2051     // MC expressions (due to the trailing '|').
2052 
2053     SMLoc EndLoc;
2054     const MCExpr *Expr;
2055     SMLoc StartLoc = getLoc();
2056 
2057     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2058       return true;
2059     }
2060 
2061     if (!Expr->evaluateAsAbsolute(Val))
2062       return Error(StartLoc, "expected absolute expression");
2063 
2064     return false;
2065   }
2066 
2067   return getParser().parseAbsoluteExpression(Val);
2068 }
2069 
2070 OperandMatchResultTy
2071 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2072   // TODO: add syntactic sugar for 1/(2*PI)
2073 
2074   const auto& Tok = getToken();
2075   const auto& NextTok = peekToken();
2076   bool IsReal = Tok.is(AsmToken::Real);
2077   SMLoc S = Tok.getLoc();
2078   bool Negate = false;
2079 
2080   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2081     lex();
2082     IsReal = true;
2083     Negate = true;
2084   }
2085 
2086   if (IsReal) {
2087     // Floating-point expressions are not supported.
2088     // Can only allow floating-point literals with an
2089     // optional sign.
2090 
2091     StringRef Num = getTokenStr();
2092     lex();
2093 
2094     APFloat RealVal(APFloat::IEEEdouble());
2095     auto roundMode = APFloat::rmNearestTiesToEven;
2096     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2097       return MatchOperand_ParseFail;
2098     }
2099     if (Negate)
2100       RealVal.changeSign();
2101 
2102     Operands.push_back(
2103       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2104                                AMDGPUOperand::ImmTyNone, true));
2105 
2106     return MatchOperand_Success;
2107 
2108     // FIXME: Should enable arbitrary expressions here
2109   } else if (Tok.is(AsmToken::Integer) ||
2110              (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){
2111 
2112     int64_t IntVal;
2113     if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
2114       return MatchOperand_ParseFail;
2115 
2116     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2117     return MatchOperand_Success;
2118   }
2119 
2120   return MatchOperand_NoMatch;
2121 }
2122 
2123 OperandMatchResultTy
2124 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2125   if (!isRegister())
2126     return MatchOperand_NoMatch;
2127 
2128   if (auto R = parseRegister()) {
2129     assert(R->isReg());
2130     Operands.push_back(std::move(R));
2131     return MatchOperand_Success;
2132   }
2133   return MatchOperand_ParseFail;
2134 }
2135 
2136 OperandMatchResultTy
2137 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2138   auto res = parseReg(Operands);
2139   return (res == MatchOperand_NoMatch)?
2140          parseImm(Operands, HasSP3AbsMod) :
2141          res;
2142 }
2143 
2144 // Check if the current token is an SP3 'neg' modifier.
2145 // Currently this modifier is allowed in the following context:
2146 //
2147 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2148 // 2. Before an 'abs' modifier: -abs(...)
2149 // 3. Before an SP3 'abs' modifier: -|...|
2150 //
2151 // In all other cases "-" is handled as a part
2152 // of an expression that follows the sign.
2153 //
2154 // Note: When "-" is followed by an integer literal,
2155 // this is interpreted as integer negation rather
2156 // than a floating-point NEG modifier applied to N.
2157 // Beside being contr-intuitive, such use of floating-point
2158 // NEG modifier would have resulted in different meaning
2159 // of integer literals used with VOP1/2/C and VOP3,
2160 // for example:
2161 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2162 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2163 // Negative fp literals with preceding "-" are
2164 // handled likewise for unifomtity
2165 //
2166 bool
2167 AMDGPUAsmParser::parseSP3NegModifier() {
2168 
2169   AsmToken NextToken[2];
2170   peekTokens(NextToken);
2171 
2172   if (isToken(AsmToken::Minus) &&
2173       (isRegister(NextToken[0], NextToken[1]) ||
2174        NextToken[0].is(AsmToken::Pipe) ||
2175        isId(NextToken[0], "abs"))) {
2176     lex();
2177     return true;
2178   }
2179 
2180   return false;
2181 }
2182 
2183 OperandMatchResultTy
2184 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2185                                               bool AllowImm) {
2186   bool Neg, SP3Neg;
2187   bool Abs, SP3Abs;
2188   SMLoc Loc;
2189 
2190   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2191   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2192     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2193     return MatchOperand_ParseFail;
2194   }
2195 
2196   SP3Neg = parseSP3NegModifier();
2197 
2198   Loc = getLoc();
2199   Neg = trySkipId("neg");
2200   if (Neg && SP3Neg) {
2201     Error(Loc, "expected register or immediate");
2202     return MatchOperand_ParseFail;
2203   }
2204   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2205     return MatchOperand_ParseFail;
2206 
2207   Abs = trySkipId("abs");
2208   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2209     return MatchOperand_ParseFail;
2210 
2211   Loc = getLoc();
2212   SP3Abs = trySkipToken(AsmToken::Pipe);
2213   if (Abs && SP3Abs) {
2214     Error(Loc, "expected register or immediate");
2215     return MatchOperand_ParseFail;
2216   }
2217 
2218   OperandMatchResultTy Res;
2219   if (AllowImm) {
2220     Res = parseRegOrImm(Operands, SP3Abs);
2221   } else {
2222     Res = parseReg(Operands);
2223   }
2224   if (Res != MatchOperand_Success) {
2225     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2226   }
2227 
2228   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2229     return MatchOperand_ParseFail;
2230   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2231     return MatchOperand_ParseFail;
2232   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2233     return MatchOperand_ParseFail;
2234 
2235   AMDGPUOperand::Modifiers Mods;
2236   Mods.Abs = Abs || SP3Abs;
2237   Mods.Neg = Neg || SP3Neg;
2238 
2239   if (Mods.hasFPModifiers()) {
2240     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2241     Op.setModifiers(Mods);
2242   }
2243   return MatchOperand_Success;
2244 }
2245 
2246 OperandMatchResultTy
2247 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2248                                                bool AllowImm) {
2249   bool Sext = trySkipId("sext");
2250   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2251     return MatchOperand_ParseFail;
2252 
2253   OperandMatchResultTy Res;
2254   if (AllowImm) {
2255     Res = parseRegOrImm(Operands);
2256   } else {
2257     Res = parseReg(Operands);
2258   }
2259   if (Res != MatchOperand_Success) {
2260     return Sext? MatchOperand_ParseFail : Res;
2261   }
2262 
2263   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2264     return MatchOperand_ParseFail;
2265 
2266   AMDGPUOperand::Modifiers Mods;
2267   Mods.Sext = Sext;
2268 
2269   if (Mods.hasIntModifiers()) {
2270     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2271     Op.setModifiers(Mods);
2272   }
2273 
2274   return MatchOperand_Success;
2275 }
2276 
2277 OperandMatchResultTy
2278 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2279   return parseRegOrImmWithFPInputMods(Operands, false);
2280 }
2281 
2282 OperandMatchResultTy
2283 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2284   return parseRegOrImmWithIntInputMods(Operands, false);
2285 }
2286 
2287 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2288   auto Loc = getLoc();
2289   if (trySkipId("off")) {
2290     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2291                                                 AMDGPUOperand::ImmTyOff, false));
2292     return MatchOperand_Success;
2293   }
2294 
2295   if (!isRegister())
2296     return MatchOperand_NoMatch;
2297 
2298   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2299   if (Reg) {
2300     Operands.push_back(std::move(Reg));
2301     return MatchOperand_Success;
2302   }
2303 
2304   return MatchOperand_ParseFail;
2305 
2306 }
2307 
2308 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2309   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2310 
2311   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2312       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2313       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2314       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2315     return Match_InvalidOperand;
2316 
2317   if ((TSFlags & SIInstrFlags::VOP3) &&
2318       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2319       getForcedEncodingSize() != 64)
2320     return Match_PreferE32;
2321 
2322   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2323       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2324     // v_mac_f32/16 allow only dst_sel == DWORD;
2325     auto OpNum =
2326         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2327     const auto &Op = Inst.getOperand(OpNum);
2328     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2329       return Match_InvalidOperand;
2330     }
2331   }
2332 
2333   if (TSFlags & SIInstrFlags::FLAT) {
2334     // FIXME: Produces error without correct column reported.
2335     auto Opcode = Inst.getOpcode();
2336     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2337 
2338     const auto &Op = Inst.getOperand(OpNum);
2339     if (!hasFlatOffsets() && Op.getImm() != 0)
2340       return Match_InvalidOperand;
2341 
2342     // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2343     // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2344     if (isGFX10()) {
2345       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2346         if (!isInt<12>(Op.getImm()))
2347           return Match_InvalidOperand;
2348       } else {
2349         if (!isUInt<11>(Op.getImm()))
2350           return Match_InvalidOperand;
2351       }
2352     }
2353   }
2354 
2355   return Match_Success;
2356 }
2357 
2358 // What asm variants we should check
2359 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2360   if (getForcedEncodingSize() == 32) {
2361     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2362     return makeArrayRef(Variants);
2363   }
2364 
2365   if (isForcedVOP3()) {
2366     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2367     return makeArrayRef(Variants);
2368   }
2369 
2370   if (isForcedSDWA()) {
2371     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2372                                         AMDGPUAsmVariants::SDWA9};
2373     return makeArrayRef(Variants);
2374   }
2375 
2376   if (isForcedDPP()) {
2377     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2378     return makeArrayRef(Variants);
2379   }
2380 
2381   static const unsigned Variants[] = {
2382     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2383     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2384   };
2385 
2386   return makeArrayRef(Variants);
2387 }
2388 
2389 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2390   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2391   const unsigned Num = Desc.getNumImplicitUses();
2392   for (unsigned i = 0; i < Num; ++i) {
2393     unsigned Reg = Desc.ImplicitUses[i];
2394     switch (Reg) {
2395     case AMDGPU::FLAT_SCR:
2396     case AMDGPU::VCC:
2397     case AMDGPU::VCC_LO:
2398     case AMDGPU::VCC_HI:
2399     case AMDGPU::M0:
2400     case AMDGPU::SGPR_NULL:
2401       return Reg;
2402     default:
2403       break;
2404     }
2405   }
2406   return AMDGPU::NoRegister;
2407 }
2408 
2409 // NB: This code is correct only when used to check constant
2410 // bus limitations because GFX7 support no f16 inline constants.
2411 // Note that there are no cases when a GFX7 opcode violates
2412 // constant bus limitations due to the use of an f16 constant.
2413 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2414                                        unsigned OpIdx) const {
2415   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2416 
2417   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2418     return false;
2419   }
2420 
2421   const MCOperand &MO = Inst.getOperand(OpIdx);
2422 
2423   int64_t Val = MO.getImm();
2424   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2425 
2426   switch (OpSize) { // expected operand size
2427   case 8:
2428     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2429   case 4:
2430     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2431   case 2: {
2432     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2433     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2434         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2435         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2436         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2437       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2438     } else {
2439       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2440     }
2441   }
2442   default:
2443     llvm_unreachable("invalid operand size");
2444   }
2445 }
2446 
2447 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2448   const MCOperand &MO = Inst.getOperand(OpIdx);
2449   if (MO.isImm()) {
2450     return !isInlineConstant(Inst, OpIdx);
2451   }
2452   return !MO.isReg() ||
2453          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2454 }
2455 
2456 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2457   const unsigned Opcode = Inst.getOpcode();
2458   const MCInstrDesc &Desc = MII.get(Opcode);
2459   unsigned ConstantBusUseCount = 0;
2460   unsigned NumLiterals = 0;
2461   unsigned LiteralSize;
2462 
2463   if (Desc.TSFlags &
2464       (SIInstrFlags::VOPC |
2465        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2466        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2467        SIInstrFlags::SDWA)) {
2468     // Check special imm operands (used by madmk, etc)
2469     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2470       ++ConstantBusUseCount;
2471     }
2472 
2473     SmallDenseSet<unsigned> SGPRsUsed;
2474     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2475     if (SGPRUsed != AMDGPU::NoRegister) {
2476       SGPRsUsed.insert(SGPRUsed);
2477       ++ConstantBusUseCount;
2478     }
2479 
2480     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2481     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2482     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2483 
2484     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2485 
2486     for (int OpIdx : OpIndices) {
2487       if (OpIdx == -1) break;
2488 
2489       const MCOperand &MO = Inst.getOperand(OpIdx);
2490       if (usesConstantBus(Inst, OpIdx)) {
2491         if (MO.isReg()) {
2492           const unsigned Reg = mc2PseudoReg(MO.getReg());
2493           // Pairs of registers with a partial intersections like these
2494           //   s0, s[0:1]
2495           //   flat_scratch_lo, flat_scratch
2496           //   flat_scratch_lo, flat_scratch_hi
2497           // are theoretically valid but they are disabled anyway.
2498           // Note that this code mimics SIInstrInfo::verifyInstruction
2499           if (!SGPRsUsed.count(Reg)) {
2500             SGPRsUsed.insert(Reg);
2501             ++ConstantBusUseCount;
2502           }
2503           SGPRUsed = Reg;
2504         } else { // Expression or a literal
2505 
2506           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2507             continue; // special operand like VINTERP attr_chan
2508 
2509           // An instruction may use only one literal.
2510           // This has been validated on the previous step.
2511           // See validateVOP3Literal.
2512           // This literal may be used as more than one operand.
2513           // If all these operands are of the same size,
2514           // this literal counts as one scalar value.
2515           // Otherwise it counts as 2 scalar values.
2516           // See "GFX10 Shader Programming", section 3.6.2.3.
2517 
2518           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2519           if (Size < 4) Size = 4;
2520 
2521           if (NumLiterals == 0) {
2522             NumLiterals = 1;
2523             LiteralSize = Size;
2524           } else if (LiteralSize != Size) {
2525             NumLiterals = 2;
2526           }
2527         }
2528       }
2529     }
2530   }
2531   ConstantBusUseCount += NumLiterals;
2532 
2533   if (isGFX10())
2534     return ConstantBusUseCount <= 2;
2535 
2536   return ConstantBusUseCount <= 1;
2537 }
2538 
2539 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2540   const unsigned Opcode = Inst.getOpcode();
2541   const MCInstrDesc &Desc = MII.get(Opcode);
2542 
2543   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2544   if (DstIdx == -1 ||
2545       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2546     return true;
2547   }
2548 
2549   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2550 
2551   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2552   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2553   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2554 
2555   assert(DstIdx != -1);
2556   const MCOperand &Dst = Inst.getOperand(DstIdx);
2557   assert(Dst.isReg());
2558   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2559 
2560   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2561 
2562   for (int SrcIdx : SrcIndices) {
2563     if (SrcIdx == -1) break;
2564     const MCOperand &Src = Inst.getOperand(SrcIdx);
2565     if (Src.isReg()) {
2566       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2567       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2568         return false;
2569       }
2570     }
2571   }
2572 
2573   return true;
2574 }
2575 
2576 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2577 
2578   const unsigned Opc = Inst.getOpcode();
2579   const MCInstrDesc &Desc = MII.get(Opc);
2580 
2581   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2582     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2583     assert(ClampIdx != -1);
2584     return Inst.getOperand(ClampIdx).getImm() == 0;
2585   }
2586 
2587   return true;
2588 }
2589 
2590 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2591 
2592   const unsigned Opc = Inst.getOpcode();
2593   const MCInstrDesc &Desc = MII.get(Opc);
2594 
2595   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2596     return true;
2597 
2598   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2599   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2600   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2601 
2602   assert(VDataIdx != -1);
2603   assert(DMaskIdx != -1);
2604   assert(TFEIdx != -1);
2605 
2606   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2607   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2608   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2609   if (DMask == 0)
2610     DMask = 1;
2611 
2612   unsigned DataSize =
2613     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2614   if (hasPackedD16()) {
2615     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2616     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2617       DataSize = (DataSize + 1) / 2;
2618   }
2619 
2620   return (VDataSize / 4) == DataSize + TFESize;
2621 }
2622 
2623 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2624   const unsigned Opc = Inst.getOpcode();
2625   const MCInstrDesc &Desc = MII.get(Opc);
2626 
2627   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2628     return true;
2629 
2630   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2631   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2632       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2633   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2634   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2635   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2636 
2637   assert(VAddr0Idx != -1);
2638   assert(SrsrcIdx != -1);
2639   assert(DimIdx != -1);
2640   assert(SrsrcIdx > VAddr0Idx);
2641 
2642   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2643   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2644   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2645   unsigned VAddrSize =
2646       IsNSA ? SrsrcIdx - VAddr0Idx
2647             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2648 
2649   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2650                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2651                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2652                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2653   if (!IsNSA) {
2654     if (AddrSize > 8)
2655       AddrSize = 16;
2656     else if (AddrSize > 4)
2657       AddrSize = 8;
2658   }
2659 
2660   return VAddrSize == AddrSize;
2661 }
2662 
2663 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2664 
2665   const unsigned Opc = Inst.getOpcode();
2666   const MCInstrDesc &Desc = MII.get(Opc);
2667 
2668   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2669     return true;
2670   if (!Desc.mayLoad() || !Desc.mayStore())
2671     return true; // Not atomic
2672 
2673   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2674   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2675 
2676   // This is an incomplete check because image_atomic_cmpswap
2677   // may only use 0x3 and 0xf while other atomic operations
2678   // may use 0x1 and 0x3. However these limitations are
2679   // verified when we check that dmask matches dst size.
2680   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2681 }
2682 
2683 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2684 
2685   const unsigned Opc = Inst.getOpcode();
2686   const MCInstrDesc &Desc = MII.get(Opc);
2687 
2688   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2689     return true;
2690 
2691   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2692   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2693 
2694   // GATHER4 instructions use dmask in a different fashion compared to
2695   // other MIMG instructions. The only useful DMASK values are
2696   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2697   // (red,red,red,red) etc.) The ISA document doesn't mention
2698   // this.
2699   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2700 }
2701 
2702 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2703 
2704   const unsigned Opc = Inst.getOpcode();
2705   const MCInstrDesc &Desc = MII.get(Opc);
2706 
2707   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2708     return true;
2709 
2710   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2711   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2712     if (isCI() || isSI())
2713       return false;
2714   }
2715 
2716   return true;
2717 }
2718 
2719 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2720   const unsigned Opc = Inst.getOpcode();
2721   const MCInstrDesc &Desc = MII.get(Opc);
2722 
2723   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2724     return true;
2725 
2726   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2727   if (DimIdx < 0)
2728     return true;
2729 
2730   long Imm = Inst.getOperand(DimIdx).getImm();
2731   if (Imm < 0 || Imm >= 8)
2732     return false;
2733 
2734   return true;
2735 }
2736 
2737 static bool IsRevOpcode(const unsigned Opcode)
2738 {
2739   switch (Opcode) {
2740   case AMDGPU::V_SUBREV_F32_e32:
2741   case AMDGPU::V_SUBREV_F32_e64:
2742   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2743   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2744   case AMDGPU::V_SUBREV_F32_e32_vi:
2745   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2746   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2747   case AMDGPU::V_SUBREV_F32_e64_vi:
2748 
2749   case AMDGPU::V_SUBREV_I32_e32:
2750   case AMDGPU::V_SUBREV_I32_e64:
2751   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2752   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2753 
2754   case AMDGPU::V_SUBBREV_U32_e32:
2755   case AMDGPU::V_SUBBREV_U32_e64:
2756   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2757   case AMDGPU::V_SUBBREV_U32_e32_vi:
2758   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2759   case AMDGPU::V_SUBBREV_U32_e64_vi:
2760 
2761   case AMDGPU::V_SUBREV_U32_e32:
2762   case AMDGPU::V_SUBREV_U32_e64:
2763   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2764   case AMDGPU::V_SUBREV_U32_e32_vi:
2765   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2766   case AMDGPU::V_SUBREV_U32_e64_vi:
2767 
2768   case AMDGPU::V_SUBREV_F16_e32:
2769   case AMDGPU::V_SUBREV_F16_e64:
2770   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2771   case AMDGPU::V_SUBREV_F16_e32_vi:
2772   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2773   case AMDGPU::V_SUBREV_F16_e64_vi:
2774 
2775   case AMDGPU::V_SUBREV_U16_e32:
2776   case AMDGPU::V_SUBREV_U16_e64:
2777   case AMDGPU::V_SUBREV_U16_e32_vi:
2778   case AMDGPU::V_SUBREV_U16_e64_vi:
2779 
2780   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2781   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2782   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2783 
2784   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2785   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2786 
2787   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2788   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2789 
2790   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2791   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2792 
2793   case AMDGPU::V_LSHRREV_B32_e32:
2794   case AMDGPU::V_LSHRREV_B32_e64:
2795   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2796   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2797   case AMDGPU::V_LSHRREV_B32_e32_vi:
2798   case AMDGPU::V_LSHRREV_B32_e64_vi:
2799   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2800   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2801 
2802   case AMDGPU::V_ASHRREV_I32_e32:
2803   case AMDGPU::V_ASHRREV_I32_e64:
2804   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2805   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2806   case AMDGPU::V_ASHRREV_I32_e32_vi:
2807   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2808   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2809   case AMDGPU::V_ASHRREV_I32_e64_vi:
2810 
2811   case AMDGPU::V_LSHLREV_B32_e32:
2812   case AMDGPU::V_LSHLREV_B32_e64:
2813   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2814   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2815   case AMDGPU::V_LSHLREV_B32_e32_vi:
2816   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2817   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2818   case AMDGPU::V_LSHLREV_B32_e64_vi:
2819 
2820   case AMDGPU::V_LSHLREV_B16_e32:
2821   case AMDGPU::V_LSHLREV_B16_e64:
2822   case AMDGPU::V_LSHLREV_B16_e32_vi:
2823   case AMDGPU::V_LSHLREV_B16_e64_vi:
2824   case AMDGPU::V_LSHLREV_B16_gfx10:
2825 
2826   case AMDGPU::V_LSHRREV_B16_e32:
2827   case AMDGPU::V_LSHRREV_B16_e64:
2828   case AMDGPU::V_LSHRREV_B16_e32_vi:
2829   case AMDGPU::V_LSHRREV_B16_e64_vi:
2830   case AMDGPU::V_LSHRREV_B16_gfx10:
2831 
2832   case AMDGPU::V_ASHRREV_I16_e32:
2833   case AMDGPU::V_ASHRREV_I16_e64:
2834   case AMDGPU::V_ASHRREV_I16_e32_vi:
2835   case AMDGPU::V_ASHRREV_I16_e64_vi:
2836   case AMDGPU::V_ASHRREV_I16_gfx10:
2837 
2838   case AMDGPU::V_LSHLREV_B64:
2839   case AMDGPU::V_LSHLREV_B64_gfx10:
2840   case AMDGPU::V_LSHLREV_B64_vi:
2841 
2842   case AMDGPU::V_LSHRREV_B64:
2843   case AMDGPU::V_LSHRREV_B64_gfx10:
2844   case AMDGPU::V_LSHRREV_B64_vi:
2845 
2846   case AMDGPU::V_ASHRREV_I64:
2847   case AMDGPU::V_ASHRREV_I64_gfx10:
2848   case AMDGPU::V_ASHRREV_I64_vi:
2849 
2850   case AMDGPU::V_PK_LSHLREV_B16:
2851   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2852   case AMDGPU::V_PK_LSHLREV_B16_vi:
2853 
2854   case AMDGPU::V_PK_LSHRREV_B16:
2855   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2856   case AMDGPU::V_PK_LSHRREV_B16_vi:
2857   case AMDGPU::V_PK_ASHRREV_I16:
2858   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2859   case AMDGPU::V_PK_ASHRREV_I16_vi:
2860     return true;
2861   default:
2862     return false;
2863   }
2864 }
2865 
2866 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2867 
2868   using namespace SIInstrFlags;
2869   const unsigned Opcode = Inst.getOpcode();
2870   const MCInstrDesc &Desc = MII.get(Opcode);
2871 
2872   // lds_direct register is defined so that it can be used
2873   // with 9-bit operands only. Ignore encodings which do not accept these.
2874   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2875     return true;
2876 
2877   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2878   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2879   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2880 
2881   const int SrcIndices[] = { Src1Idx, Src2Idx };
2882 
2883   // lds_direct cannot be specified as either src1 or src2.
2884   for (int SrcIdx : SrcIndices) {
2885     if (SrcIdx == -1) break;
2886     const MCOperand &Src = Inst.getOperand(SrcIdx);
2887     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2888       return false;
2889     }
2890   }
2891 
2892   if (Src0Idx == -1)
2893     return true;
2894 
2895   const MCOperand &Src = Inst.getOperand(Src0Idx);
2896   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2897     return true;
2898 
2899   // lds_direct is specified as src0. Check additional limitations.
2900   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2901 }
2902 
2903 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2904   unsigned Opcode = Inst.getOpcode();
2905   const MCInstrDesc &Desc = MII.get(Opcode);
2906   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2907     return true;
2908 
2909   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2910   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2911 
2912   const int OpIndices[] = { Src0Idx, Src1Idx };
2913 
2914   unsigned NumLiterals = 0;
2915   uint32_t LiteralValue;
2916 
2917   for (int OpIdx : OpIndices) {
2918     if (OpIdx == -1) break;
2919 
2920     const MCOperand &MO = Inst.getOperand(OpIdx);
2921     if (MO.isImm() &&
2922         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2923         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2924         !isInlineConstant(Inst, OpIdx)) {
2925       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2926       if (NumLiterals == 0 || LiteralValue != Value) {
2927         LiteralValue = Value;
2928         ++NumLiterals;
2929       }
2930     }
2931   }
2932 
2933   return NumLiterals <= 1;
2934 }
2935 
2936 // VOP3 literal is only allowed in GFX10+ and only one can be used
2937 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
2938   unsigned Opcode = Inst.getOpcode();
2939   const MCInstrDesc &Desc = MII.get(Opcode);
2940   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
2941     return true;
2942 
2943   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2944   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2945   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2946 
2947   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2948 
2949   unsigned NumLiterals = 0;
2950   uint32_t LiteralValue;
2951 
2952   for (int OpIdx : OpIndices) {
2953     if (OpIdx == -1) break;
2954 
2955     const MCOperand &MO = Inst.getOperand(OpIdx);
2956     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
2957       continue;
2958 
2959     if (!isInlineConstant(Inst, OpIdx)) {
2960       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2961       if (NumLiterals == 0 || LiteralValue != Value) {
2962         LiteralValue = Value;
2963         ++NumLiterals;
2964       }
2965     }
2966   }
2967 
2968   return !NumLiterals ||
2969          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
2970 }
2971 
2972 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2973                                           const SMLoc &IDLoc) {
2974   if (!validateLdsDirect(Inst)) {
2975     Error(IDLoc,
2976       "invalid use of lds_direct");
2977     return false;
2978   }
2979   if (!validateSOPLiteral(Inst)) {
2980     Error(IDLoc,
2981       "only one literal operand is allowed");
2982     return false;
2983   }
2984   if (!validateVOP3Literal(Inst)) {
2985     Error(IDLoc,
2986       "invalid literal operand");
2987     return false;
2988   }
2989   if (!validateConstantBusLimitations(Inst)) {
2990     Error(IDLoc,
2991       "invalid operand (violates constant bus restrictions)");
2992     return false;
2993   }
2994   if (!validateEarlyClobberLimitations(Inst)) {
2995     Error(IDLoc,
2996       "destination must be different than all sources");
2997     return false;
2998   }
2999   if (!validateIntClampSupported(Inst)) {
3000     Error(IDLoc,
3001       "integer clamping is not supported on this GPU");
3002     return false;
3003   }
3004   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3005   if (!validateMIMGD16(Inst)) {
3006     Error(IDLoc,
3007       "d16 modifier is not supported on this GPU");
3008     return false;
3009   }
3010   if (!validateMIMGDim(Inst)) {
3011     Error(IDLoc, "dim modifier is required on this GPU");
3012     return false;
3013   }
3014   if (!validateMIMGDataSize(Inst)) {
3015     Error(IDLoc,
3016       "image data size does not match dmask and tfe");
3017     return false;
3018   }
3019   if (!validateMIMGAddrSize(Inst)) {
3020     Error(IDLoc,
3021       "image address size does not match dim and a16");
3022     return false;
3023   }
3024   if (!validateMIMGAtomicDMask(Inst)) {
3025     Error(IDLoc,
3026       "invalid atomic image dmask");
3027     return false;
3028   }
3029   if (!validateMIMGGatherDMask(Inst)) {
3030     Error(IDLoc,
3031       "invalid image_gather dmask: only one bit must be set");
3032     return false;
3033   }
3034 
3035   return true;
3036 }
3037 
3038 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3039                                             const FeatureBitset &FBS,
3040                                             unsigned VariantID = 0);
3041 
3042 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3043                                               OperandVector &Operands,
3044                                               MCStreamer &Out,
3045                                               uint64_t &ErrorInfo,
3046                                               bool MatchingInlineAsm) {
3047   MCInst Inst;
3048   unsigned Result = Match_Success;
3049   for (auto Variant : getMatchedVariants()) {
3050     uint64_t EI;
3051     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3052                                   Variant);
3053     // We order match statuses from least to most specific. We use most specific
3054     // status as resulting
3055     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3056     if ((R == Match_Success) ||
3057         (R == Match_PreferE32) ||
3058         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3059         (R == Match_InvalidOperand && Result != Match_MissingFeature
3060                                    && Result != Match_PreferE32) ||
3061         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3062                                    && Result != Match_MissingFeature
3063                                    && Result != Match_PreferE32)) {
3064       Result = R;
3065       ErrorInfo = EI;
3066     }
3067     if (R == Match_Success)
3068       break;
3069   }
3070 
3071   switch (Result) {
3072   default: break;
3073   case Match_Success:
3074     if (!validateInstruction(Inst, IDLoc)) {
3075       return true;
3076     }
3077     Inst.setLoc(IDLoc);
3078     Out.EmitInstruction(Inst, getSTI());
3079     return false;
3080 
3081   case Match_MissingFeature:
3082     return Error(IDLoc, "instruction not supported on this GPU");
3083 
3084   case Match_MnemonicFail: {
3085     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3086     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3087         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3088     return Error(IDLoc, "invalid instruction" + Suggestion,
3089                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3090   }
3091 
3092   case Match_InvalidOperand: {
3093     SMLoc ErrorLoc = IDLoc;
3094     if (ErrorInfo != ~0ULL) {
3095       if (ErrorInfo >= Operands.size()) {
3096         return Error(IDLoc, "too few operands for instruction");
3097       }
3098       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3099       if (ErrorLoc == SMLoc())
3100         ErrorLoc = IDLoc;
3101     }
3102     return Error(ErrorLoc, "invalid operand for instruction");
3103   }
3104 
3105   case Match_PreferE32:
3106     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3107                         "should be encoded as e32");
3108   }
3109   llvm_unreachable("Implement any new match types added!");
3110 }
3111 
3112 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3113   int64_t Tmp = -1;
3114   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3115     return true;
3116   }
3117   if (getParser().parseAbsoluteExpression(Tmp)) {
3118     return true;
3119   }
3120   Ret = static_cast<uint32_t>(Tmp);
3121   return false;
3122 }
3123 
3124 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3125                                                uint32_t &Minor) {
3126   if (ParseAsAbsoluteExpression(Major))
3127     return TokError("invalid major version");
3128 
3129   if (getLexer().isNot(AsmToken::Comma))
3130     return TokError("minor version number required, comma expected");
3131   Lex();
3132 
3133   if (ParseAsAbsoluteExpression(Minor))
3134     return TokError("invalid minor version");
3135 
3136   return false;
3137 }
3138 
3139 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3140   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3141     return TokError("directive only supported for amdgcn architecture");
3142 
3143   std::string Target;
3144 
3145   SMLoc TargetStart = getTok().getLoc();
3146   if (getParser().parseEscapedString(Target))
3147     return true;
3148   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3149 
3150   std::string ExpectedTarget;
3151   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3152   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3153 
3154   if (Target != ExpectedTargetOS.str())
3155     return getParser().Error(TargetRange.Start, "target must match options",
3156                              TargetRange);
3157 
3158   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3159   return false;
3160 }
3161 
3162 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3163   return getParser().Error(Range.Start, "value out of range", Range);
3164 }
3165 
3166 bool AMDGPUAsmParser::calculateGPRBlocks(
3167     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3168     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
3169     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
3170     unsigned &SGPRBlocks) {
3171   // TODO(scott.linder): These calculations are duplicated from
3172   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3173   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3174 
3175   unsigned NumVGPRs = NextFreeVGPR;
3176   unsigned NumSGPRs = NextFreeSGPR;
3177 
3178   if (Version.Major >= 10)
3179     NumSGPRs = 0;
3180   else {
3181     unsigned MaxAddressableNumSGPRs =
3182         IsaInfo::getAddressableNumSGPRs(&getSTI());
3183 
3184     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3185         NumSGPRs > MaxAddressableNumSGPRs)
3186       return OutOfRangeError(SGPRRange);
3187 
3188     NumSGPRs +=
3189         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3190 
3191     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3192         NumSGPRs > MaxAddressableNumSGPRs)
3193       return OutOfRangeError(SGPRRange);
3194 
3195     if (Features.test(FeatureSGPRInitBug))
3196       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3197   }
3198 
3199   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
3200   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3201 
3202   return false;
3203 }
3204 
3205 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3206   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3207     return TokError("directive only supported for amdgcn architecture");
3208 
3209   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3210     return TokError("directive only supported for amdhsa OS");
3211 
3212   StringRef KernelName;
3213   if (getParser().parseIdentifier(KernelName))
3214     return true;
3215 
3216   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3217 
3218   StringSet<> Seen;
3219 
3220   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3221 
3222   SMRange VGPRRange;
3223   uint64_t NextFreeVGPR = 0;
3224   SMRange SGPRRange;
3225   uint64_t NextFreeSGPR = 0;
3226   unsigned UserSGPRCount = 0;
3227   bool ReserveVCC = true;
3228   bool ReserveFlatScr = true;
3229   bool ReserveXNACK = hasXNACK();
3230 
3231   while (true) {
3232     while (getLexer().is(AsmToken::EndOfStatement))
3233       Lex();
3234 
3235     if (getLexer().isNot(AsmToken::Identifier))
3236       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3237 
3238     StringRef ID = getTok().getIdentifier();
3239     SMRange IDRange = getTok().getLocRange();
3240     Lex();
3241 
3242     if (ID == ".end_amdhsa_kernel")
3243       break;
3244 
3245     if (Seen.find(ID) != Seen.end())
3246       return TokError(".amdhsa_ directives cannot be repeated");
3247     Seen.insert(ID);
3248 
3249     SMLoc ValStart = getTok().getLoc();
3250     int64_t IVal;
3251     if (getParser().parseAbsoluteExpression(IVal))
3252       return true;
3253     SMLoc ValEnd = getTok().getLoc();
3254     SMRange ValRange = SMRange(ValStart, ValEnd);
3255 
3256     if (IVal < 0)
3257       return OutOfRangeError(ValRange);
3258 
3259     uint64_t Val = IVal;
3260 
3261 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3262   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3263     return OutOfRangeError(RANGE);                                             \
3264   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3265 
3266     if (ID == ".amdhsa_group_segment_fixed_size") {
3267       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3268         return OutOfRangeError(ValRange);
3269       KD.group_segment_fixed_size = Val;
3270     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3271       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3272         return OutOfRangeError(ValRange);
3273       KD.private_segment_fixed_size = Val;
3274     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3275       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3276                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3277                        Val, ValRange);
3278       UserSGPRCount += 4;
3279     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3280       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3281                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3282                        ValRange);
3283       UserSGPRCount += 2;
3284     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3285       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3286                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3287                        ValRange);
3288       UserSGPRCount += 2;
3289     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3290       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3291                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3292                        Val, ValRange);
3293       UserSGPRCount += 2;
3294     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3295       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3296                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3297                        ValRange);
3298       UserSGPRCount += 2;
3299     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3300       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3301                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3302                        ValRange);
3303       UserSGPRCount += 2;
3304     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3305       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3306                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3307                        Val, ValRange);
3308       UserSGPRCount += 1;
3309     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3310       PARSE_BITS_ENTRY(
3311           KD.compute_pgm_rsrc2,
3312           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3313           ValRange);
3314     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3315       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3316                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3317                        ValRange);
3318     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3319       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3320                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3321                        ValRange);
3322     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3323       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3324                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3325                        ValRange);
3326     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3327       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3328                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3329                        ValRange);
3330     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3331       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3332                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3333                        ValRange);
3334     } else if (ID == ".amdhsa_next_free_vgpr") {
3335       VGPRRange = ValRange;
3336       NextFreeVGPR = Val;
3337     } else if (ID == ".amdhsa_next_free_sgpr") {
3338       SGPRRange = ValRange;
3339       NextFreeSGPR = Val;
3340     } else if (ID == ".amdhsa_reserve_vcc") {
3341       if (!isUInt<1>(Val))
3342         return OutOfRangeError(ValRange);
3343       ReserveVCC = Val;
3344     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3345       if (IVersion.Major < 7)
3346         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3347                                  IDRange);
3348       if (!isUInt<1>(Val))
3349         return OutOfRangeError(ValRange);
3350       ReserveFlatScr = Val;
3351     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3352       if (IVersion.Major < 8)
3353         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3354                                  IDRange);
3355       if (!isUInt<1>(Val))
3356         return OutOfRangeError(ValRange);
3357       ReserveXNACK = Val;
3358     } else if (ID == ".amdhsa_float_round_mode_32") {
3359       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3360                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3361     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3362       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3363                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3364     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3365       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3366                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3367     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3368       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3369                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3370                        ValRange);
3371     } else if (ID == ".amdhsa_dx10_clamp") {
3372       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3373                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3374     } else if (ID == ".amdhsa_ieee_mode") {
3375       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3376                        Val, ValRange);
3377     } else if (ID == ".amdhsa_fp16_overflow") {
3378       if (IVersion.Major < 9)
3379         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3380                                  IDRange);
3381       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3382                        ValRange);
3383     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3384       if (IVersion.Major < 10)
3385         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3386                                  IDRange);
3387       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3388                        ValRange);
3389     } else if (ID == ".amdhsa_memory_ordered") {
3390       if (IVersion.Major < 10)
3391         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3392                                  IDRange);
3393       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3394                        ValRange);
3395     } else if (ID == ".amdhsa_forward_progress") {
3396       if (IVersion.Major < 10)
3397         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3398                                  IDRange);
3399       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3400                        ValRange);
3401     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3402       PARSE_BITS_ENTRY(
3403           KD.compute_pgm_rsrc2,
3404           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3405           ValRange);
3406     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3407       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3408                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3409                        Val, ValRange);
3410     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3411       PARSE_BITS_ENTRY(
3412           KD.compute_pgm_rsrc2,
3413           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3414           ValRange);
3415     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3416       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3417                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3418                        Val, ValRange);
3419     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3420       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3421                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3422                        Val, ValRange);
3423     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3424       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3425                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3426                        Val, ValRange);
3427     } else if (ID == ".amdhsa_exception_int_div_zero") {
3428       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3429                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3430                        Val, ValRange);
3431     } else {
3432       return getParser().Error(IDRange.Start,
3433                                "unknown .amdhsa_kernel directive", IDRange);
3434     }
3435 
3436 #undef PARSE_BITS_ENTRY
3437   }
3438 
3439   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3440     return TokError(".amdhsa_next_free_vgpr directive is required");
3441 
3442   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3443     return TokError(".amdhsa_next_free_sgpr directive is required");
3444 
3445   unsigned VGPRBlocks;
3446   unsigned SGPRBlocks;
3447   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3448                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3449                          SGPRRange, VGPRBlocks, SGPRBlocks))
3450     return true;
3451 
3452   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3453           VGPRBlocks))
3454     return OutOfRangeError(VGPRRange);
3455   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3456                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3457 
3458   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3459           SGPRBlocks))
3460     return OutOfRangeError(SGPRRange);
3461   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3462                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3463                   SGPRBlocks);
3464 
3465   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3466     return TokError("too many user SGPRs enabled");
3467   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3468                   UserSGPRCount);
3469 
3470   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3471       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3472       ReserveFlatScr, ReserveXNACK);
3473   return false;
3474 }
3475 
3476 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3477   uint32_t Major;
3478   uint32_t Minor;
3479 
3480   if (ParseDirectiveMajorMinor(Major, Minor))
3481     return true;
3482 
3483   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3484   return false;
3485 }
3486 
3487 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3488   uint32_t Major;
3489   uint32_t Minor;
3490   uint32_t Stepping;
3491   StringRef VendorName;
3492   StringRef ArchName;
3493 
3494   // If this directive has no arguments, then use the ISA version for the
3495   // targeted GPU.
3496   if (getLexer().is(AsmToken::EndOfStatement)) {
3497     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3498     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3499                                                       ISA.Stepping,
3500                                                       "AMD", "AMDGPU");
3501     return false;
3502   }
3503 
3504   if (ParseDirectiveMajorMinor(Major, Minor))
3505     return true;
3506 
3507   if (getLexer().isNot(AsmToken::Comma))
3508     return TokError("stepping version number required, comma expected");
3509   Lex();
3510 
3511   if (ParseAsAbsoluteExpression(Stepping))
3512     return TokError("invalid stepping version");
3513 
3514   if (getLexer().isNot(AsmToken::Comma))
3515     return TokError("vendor name required, comma expected");
3516   Lex();
3517 
3518   if (getLexer().isNot(AsmToken::String))
3519     return TokError("invalid vendor name");
3520 
3521   VendorName = getLexer().getTok().getStringContents();
3522   Lex();
3523 
3524   if (getLexer().isNot(AsmToken::Comma))
3525     return TokError("arch name required, comma expected");
3526   Lex();
3527 
3528   if (getLexer().isNot(AsmToken::String))
3529     return TokError("invalid arch name");
3530 
3531   ArchName = getLexer().getTok().getStringContents();
3532   Lex();
3533 
3534   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3535                                                     VendorName, ArchName);
3536   return false;
3537 }
3538 
3539 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3540                                                amd_kernel_code_t &Header) {
3541   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3542   // assembly for backwards compatibility.
3543   if (ID == "max_scratch_backing_memory_byte_size") {
3544     Parser.eatToEndOfStatement();
3545     return false;
3546   }
3547 
3548   SmallString<40> ErrStr;
3549   raw_svector_ostream Err(ErrStr);
3550   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3551     return TokError(Err.str());
3552   }
3553   Lex();
3554 
3555   if (ID == "enable_wgp_mode") {
3556     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3557       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3558   }
3559 
3560   if (ID == "enable_mem_ordered") {
3561     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3562       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3563   }
3564 
3565   if (ID == "enable_fwd_progress") {
3566     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3567       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3568   }
3569 
3570   return false;
3571 }
3572 
3573 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3574   amd_kernel_code_t Header;
3575   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3576 
3577   while (true) {
3578     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3579     // will set the current token to EndOfStatement.
3580     while(getLexer().is(AsmToken::EndOfStatement))
3581       Lex();
3582 
3583     if (getLexer().isNot(AsmToken::Identifier))
3584       return TokError("expected value identifier or .end_amd_kernel_code_t");
3585 
3586     StringRef ID = getLexer().getTok().getIdentifier();
3587     Lex();
3588 
3589     if (ID == ".end_amd_kernel_code_t")
3590       break;
3591 
3592     if (ParseAMDKernelCodeTValue(ID, Header))
3593       return true;
3594   }
3595 
3596   getTargetStreamer().EmitAMDKernelCodeT(Header);
3597 
3598   return false;
3599 }
3600 
3601 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3602   if (getLexer().isNot(AsmToken::Identifier))
3603     return TokError("expected symbol name");
3604 
3605   StringRef KernelName = Parser.getTok().getString();
3606 
3607   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3608                                            ELF::STT_AMDGPU_HSA_KERNEL);
3609   Lex();
3610   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3611     KernelScope.initialize(getContext());
3612   return false;
3613 }
3614 
3615 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3616   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3617     return Error(getParser().getTok().getLoc(),
3618                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3619                  "architectures");
3620   }
3621 
3622   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3623 
3624   std::string ISAVersionStringFromSTI;
3625   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3626   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3627 
3628   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3629     return Error(getParser().getTok().getLoc(),
3630                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3631                  "arguments specified through the command line");
3632   }
3633 
3634   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3635   Lex();
3636 
3637   return false;
3638 }
3639 
3640 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3641   const char *AssemblerDirectiveBegin;
3642   const char *AssemblerDirectiveEnd;
3643   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3644       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3645           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3646                             HSAMD::V3::AssemblerDirectiveEnd)
3647           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3648                             HSAMD::AssemblerDirectiveEnd);
3649 
3650   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3651     return Error(getParser().getTok().getLoc(),
3652                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3653                  "not available on non-amdhsa OSes")).str());
3654   }
3655 
3656   std::string HSAMetadataString;
3657   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3658                           HSAMetadataString))
3659     return true;
3660 
3661   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3662     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3663       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3664   } else {
3665     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3666       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3667   }
3668 
3669   return false;
3670 }
3671 
3672 /// Common code to parse out a block of text (typically YAML) between start and
3673 /// end directives.
3674 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3675                                           const char *AssemblerDirectiveEnd,
3676                                           std::string &CollectString) {
3677 
3678   raw_string_ostream CollectStream(CollectString);
3679 
3680   getLexer().setSkipSpace(false);
3681 
3682   bool FoundEnd = false;
3683   while (!getLexer().is(AsmToken::Eof)) {
3684     while (getLexer().is(AsmToken::Space)) {
3685       CollectStream << getLexer().getTok().getString();
3686       Lex();
3687     }
3688 
3689     if (getLexer().is(AsmToken::Identifier)) {
3690       StringRef ID = getLexer().getTok().getIdentifier();
3691       if (ID == AssemblerDirectiveEnd) {
3692         Lex();
3693         FoundEnd = true;
3694         break;
3695       }
3696     }
3697 
3698     CollectStream << Parser.parseStringToEndOfStatement()
3699                   << getContext().getAsmInfo()->getSeparatorString();
3700 
3701     Parser.eatToEndOfStatement();
3702   }
3703 
3704   getLexer().setSkipSpace(true);
3705 
3706   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3707     return TokError(Twine("expected directive ") +
3708                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3709   }
3710 
3711   CollectStream.flush();
3712   return false;
3713 }
3714 
3715 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3716 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3717   std::string String;
3718   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3719                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3720     return true;
3721 
3722   auto PALMetadata = getTargetStreamer().getPALMetadata();
3723   if (!PALMetadata->setFromString(String))
3724     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3725   return false;
3726 }
3727 
3728 /// Parse the assembler directive for old linear-format PAL metadata.
3729 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3730   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3731     return Error(getParser().getTok().getLoc(),
3732                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3733                  "not available on non-amdpal OSes")).str());
3734   }
3735 
3736   auto PALMetadata = getTargetStreamer().getPALMetadata();
3737   PALMetadata->setLegacy();
3738   for (;;) {
3739     uint32_t Key, Value;
3740     if (ParseAsAbsoluteExpression(Key)) {
3741       return TokError(Twine("invalid value in ") +
3742                       Twine(PALMD::AssemblerDirective));
3743     }
3744     if (getLexer().isNot(AsmToken::Comma)) {
3745       return TokError(Twine("expected an even number of values in ") +
3746                       Twine(PALMD::AssemblerDirective));
3747     }
3748     Lex();
3749     if (ParseAsAbsoluteExpression(Value)) {
3750       return TokError(Twine("invalid value in ") +
3751                       Twine(PALMD::AssemblerDirective));
3752     }
3753     PALMetadata->setRegister(Key, Value);
3754     if (getLexer().isNot(AsmToken::Comma))
3755       break;
3756     Lex();
3757   }
3758   return false;
3759 }
3760 
3761 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3762   StringRef IDVal = DirectiveID.getString();
3763 
3764   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3765     if (IDVal == ".amdgcn_target")
3766       return ParseDirectiveAMDGCNTarget();
3767 
3768     if (IDVal == ".amdhsa_kernel")
3769       return ParseDirectiveAMDHSAKernel();
3770 
3771     // TODO: Restructure/combine with PAL metadata directive.
3772     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3773       return ParseDirectiveHSAMetadata();
3774   } else {
3775     if (IDVal == ".hsa_code_object_version")
3776       return ParseDirectiveHSACodeObjectVersion();
3777 
3778     if (IDVal == ".hsa_code_object_isa")
3779       return ParseDirectiveHSACodeObjectISA();
3780 
3781     if (IDVal == ".amd_kernel_code_t")
3782       return ParseDirectiveAMDKernelCodeT();
3783 
3784     if (IDVal == ".amdgpu_hsa_kernel")
3785       return ParseDirectiveAMDGPUHsaKernel();
3786 
3787     if (IDVal == ".amd_amdgpu_isa")
3788       return ParseDirectiveISAVersion();
3789 
3790     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3791       return ParseDirectiveHSAMetadata();
3792   }
3793 
3794   if (IDVal == PALMD::AssemblerDirectiveBegin)
3795     return ParseDirectivePALMetadataBegin();
3796 
3797   if (IDVal == PALMD::AssemblerDirective)
3798     return ParseDirectivePALMetadata();
3799 
3800   return true;
3801 }
3802 
3803 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3804                                            unsigned RegNo) const {
3805 
3806   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3807        R.isValid(); ++R) {
3808     if (*R == RegNo)
3809       return isGFX9() || isGFX10();
3810   }
3811 
3812   // GFX10 has 2 more SGPRs 104 and 105.
3813   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3814        R.isValid(); ++R) {
3815     if (*R == RegNo)
3816       return hasSGPR104_SGPR105();
3817   }
3818 
3819   switch (RegNo) {
3820   case AMDGPU::TBA:
3821   case AMDGPU::TBA_LO:
3822   case AMDGPU::TBA_HI:
3823   case AMDGPU::TMA:
3824   case AMDGPU::TMA_LO:
3825   case AMDGPU::TMA_HI:
3826     return !isGFX9() && !isGFX10();
3827   case AMDGPU::XNACK_MASK:
3828   case AMDGPU::XNACK_MASK_LO:
3829   case AMDGPU::XNACK_MASK_HI:
3830     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3831   case AMDGPU::SGPR_NULL:
3832     return isGFX10();
3833   default:
3834     break;
3835   }
3836 
3837   if (isInlineValue(RegNo))
3838     return !isCI() && !isSI() && !isVI();
3839 
3840   if (isCI())
3841     return true;
3842 
3843   if (isSI() || isGFX10()) {
3844     // No flat_scr on SI.
3845     // On GFX10 flat scratch is not a valid register operand and can only be
3846     // accessed with s_setreg/s_getreg.
3847     switch (RegNo) {
3848     case AMDGPU::FLAT_SCR:
3849     case AMDGPU::FLAT_SCR_LO:
3850     case AMDGPU::FLAT_SCR_HI:
3851       return false;
3852     default:
3853       return true;
3854     }
3855   }
3856 
3857   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3858   // SI/CI have.
3859   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3860        R.isValid(); ++R) {
3861     if (*R == RegNo)
3862       return hasSGPR102_SGPR103();
3863   }
3864 
3865   return true;
3866 }
3867 
3868 OperandMatchResultTy
3869 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
3870                               OperandMode Mode) {
3871   // Try to parse with a custom parser
3872   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3873 
3874   // If we successfully parsed the operand or if there as an error parsing,
3875   // we are done.
3876   //
3877   // If we are parsing after we reach EndOfStatement then this means we
3878   // are appending default values to the Operands list.  This is only done
3879   // by custom parser, so we shouldn't continue on to the generic parsing.
3880   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3881       getLexer().is(AsmToken::EndOfStatement))
3882     return ResTy;
3883 
3884   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
3885     unsigned Prefix = Operands.size();
3886     SMLoc LBraceLoc = getTok().getLoc();
3887     Parser.Lex(); // eat the '['
3888 
3889     for (;;) {
3890       ResTy = parseReg(Operands);
3891       if (ResTy != MatchOperand_Success)
3892         return ResTy;
3893 
3894       if (getLexer().is(AsmToken::RBrac))
3895         break;
3896 
3897       if (getLexer().isNot(AsmToken::Comma))
3898         return MatchOperand_ParseFail;
3899       Parser.Lex();
3900     }
3901 
3902     if (Operands.size() - Prefix > 1) {
3903       Operands.insert(Operands.begin() + Prefix,
3904                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
3905       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
3906                                                     getTok().getLoc()));
3907     }
3908 
3909     Parser.Lex(); // eat the ']'
3910     return MatchOperand_Success;
3911   }
3912 
3913   ResTy = parseRegOrImm(Operands);
3914 
3915   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
3916     return ResTy;
3917 
3918   const auto &Tok = Parser.getTok();
3919   SMLoc S = Tok.getLoc();
3920 
3921   const MCExpr *Expr = nullptr;
3922   if (!Parser.parseExpression(Expr)) {
3923     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3924     return MatchOperand_Success;
3925   }
3926 
3927   // Possibly this is an instruction flag like 'gds'.
3928   if (Tok.getKind() == AsmToken::Identifier) {
3929     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3930     Parser.Lex();
3931     return MatchOperand_Success;
3932   }
3933 
3934   return MatchOperand_NoMatch;
3935 }
3936 
3937 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3938   // Clear any forced encodings from the previous instruction.
3939   setForcedEncodingSize(0);
3940   setForcedDPP(false);
3941   setForcedSDWA(false);
3942 
3943   if (Name.endswith("_e64")) {
3944     setForcedEncodingSize(64);
3945     return Name.substr(0, Name.size() - 4);
3946   } else if (Name.endswith("_e32")) {
3947     setForcedEncodingSize(32);
3948     return Name.substr(0, Name.size() - 4);
3949   } else if (Name.endswith("_dpp")) {
3950     setForcedDPP(true);
3951     return Name.substr(0, Name.size() - 4);
3952   } else if (Name.endswith("_sdwa")) {
3953     setForcedSDWA(true);
3954     return Name.substr(0, Name.size() - 5);
3955   }
3956   return Name;
3957 }
3958 
3959 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3960                                        StringRef Name,
3961                                        SMLoc NameLoc, OperandVector &Operands) {
3962   // Add the instruction mnemonic
3963   Name = parseMnemonicSuffix(Name);
3964   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3965 
3966   bool IsMIMG = Name.startswith("image_");
3967 
3968   while (!getLexer().is(AsmToken::EndOfStatement)) {
3969     OperandMode Mode = OperandMode_Default;
3970     if (IsMIMG && isGFX10() && Operands.size() == 2)
3971       Mode = OperandMode_NSA;
3972     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
3973 
3974     // Eat the comma or space if there is one.
3975     if (getLexer().is(AsmToken::Comma))
3976       Parser.Lex();
3977 
3978     switch (Res) {
3979       case MatchOperand_Success: break;
3980       case MatchOperand_ParseFail:
3981         Error(getLexer().getLoc(), "failed parsing operand.");
3982         while (!getLexer().is(AsmToken::EndOfStatement)) {
3983           Parser.Lex();
3984         }
3985         return true;
3986       case MatchOperand_NoMatch:
3987         Error(getLexer().getLoc(), "not a valid operand.");
3988         while (!getLexer().is(AsmToken::EndOfStatement)) {
3989           Parser.Lex();
3990         }
3991         return true;
3992     }
3993   }
3994 
3995   return false;
3996 }
3997 
3998 //===----------------------------------------------------------------------===//
3999 // Utility functions
4000 //===----------------------------------------------------------------------===//
4001 
4002 OperandMatchResultTy
4003 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
4004   switch(getLexer().getKind()) {
4005     default: return MatchOperand_NoMatch;
4006     case AsmToken::Identifier: {
4007       StringRef Name = Parser.getTok().getString();
4008       if (!Name.equals(Prefix)) {
4009         return MatchOperand_NoMatch;
4010       }
4011 
4012       Parser.Lex();
4013       if (getLexer().isNot(AsmToken::Colon))
4014         return MatchOperand_ParseFail;
4015 
4016       Parser.Lex();
4017 
4018       bool IsMinus = false;
4019       if (getLexer().getKind() == AsmToken::Minus) {
4020         Parser.Lex();
4021         IsMinus = true;
4022       }
4023 
4024       if (getLexer().isNot(AsmToken::Integer))
4025         return MatchOperand_ParseFail;
4026 
4027       if (getParser().parseAbsoluteExpression(Int))
4028         return MatchOperand_ParseFail;
4029 
4030       if (IsMinus)
4031         Int = -Int;
4032       break;
4033     }
4034   }
4035   return MatchOperand_Success;
4036 }
4037 
4038 OperandMatchResultTy
4039 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4040                                     AMDGPUOperand::ImmTy ImmTy,
4041                                     bool (*ConvertResult)(int64_t&)) {
4042   SMLoc S = Parser.getTok().getLoc();
4043   int64_t Value = 0;
4044 
4045   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4046   if (Res != MatchOperand_Success)
4047     return Res;
4048 
4049   if (ConvertResult && !ConvertResult(Value)) {
4050     return MatchOperand_ParseFail;
4051   }
4052 
4053   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4054   return MatchOperand_Success;
4055 }
4056 
4057 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
4058   const char *Prefix,
4059   OperandVector &Operands,
4060   AMDGPUOperand::ImmTy ImmTy,
4061   bool (*ConvertResult)(int64_t&)) {
4062   StringRef Name = Parser.getTok().getString();
4063   if (!Name.equals(Prefix))
4064     return MatchOperand_NoMatch;
4065 
4066   Parser.Lex();
4067   if (getLexer().isNot(AsmToken::Colon))
4068     return MatchOperand_ParseFail;
4069 
4070   Parser.Lex();
4071   if (getLexer().isNot(AsmToken::LBrac))
4072     return MatchOperand_ParseFail;
4073   Parser.Lex();
4074 
4075   unsigned Val = 0;
4076   SMLoc S = Parser.getTok().getLoc();
4077 
4078   // FIXME: How to verify the number of elements matches the number of src
4079   // operands?
4080   for (int I = 0; I < 4; ++I) {
4081     if (I != 0) {
4082       if (getLexer().is(AsmToken::RBrac))
4083         break;
4084 
4085       if (getLexer().isNot(AsmToken::Comma))
4086         return MatchOperand_ParseFail;
4087       Parser.Lex();
4088     }
4089 
4090     if (getLexer().isNot(AsmToken::Integer))
4091       return MatchOperand_ParseFail;
4092 
4093     int64_t Op;
4094     if (getParser().parseAbsoluteExpression(Op))
4095       return MatchOperand_ParseFail;
4096 
4097     if (Op != 0 && Op != 1)
4098       return MatchOperand_ParseFail;
4099     Val |= (Op << I);
4100   }
4101 
4102   Parser.Lex();
4103   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4104   return MatchOperand_Success;
4105 }
4106 
4107 OperandMatchResultTy
4108 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4109                                AMDGPUOperand::ImmTy ImmTy) {
4110   int64_t Bit = 0;
4111   SMLoc S = Parser.getTok().getLoc();
4112 
4113   // We are at the end of the statement, and this is a default argument, so
4114   // use a default value.
4115   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4116     switch(getLexer().getKind()) {
4117       case AsmToken::Identifier: {
4118         StringRef Tok = Parser.getTok().getString();
4119         if (Tok == Name) {
4120           if (Tok == "r128" && isGFX9())
4121             Error(S, "r128 modifier is not supported on this GPU");
4122           if (Tok == "a16" && !isGFX9())
4123             Error(S, "a16 modifier is not supported on this GPU");
4124           Bit = 1;
4125           Parser.Lex();
4126         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4127           Bit = 0;
4128           Parser.Lex();
4129         } else {
4130           return MatchOperand_NoMatch;
4131         }
4132         break;
4133       }
4134       default:
4135         return MatchOperand_NoMatch;
4136     }
4137   }
4138 
4139   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4140     return MatchOperand_ParseFail;
4141 
4142   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4143   return MatchOperand_Success;
4144 }
4145 
4146 static void addOptionalImmOperand(
4147   MCInst& Inst, const OperandVector& Operands,
4148   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4149   AMDGPUOperand::ImmTy ImmT,
4150   int64_t Default = 0) {
4151   auto i = OptionalIdx.find(ImmT);
4152   if (i != OptionalIdx.end()) {
4153     unsigned Idx = i->second;
4154     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4155   } else {
4156     Inst.addOperand(MCOperand::createImm(Default));
4157   }
4158 }
4159 
4160 OperandMatchResultTy
4161 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4162   if (getLexer().isNot(AsmToken::Identifier)) {
4163     return MatchOperand_NoMatch;
4164   }
4165   StringRef Tok = Parser.getTok().getString();
4166   if (Tok != Prefix) {
4167     return MatchOperand_NoMatch;
4168   }
4169 
4170   Parser.Lex();
4171   if (getLexer().isNot(AsmToken::Colon)) {
4172     return MatchOperand_ParseFail;
4173   }
4174 
4175   Parser.Lex();
4176   if (getLexer().isNot(AsmToken::Identifier)) {
4177     return MatchOperand_ParseFail;
4178   }
4179 
4180   Value = Parser.getTok().getString();
4181   return MatchOperand_Success;
4182 }
4183 
4184 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4185 // values to live in a joint format operand in the MCInst encoding.
4186 OperandMatchResultTy
4187 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4188   SMLoc S = Parser.getTok().getLoc();
4189   int64_t Dfmt = 0, Nfmt = 0;
4190   // dfmt and nfmt can appear in either order, and each is optional.
4191   bool GotDfmt = false, GotNfmt = false;
4192   while (!GotDfmt || !GotNfmt) {
4193     if (!GotDfmt) {
4194       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4195       if (Res != MatchOperand_NoMatch) {
4196         if (Res != MatchOperand_Success)
4197           return Res;
4198         if (Dfmt >= 16) {
4199           Error(Parser.getTok().getLoc(), "out of range dfmt");
4200           return MatchOperand_ParseFail;
4201         }
4202         GotDfmt = true;
4203         Parser.Lex();
4204         continue;
4205       }
4206     }
4207     if (!GotNfmt) {
4208       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4209       if (Res != MatchOperand_NoMatch) {
4210         if (Res != MatchOperand_Success)
4211           return Res;
4212         if (Nfmt >= 8) {
4213           Error(Parser.getTok().getLoc(), "out of range nfmt");
4214           return MatchOperand_ParseFail;
4215         }
4216         GotNfmt = true;
4217         Parser.Lex();
4218         continue;
4219       }
4220     }
4221     break;
4222   }
4223   if (!GotDfmt && !GotNfmt)
4224     return MatchOperand_NoMatch;
4225   auto Format = Dfmt | Nfmt << 4;
4226   Operands.push_back(
4227       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4228   return MatchOperand_Success;
4229 }
4230 
4231 //===----------------------------------------------------------------------===//
4232 // ds
4233 //===----------------------------------------------------------------------===//
4234 
4235 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4236                                     const OperandVector &Operands) {
4237   OptionalImmIndexMap OptionalIdx;
4238 
4239   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4240     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4241 
4242     // Add the register arguments
4243     if (Op.isReg()) {
4244       Op.addRegOperands(Inst, 1);
4245       continue;
4246     }
4247 
4248     // Handle optional arguments
4249     OptionalIdx[Op.getImmTy()] = i;
4250   }
4251 
4252   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4253   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4254   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4255 
4256   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4257 }
4258 
4259 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4260                                 bool IsGdsHardcoded) {
4261   OptionalImmIndexMap OptionalIdx;
4262 
4263   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4264     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4265 
4266     // Add the register arguments
4267     if (Op.isReg()) {
4268       Op.addRegOperands(Inst, 1);
4269       continue;
4270     }
4271 
4272     if (Op.isToken() && Op.getToken() == "gds") {
4273       IsGdsHardcoded = true;
4274       continue;
4275     }
4276 
4277     // Handle optional arguments
4278     OptionalIdx[Op.getImmTy()] = i;
4279   }
4280 
4281   AMDGPUOperand::ImmTy OffsetType =
4282     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4283      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4284      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4285                                                       AMDGPUOperand::ImmTyOffset;
4286 
4287   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4288 
4289   if (!IsGdsHardcoded) {
4290     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4291   }
4292   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4293 }
4294 
4295 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4296   OptionalImmIndexMap OptionalIdx;
4297 
4298   unsigned OperandIdx[4];
4299   unsigned EnMask = 0;
4300   int SrcIdx = 0;
4301 
4302   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4303     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4304 
4305     // Add the register arguments
4306     if (Op.isReg()) {
4307       assert(SrcIdx < 4);
4308       OperandIdx[SrcIdx] = Inst.size();
4309       Op.addRegOperands(Inst, 1);
4310       ++SrcIdx;
4311       continue;
4312     }
4313 
4314     if (Op.isOff()) {
4315       assert(SrcIdx < 4);
4316       OperandIdx[SrcIdx] = Inst.size();
4317       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4318       ++SrcIdx;
4319       continue;
4320     }
4321 
4322     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4323       Op.addImmOperands(Inst, 1);
4324       continue;
4325     }
4326 
4327     if (Op.isToken() && Op.getToken() == "done")
4328       continue;
4329 
4330     // Handle optional arguments
4331     OptionalIdx[Op.getImmTy()] = i;
4332   }
4333 
4334   assert(SrcIdx == 4);
4335 
4336   bool Compr = false;
4337   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4338     Compr = true;
4339     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4340     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4341     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4342   }
4343 
4344   for (auto i = 0; i < SrcIdx; ++i) {
4345     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4346       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4347     }
4348   }
4349 
4350   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4351   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4352 
4353   Inst.addOperand(MCOperand::createImm(EnMask));
4354 }
4355 
4356 //===----------------------------------------------------------------------===//
4357 // s_waitcnt
4358 //===----------------------------------------------------------------------===//
4359 
4360 static bool
4361 encodeCnt(
4362   const AMDGPU::IsaVersion ISA,
4363   int64_t &IntVal,
4364   int64_t CntVal,
4365   bool Saturate,
4366   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4367   unsigned (*decode)(const IsaVersion &Version, unsigned))
4368 {
4369   bool Failed = false;
4370 
4371   IntVal = encode(ISA, IntVal, CntVal);
4372   if (CntVal != decode(ISA, IntVal)) {
4373     if (Saturate) {
4374       IntVal = encode(ISA, IntVal, -1);
4375     } else {
4376       Failed = true;
4377     }
4378   }
4379   return Failed;
4380 }
4381 
4382 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4383   StringRef CntName = Parser.getTok().getString();
4384   int64_t CntVal;
4385 
4386   Parser.Lex();
4387   if (getLexer().isNot(AsmToken::LParen))
4388     return true;
4389 
4390   Parser.Lex();
4391   if (getLexer().isNot(AsmToken::Integer))
4392     return true;
4393 
4394   SMLoc ValLoc = Parser.getTok().getLoc();
4395   if (getParser().parseAbsoluteExpression(CntVal))
4396     return true;
4397 
4398   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4399 
4400   bool Failed = true;
4401   bool Sat = CntName.endswith("_sat");
4402 
4403   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4404     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4405   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4406     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4407   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4408     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4409   }
4410 
4411   if (Failed) {
4412     Error(ValLoc, "too large value for " + CntName);
4413     return true;
4414   }
4415 
4416   if (getLexer().isNot(AsmToken::RParen)) {
4417     return true;
4418   }
4419 
4420   Parser.Lex();
4421   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4422     const AsmToken NextToken = getLexer().peekTok();
4423     if (NextToken.is(AsmToken::Identifier)) {
4424       Parser.Lex();
4425     }
4426   }
4427 
4428   return false;
4429 }
4430 
4431 OperandMatchResultTy
4432 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4433   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4434   int64_t Waitcnt = getWaitcntBitMask(ISA);
4435   SMLoc S = Parser.getTok().getLoc();
4436 
4437   switch(getLexer().getKind()) {
4438     default: return MatchOperand_ParseFail;
4439     case AsmToken::Integer:
4440       // The operand can be an integer value.
4441       if (getParser().parseAbsoluteExpression(Waitcnt))
4442         return MatchOperand_ParseFail;
4443       break;
4444 
4445     case AsmToken::Identifier:
4446       do {
4447         if (parseCnt(Waitcnt))
4448           return MatchOperand_ParseFail;
4449       } while(getLexer().isNot(AsmToken::EndOfStatement));
4450       break;
4451   }
4452   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4453   return MatchOperand_Success;
4454 }
4455 
4456 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4457                                           int64_t &Width) {
4458   using namespace llvm::AMDGPU::Hwreg;
4459 
4460   if (Parser.getTok().getString() != "hwreg")
4461     return true;
4462   Parser.Lex();
4463 
4464   if (getLexer().isNot(AsmToken::LParen))
4465     return true;
4466   Parser.Lex();
4467 
4468   if (getLexer().is(AsmToken::Identifier)) {
4469     HwReg.IsSymbolic = true;
4470     HwReg.Id = ID_UNKNOWN_;
4471     const StringRef tok = Parser.getTok().getString();
4472     int Last = ID_SYMBOLIC_LAST_;
4473     if (isSI() || isCI() || isVI())
4474       Last = ID_SYMBOLIC_FIRST_GFX9_;
4475     else if (isGFX9())
4476       Last = ID_SYMBOLIC_FIRST_GFX10_;
4477     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4478       if (tok == IdSymbolic[i]) {
4479         HwReg.Id = i;
4480         break;
4481       }
4482     }
4483     Parser.Lex();
4484   } else {
4485     HwReg.IsSymbolic = false;
4486     if (getLexer().isNot(AsmToken::Integer))
4487       return true;
4488     if (getParser().parseAbsoluteExpression(HwReg.Id))
4489       return true;
4490   }
4491 
4492   if (getLexer().is(AsmToken::RParen)) {
4493     Parser.Lex();
4494     return false;
4495   }
4496 
4497   // optional params
4498   if (getLexer().isNot(AsmToken::Comma))
4499     return true;
4500   Parser.Lex();
4501 
4502   if (getLexer().isNot(AsmToken::Integer))
4503     return true;
4504   if (getParser().parseAbsoluteExpression(Offset))
4505     return true;
4506 
4507   if (getLexer().isNot(AsmToken::Comma))
4508     return true;
4509   Parser.Lex();
4510 
4511   if (getLexer().isNot(AsmToken::Integer))
4512     return true;
4513   if (getParser().parseAbsoluteExpression(Width))
4514     return true;
4515 
4516   if (getLexer().isNot(AsmToken::RParen))
4517     return true;
4518   Parser.Lex();
4519 
4520   return false;
4521 }
4522 
4523 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4524   using namespace llvm::AMDGPU::Hwreg;
4525 
4526   int64_t Imm16Val = 0;
4527   SMLoc S = Parser.getTok().getLoc();
4528 
4529   switch(getLexer().getKind()) {
4530     default: return MatchOperand_NoMatch;
4531     case AsmToken::Integer:
4532       // The operand can be an integer value.
4533       if (getParser().parseAbsoluteExpression(Imm16Val))
4534         return MatchOperand_NoMatch;
4535       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4536         Error(S, "invalid immediate: only 16-bit values are legal");
4537         // Do not return error code, but create an imm operand anyway and proceed
4538         // to the next operand, if any. That avoids unneccessary error messages.
4539       }
4540       break;
4541 
4542     case AsmToken::Identifier: {
4543         OperandInfoTy HwReg(ID_UNKNOWN_);
4544         int64_t Offset = OFFSET_DEFAULT_;
4545         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4546         if (parseHwregConstruct(HwReg, Offset, Width))
4547           return MatchOperand_ParseFail;
4548         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4549           if (HwReg.IsSymbolic)
4550             Error(S, "invalid symbolic name of hardware register");
4551           else
4552             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4553         }
4554         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4555           Error(S, "invalid bit offset: only 5-bit values are legal");
4556         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4557           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4558         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4559       }
4560       break;
4561   }
4562   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4563   return MatchOperand_Success;
4564 }
4565 
4566 bool AMDGPUOperand::isSWaitCnt() const {
4567   return isImm();
4568 }
4569 
4570 bool AMDGPUOperand::isHwreg() const {
4571   return isImmTy(ImmTyHwreg);
4572 }
4573 
4574 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4575   using namespace llvm::AMDGPU::SendMsg;
4576 
4577   if (Parser.getTok().getString() != "sendmsg")
4578     return true;
4579   Parser.Lex();
4580 
4581   if (getLexer().isNot(AsmToken::LParen))
4582     return true;
4583   Parser.Lex();
4584 
4585   if (getLexer().is(AsmToken::Identifier)) {
4586     Msg.IsSymbolic = true;
4587     Msg.Id = ID_UNKNOWN_;
4588     const std::string tok = Parser.getTok().getString();
4589     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4590       switch(i) {
4591         default: continue; // Omit gaps.
4592         case ID_GS_ALLOC_REQ:
4593           if (isSI() || isCI() || isVI())
4594             continue;
4595           break;
4596         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4597         case ID_SYSMSG: break;
4598       }
4599       if (tok == IdSymbolic[i]) {
4600         Msg.Id = i;
4601         break;
4602       }
4603     }
4604     Parser.Lex();
4605   } else {
4606     Msg.IsSymbolic = false;
4607     if (getLexer().isNot(AsmToken::Integer))
4608       return true;
4609     if (getParser().parseAbsoluteExpression(Msg.Id))
4610       return true;
4611     if (getLexer().is(AsmToken::Integer))
4612       if (getParser().parseAbsoluteExpression(Msg.Id))
4613         Msg.Id = ID_UNKNOWN_;
4614   }
4615   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4616     return false;
4617 
4618   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4619     if (getLexer().isNot(AsmToken::RParen))
4620       return true;
4621     Parser.Lex();
4622     return false;
4623   }
4624 
4625   if (getLexer().isNot(AsmToken::Comma))
4626     return true;
4627   Parser.Lex();
4628 
4629   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4630   Operation.Id = ID_UNKNOWN_;
4631   if (getLexer().is(AsmToken::Identifier)) {
4632     Operation.IsSymbolic = true;
4633     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4634     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4635     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4636     const StringRef Tok = Parser.getTok().getString();
4637     for (int i = F; i < L; ++i) {
4638       if (Tok == S[i]) {
4639         Operation.Id = i;
4640         break;
4641       }
4642     }
4643     Parser.Lex();
4644   } else {
4645     Operation.IsSymbolic = false;
4646     if (getLexer().isNot(AsmToken::Integer))
4647       return true;
4648     if (getParser().parseAbsoluteExpression(Operation.Id))
4649       return true;
4650   }
4651 
4652   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4653     // Stream id is optional.
4654     if (getLexer().is(AsmToken::RParen)) {
4655       Parser.Lex();
4656       return false;
4657     }
4658 
4659     if (getLexer().isNot(AsmToken::Comma))
4660       return true;
4661     Parser.Lex();
4662 
4663     if (getLexer().isNot(AsmToken::Integer))
4664       return true;
4665     if (getParser().parseAbsoluteExpression(StreamId))
4666       return true;
4667   }
4668 
4669   if (getLexer().isNot(AsmToken::RParen))
4670     return true;
4671   Parser.Lex();
4672   return false;
4673 }
4674 
4675 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4676   if (getLexer().getKind() != AsmToken::Identifier)
4677     return MatchOperand_NoMatch;
4678 
4679   StringRef Str = Parser.getTok().getString();
4680   int Slot = StringSwitch<int>(Str)
4681     .Case("p10", 0)
4682     .Case("p20", 1)
4683     .Case("p0", 2)
4684     .Default(-1);
4685 
4686   SMLoc S = Parser.getTok().getLoc();
4687   if (Slot == -1)
4688     return MatchOperand_ParseFail;
4689 
4690   Parser.Lex();
4691   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4692                                               AMDGPUOperand::ImmTyInterpSlot));
4693   return MatchOperand_Success;
4694 }
4695 
4696 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4697   if (getLexer().getKind() != AsmToken::Identifier)
4698     return MatchOperand_NoMatch;
4699 
4700   StringRef Str = Parser.getTok().getString();
4701   if (!Str.startswith("attr"))
4702     return MatchOperand_NoMatch;
4703 
4704   StringRef Chan = Str.take_back(2);
4705   int AttrChan = StringSwitch<int>(Chan)
4706     .Case(".x", 0)
4707     .Case(".y", 1)
4708     .Case(".z", 2)
4709     .Case(".w", 3)
4710     .Default(-1);
4711   if (AttrChan == -1)
4712     return MatchOperand_ParseFail;
4713 
4714   Str = Str.drop_back(2).drop_front(4);
4715 
4716   uint8_t Attr;
4717   if (Str.getAsInteger(10, Attr))
4718     return MatchOperand_ParseFail;
4719 
4720   SMLoc S = Parser.getTok().getLoc();
4721   Parser.Lex();
4722   if (Attr > 63) {
4723     Error(S, "out of bounds attr");
4724     return MatchOperand_Success;
4725   }
4726 
4727   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4728 
4729   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4730                                               AMDGPUOperand::ImmTyInterpAttr));
4731   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4732                                               AMDGPUOperand::ImmTyAttrChan));
4733   return MatchOperand_Success;
4734 }
4735 
4736 void AMDGPUAsmParser::errorExpTgt() {
4737   Error(Parser.getTok().getLoc(), "invalid exp target");
4738 }
4739 
4740 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4741                                                       uint8_t &Val) {
4742   if (Str == "null") {
4743     Val = 9;
4744     return MatchOperand_Success;
4745   }
4746 
4747   if (Str.startswith("mrt")) {
4748     Str = Str.drop_front(3);
4749     if (Str == "z") { // == mrtz
4750       Val = 8;
4751       return MatchOperand_Success;
4752     }
4753 
4754     if (Str.getAsInteger(10, Val))
4755       return MatchOperand_ParseFail;
4756 
4757     if (Val > 7)
4758       errorExpTgt();
4759 
4760     return MatchOperand_Success;
4761   }
4762 
4763   if (Str.startswith("pos")) {
4764     Str = Str.drop_front(3);
4765     if (Str.getAsInteger(10, Val))
4766       return MatchOperand_ParseFail;
4767 
4768     if (Val > 4 || (Val == 4 && !isGFX10()))
4769       errorExpTgt();
4770 
4771     Val += 12;
4772     return MatchOperand_Success;
4773   }
4774 
4775   if (isGFX10() && Str == "prim") {
4776     Val = 20;
4777     return MatchOperand_Success;
4778   }
4779 
4780   if (Str.startswith("param")) {
4781     Str = Str.drop_front(5);
4782     if (Str.getAsInteger(10, Val))
4783       return MatchOperand_ParseFail;
4784 
4785     if (Val >= 32)
4786       errorExpTgt();
4787 
4788     Val += 32;
4789     return MatchOperand_Success;
4790   }
4791 
4792   if (Str.startswith("invalid_target_")) {
4793     Str = Str.drop_front(15);
4794     if (Str.getAsInteger(10, Val))
4795       return MatchOperand_ParseFail;
4796 
4797     errorExpTgt();
4798     return MatchOperand_Success;
4799   }
4800 
4801   return MatchOperand_NoMatch;
4802 }
4803 
4804 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4805   uint8_t Val;
4806   StringRef Str = Parser.getTok().getString();
4807 
4808   auto Res = parseExpTgtImpl(Str, Val);
4809   if (Res != MatchOperand_Success)
4810     return Res;
4811 
4812   SMLoc S = Parser.getTok().getLoc();
4813   Parser.Lex();
4814 
4815   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4816                                               AMDGPUOperand::ImmTyExpTgt));
4817   return MatchOperand_Success;
4818 }
4819 
4820 OperandMatchResultTy
4821 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4822   using namespace llvm::AMDGPU::SendMsg;
4823 
4824   int64_t Imm16Val = 0;
4825   SMLoc S = Parser.getTok().getLoc();
4826 
4827   switch(getLexer().getKind()) {
4828   default:
4829     return MatchOperand_NoMatch;
4830   case AsmToken::Integer:
4831     // The operand can be an integer value.
4832     if (getParser().parseAbsoluteExpression(Imm16Val))
4833       return MatchOperand_NoMatch;
4834     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4835       Error(S, "invalid immediate: only 16-bit values are legal");
4836       // Do not return error code, but create an imm operand anyway and proceed
4837       // to the next operand, if any. That avoids unneccessary error messages.
4838     }
4839     break;
4840   case AsmToken::Identifier: {
4841       OperandInfoTy Msg(ID_UNKNOWN_);
4842       OperandInfoTy Operation(OP_UNKNOWN_);
4843       int64_t StreamId = STREAM_ID_DEFAULT_;
4844       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4845         return MatchOperand_ParseFail;
4846       do {
4847         // Validate and encode message ID.
4848         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4849                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4850                 || Msg.Id == ID_SYSMSG)) {
4851           if (Msg.IsSymbolic)
4852             Error(S, "invalid/unsupported symbolic name of message");
4853           else
4854             Error(S, "invalid/unsupported code of message");
4855           break;
4856         }
4857         Imm16Val = (Msg.Id << ID_SHIFT_);
4858         // Validate and encode operation ID.
4859         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4860           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4861             if (Operation.IsSymbolic)
4862               Error(S, "invalid symbolic name of GS_OP");
4863             else
4864               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4865             break;
4866           }
4867           if (Operation.Id == OP_GS_NOP
4868               && Msg.Id != ID_GS_DONE) {
4869             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4870             break;
4871           }
4872           Imm16Val |= (Operation.Id << OP_SHIFT_);
4873         }
4874         if (Msg.Id == ID_SYSMSG) {
4875           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4876             if (Operation.IsSymbolic)
4877               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4878             else
4879               Error(S, "invalid/unsupported code of SYSMSG_OP");
4880             break;
4881           }
4882           Imm16Val |= (Operation.Id << OP_SHIFT_);
4883         }
4884         // Validate and encode stream ID.
4885         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4886           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4887             Error(S, "invalid stream id: only 2-bit values are legal");
4888             break;
4889           }
4890           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4891         }
4892       } while (false);
4893     }
4894     break;
4895   }
4896   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4897   return MatchOperand_Success;
4898 }
4899 
4900 bool AMDGPUOperand::isSendMsg() const {
4901   return isImmTy(ImmTySendMsg);
4902 }
4903 
4904 //===----------------------------------------------------------------------===//
4905 // parser helpers
4906 //===----------------------------------------------------------------------===//
4907 
4908 bool
4909 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4910   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4911 }
4912 
4913 bool
4914 AMDGPUAsmParser::isId(const StringRef Id) const {
4915   return isId(getToken(), Id);
4916 }
4917 
4918 bool
4919 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4920   return getTokenKind() == Kind;
4921 }
4922 
4923 bool
4924 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4925   if (isId(Id)) {
4926     lex();
4927     return true;
4928   }
4929   return false;
4930 }
4931 
4932 bool
4933 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4934   if (isToken(Kind)) {
4935     lex();
4936     return true;
4937   }
4938   return false;
4939 }
4940 
4941 bool
4942 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4943                            const StringRef ErrMsg) {
4944   if (!trySkipToken(Kind)) {
4945     Error(getLoc(), ErrMsg);
4946     return false;
4947   }
4948   return true;
4949 }
4950 
4951 bool
4952 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4953   return !getParser().parseAbsoluteExpression(Imm);
4954 }
4955 
4956 bool
4957 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4958   if (isToken(AsmToken::String)) {
4959     Val = getToken().getStringContents();
4960     lex();
4961     return true;
4962   } else {
4963     Error(getLoc(), ErrMsg);
4964     return false;
4965   }
4966 }
4967 
4968 AsmToken
4969 AMDGPUAsmParser::getToken() const {
4970   return Parser.getTok();
4971 }
4972 
4973 AsmToken
4974 AMDGPUAsmParser::peekToken() {
4975   return getLexer().peekTok();
4976 }
4977 
4978 void
4979 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4980   auto TokCount = getLexer().peekTokens(Tokens);
4981 
4982   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4983     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4984 }
4985 
4986 AsmToken::TokenKind
4987 AMDGPUAsmParser::getTokenKind() const {
4988   return getLexer().getKind();
4989 }
4990 
4991 SMLoc
4992 AMDGPUAsmParser::getLoc() const {
4993   return getToken().getLoc();
4994 }
4995 
4996 StringRef
4997 AMDGPUAsmParser::getTokenStr() const {
4998   return getToken().getString();
4999 }
5000 
5001 void
5002 AMDGPUAsmParser::lex() {
5003   Parser.Lex();
5004 }
5005 
5006 //===----------------------------------------------------------------------===//
5007 // swizzle
5008 //===----------------------------------------------------------------------===//
5009 
5010 LLVM_READNONE
5011 static unsigned
5012 encodeBitmaskPerm(const unsigned AndMask,
5013                   const unsigned OrMask,
5014                   const unsigned XorMask) {
5015   using namespace llvm::AMDGPU::Swizzle;
5016 
5017   return BITMASK_PERM_ENC |
5018          (AndMask << BITMASK_AND_SHIFT) |
5019          (OrMask  << BITMASK_OR_SHIFT)  |
5020          (XorMask << BITMASK_XOR_SHIFT);
5021 }
5022 
5023 bool
5024 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5025                                       const unsigned MinVal,
5026                                       const unsigned MaxVal,
5027                                       const StringRef ErrMsg) {
5028   for (unsigned i = 0; i < OpNum; ++i) {
5029     if (!skipToken(AsmToken::Comma, "expected a comma")){
5030       return false;
5031     }
5032     SMLoc ExprLoc = Parser.getTok().getLoc();
5033     if (!parseExpr(Op[i])) {
5034       return false;
5035     }
5036     if (Op[i] < MinVal || Op[i] > MaxVal) {
5037       Error(ExprLoc, ErrMsg);
5038       return false;
5039     }
5040   }
5041 
5042   return true;
5043 }
5044 
5045 bool
5046 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5047   using namespace llvm::AMDGPU::Swizzle;
5048 
5049   int64_t Lane[LANE_NUM];
5050   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5051                            "expected a 2-bit lane id")) {
5052     Imm = QUAD_PERM_ENC;
5053     for (unsigned I = 0; I < LANE_NUM; ++I) {
5054       Imm |= Lane[I] << (LANE_SHIFT * I);
5055     }
5056     return true;
5057   }
5058   return false;
5059 }
5060 
5061 bool
5062 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5063   using namespace llvm::AMDGPU::Swizzle;
5064 
5065   SMLoc S = Parser.getTok().getLoc();
5066   int64_t GroupSize;
5067   int64_t LaneIdx;
5068 
5069   if (!parseSwizzleOperands(1, &GroupSize,
5070                             2, 32,
5071                             "group size must be in the interval [2,32]")) {
5072     return false;
5073   }
5074   if (!isPowerOf2_64(GroupSize)) {
5075     Error(S, "group size must be a power of two");
5076     return false;
5077   }
5078   if (parseSwizzleOperands(1, &LaneIdx,
5079                            0, GroupSize - 1,
5080                            "lane id must be in the interval [0,group size - 1]")) {
5081     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5082     return true;
5083   }
5084   return false;
5085 }
5086 
5087 bool
5088 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5089   using namespace llvm::AMDGPU::Swizzle;
5090 
5091   SMLoc S = Parser.getTok().getLoc();
5092   int64_t GroupSize;
5093 
5094   if (!parseSwizzleOperands(1, &GroupSize,
5095       2, 32, "group size must be in the interval [2,32]")) {
5096     return false;
5097   }
5098   if (!isPowerOf2_64(GroupSize)) {
5099     Error(S, "group size must be a power of two");
5100     return false;
5101   }
5102 
5103   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5104   return true;
5105 }
5106 
5107 bool
5108 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5109   using namespace llvm::AMDGPU::Swizzle;
5110 
5111   SMLoc S = Parser.getTok().getLoc();
5112   int64_t GroupSize;
5113 
5114   if (!parseSwizzleOperands(1, &GroupSize,
5115       1, 16, "group size must be in the interval [1,16]")) {
5116     return false;
5117   }
5118   if (!isPowerOf2_64(GroupSize)) {
5119     Error(S, "group size must be a power of two");
5120     return false;
5121   }
5122 
5123   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5124   return true;
5125 }
5126 
5127 bool
5128 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5129   using namespace llvm::AMDGPU::Swizzle;
5130 
5131   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5132     return false;
5133   }
5134 
5135   StringRef Ctl;
5136   SMLoc StrLoc = Parser.getTok().getLoc();
5137   if (!parseString(Ctl)) {
5138     return false;
5139   }
5140   if (Ctl.size() != BITMASK_WIDTH) {
5141     Error(StrLoc, "expected a 5-character mask");
5142     return false;
5143   }
5144 
5145   unsigned AndMask = 0;
5146   unsigned OrMask = 0;
5147   unsigned XorMask = 0;
5148 
5149   for (size_t i = 0; i < Ctl.size(); ++i) {
5150     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5151     switch(Ctl[i]) {
5152     default:
5153       Error(StrLoc, "invalid mask");
5154       return false;
5155     case '0':
5156       break;
5157     case '1':
5158       OrMask |= Mask;
5159       break;
5160     case 'p':
5161       AndMask |= Mask;
5162       break;
5163     case 'i':
5164       AndMask |= Mask;
5165       XorMask |= Mask;
5166       break;
5167     }
5168   }
5169 
5170   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5171   return true;
5172 }
5173 
5174 bool
5175 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5176 
5177   SMLoc OffsetLoc = Parser.getTok().getLoc();
5178 
5179   if (!parseExpr(Imm)) {
5180     return false;
5181   }
5182   if (!isUInt<16>(Imm)) {
5183     Error(OffsetLoc, "expected a 16-bit offset");
5184     return false;
5185   }
5186   return true;
5187 }
5188 
5189 bool
5190 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5191   using namespace llvm::AMDGPU::Swizzle;
5192 
5193   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5194 
5195     SMLoc ModeLoc = Parser.getTok().getLoc();
5196     bool Ok = false;
5197 
5198     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5199       Ok = parseSwizzleQuadPerm(Imm);
5200     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5201       Ok = parseSwizzleBitmaskPerm(Imm);
5202     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5203       Ok = parseSwizzleBroadcast(Imm);
5204     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5205       Ok = parseSwizzleSwap(Imm);
5206     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5207       Ok = parseSwizzleReverse(Imm);
5208     } else {
5209       Error(ModeLoc, "expected a swizzle mode");
5210     }
5211 
5212     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5213   }
5214 
5215   return false;
5216 }
5217 
5218 OperandMatchResultTy
5219 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5220   SMLoc S = Parser.getTok().getLoc();
5221   int64_t Imm = 0;
5222 
5223   if (trySkipId("offset")) {
5224 
5225     bool Ok = false;
5226     if (skipToken(AsmToken::Colon, "expected a colon")) {
5227       if (trySkipId("swizzle")) {
5228         Ok = parseSwizzleMacro(Imm);
5229       } else {
5230         Ok = parseSwizzleOffset(Imm);
5231       }
5232     }
5233 
5234     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5235 
5236     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5237   } else {
5238     // Swizzle "offset" operand is optional.
5239     // If it is omitted, try parsing other optional operands.
5240     return parseOptionalOpr(Operands);
5241   }
5242 }
5243 
5244 bool
5245 AMDGPUOperand::isSwizzle() const {
5246   return isImmTy(ImmTySwizzle);
5247 }
5248 
5249 //===----------------------------------------------------------------------===//
5250 // VGPR Index Mode
5251 //===----------------------------------------------------------------------===//
5252 
5253 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5254 
5255   using namespace llvm::AMDGPU::VGPRIndexMode;
5256 
5257   if (trySkipToken(AsmToken::RParen)) {
5258     return OFF;
5259   }
5260 
5261   int64_t Imm = 0;
5262 
5263   while (true) {
5264     unsigned Mode = 0;
5265     SMLoc S = Parser.getTok().getLoc();
5266 
5267     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5268       if (trySkipId(IdSymbolic[ModeId])) {
5269         Mode = 1 << ModeId;
5270         break;
5271       }
5272     }
5273 
5274     if (Mode == 0) {
5275       Error(S, (Imm == 0)?
5276                "expected a VGPR index mode or a closing parenthesis" :
5277                "expected a VGPR index mode");
5278       break;
5279     }
5280 
5281     if (Imm & Mode) {
5282       Error(S, "duplicate VGPR index mode");
5283       break;
5284     }
5285     Imm |= Mode;
5286 
5287     if (trySkipToken(AsmToken::RParen))
5288       break;
5289     if (!skipToken(AsmToken::Comma,
5290                    "expected a comma or a closing parenthesis"))
5291       break;
5292   }
5293 
5294   return Imm;
5295 }
5296 
5297 OperandMatchResultTy
5298 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5299 
5300   int64_t Imm = 0;
5301   SMLoc S = Parser.getTok().getLoc();
5302 
5303   if (getLexer().getKind() == AsmToken::Identifier &&
5304       Parser.getTok().getString() == "gpr_idx" &&
5305       getLexer().peekTok().is(AsmToken::LParen)) {
5306 
5307     Parser.Lex();
5308     Parser.Lex();
5309 
5310     // If parse failed, trigger an error but do not return error code
5311     // to avoid excessive error messages.
5312     Imm = parseGPRIdxMacro();
5313 
5314   } else {
5315     if (getParser().parseAbsoluteExpression(Imm))
5316       return MatchOperand_NoMatch;
5317     if (Imm < 0 || !isUInt<4>(Imm)) {
5318       Error(S, "invalid immediate: only 4-bit values are legal");
5319     }
5320   }
5321 
5322   Operands.push_back(
5323       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5324   return MatchOperand_Success;
5325 }
5326 
5327 bool AMDGPUOperand::isGPRIdxMode() const {
5328   return isImmTy(ImmTyGprIdxMode);
5329 }
5330 
5331 //===----------------------------------------------------------------------===//
5332 // sopp branch targets
5333 //===----------------------------------------------------------------------===//
5334 
5335 OperandMatchResultTy
5336 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5337   SMLoc S = Parser.getTok().getLoc();
5338 
5339   switch (getLexer().getKind()) {
5340     default: return MatchOperand_ParseFail;
5341     case AsmToken::Integer: {
5342       int64_t Imm;
5343       if (getParser().parseAbsoluteExpression(Imm))
5344         return MatchOperand_ParseFail;
5345       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5346       return MatchOperand_Success;
5347     }
5348 
5349     case AsmToken::Identifier:
5350       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5351           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5352                                   Parser.getTok().getString()), getContext()), S));
5353       Parser.Lex();
5354       return MatchOperand_Success;
5355   }
5356 }
5357 
5358 //===----------------------------------------------------------------------===//
5359 // mubuf
5360 //===----------------------------------------------------------------------===//
5361 
5362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5363   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5364 }
5365 
5366 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5367   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5368 }
5369 
5370 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5371   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5372 }
5373 
5374 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5375                                const OperandVector &Operands,
5376                                bool IsAtomic,
5377                                bool IsAtomicReturn,
5378                                bool IsLds) {
5379   bool IsLdsOpcode = IsLds;
5380   bool HasLdsModifier = false;
5381   OptionalImmIndexMap OptionalIdx;
5382   assert(IsAtomicReturn ? IsAtomic : true);
5383   unsigned FirstOperandIdx = 1;
5384 
5385   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5386     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5387 
5388     // Add the register arguments
5389     if (Op.isReg()) {
5390       Op.addRegOperands(Inst, 1);
5391       // Insert a tied src for atomic return dst.
5392       // This cannot be postponed as subsequent calls to
5393       // addImmOperands rely on correct number of MC operands.
5394       if (IsAtomicReturn && i == FirstOperandIdx)
5395         Op.addRegOperands(Inst, 1);
5396       continue;
5397     }
5398 
5399     // Handle the case where soffset is an immediate
5400     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5401       Op.addImmOperands(Inst, 1);
5402       continue;
5403     }
5404 
5405     HasLdsModifier |= Op.isLDS();
5406 
5407     // Handle tokens like 'offen' which are sometimes hard-coded into the
5408     // asm string.  There are no MCInst operands for these.
5409     if (Op.isToken()) {
5410       continue;
5411     }
5412     assert(Op.isImm());
5413 
5414     // Handle optional arguments
5415     OptionalIdx[Op.getImmTy()] = i;
5416   }
5417 
5418   // This is a workaround for an llvm quirk which may result in an
5419   // incorrect instruction selection. Lds and non-lds versions of
5420   // MUBUF instructions are identical except that lds versions
5421   // have mandatory 'lds' modifier. However this modifier follows
5422   // optional modifiers and llvm asm matcher regards this 'lds'
5423   // modifier as an optional one. As a result, an lds version
5424   // of opcode may be selected even if it has no 'lds' modifier.
5425   if (IsLdsOpcode && !HasLdsModifier) {
5426     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5427     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5428       Inst.setOpcode(NoLdsOpcode);
5429       IsLdsOpcode = false;
5430     }
5431   }
5432 
5433   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5434   if (!IsAtomic) { // glc is hard-coded.
5435     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5436   }
5437   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5438 
5439   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5440     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5441   }
5442 
5443   if (isGFX10())
5444     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5445 }
5446 
5447 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5448   OptionalImmIndexMap OptionalIdx;
5449 
5450   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5451     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5452 
5453     // Add the register arguments
5454     if (Op.isReg()) {
5455       Op.addRegOperands(Inst, 1);
5456       continue;
5457     }
5458 
5459     // Handle the case where soffset is an immediate
5460     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5461       Op.addImmOperands(Inst, 1);
5462       continue;
5463     }
5464 
5465     // Handle tokens like 'offen' which are sometimes hard-coded into the
5466     // asm string.  There are no MCInst operands for these.
5467     if (Op.isToken()) {
5468       continue;
5469     }
5470     assert(Op.isImm());
5471 
5472     // Handle optional arguments
5473     OptionalIdx[Op.getImmTy()] = i;
5474   }
5475 
5476   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5477                         AMDGPUOperand::ImmTyOffset);
5478   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5479   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5480   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5481   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5482 
5483   if (isGFX10())
5484     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5485 }
5486 
5487 //===----------------------------------------------------------------------===//
5488 // mimg
5489 //===----------------------------------------------------------------------===//
5490 
5491 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5492                               bool IsAtomic) {
5493   unsigned I = 1;
5494   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5495   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5496     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5497   }
5498 
5499   if (IsAtomic) {
5500     // Add src, same as dst
5501     assert(Desc.getNumDefs() == 1);
5502     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5503   }
5504 
5505   OptionalImmIndexMap OptionalIdx;
5506 
5507   for (unsigned E = Operands.size(); I != E; ++I) {
5508     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5509 
5510     // Add the register arguments
5511     if (Op.isReg()) {
5512       Op.addRegOperands(Inst, 1);
5513     } else if (Op.isImmModifier()) {
5514       OptionalIdx[Op.getImmTy()] = I;
5515     } else if (!Op.isToken()) {
5516       llvm_unreachable("unexpected operand type");
5517     }
5518   }
5519 
5520   bool IsGFX10 = isGFX10();
5521 
5522   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5523   if (IsGFX10)
5524     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5525   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5526   if (IsGFX10)
5527     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5528   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5529   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5530   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5531   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5532   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5533   if (!IsGFX10)
5534     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5535   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5536 }
5537 
5538 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5539   cvtMIMG(Inst, Operands, true);
5540 }
5541 
5542 //===----------------------------------------------------------------------===//
5543 // smrd
5544 //===----------------------------------------------------------------------===//
5545 
5546 bool AMDGPUOperand::isSMRDOffset8() const {
5547   return isImm() && isUInt<8>(getImm());
5548 }
5549 
5550 bool AMDGPUOperand::isSMRDOffset20() const {
5551   return isImm() && isUInt<20>(getImm());
5552 }
5553 
5554 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5555   // 32-bit literals are only supported on CI and we only want to use them
5556   // when the offset is > 8-bits.
5557   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5558 }
5559 
5560 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5561   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5562 }
5563 
5564 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5565   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5566 }
5567 
5568 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5569   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5570 }
5571 
5572 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5573   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5574 }
5575 
5576 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5577   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5578 }
5579 
5580 //===----------------------------------------------------------------------===//
5581 // vop3
5582 //===----------------------------------------------------------------------===//
5583 
5584 static bool ConvertOmodMul(int64_t &Mul) {
5585   if (Mul != 1 && Mul != 2 && Mul != 4)
5586     return false;
5587 
5588   Mul >>= 1;
5589   return true;
5590 }
5591 
5592 static bool ConvertOmodDiv(int64_t &Div) {
5593   if (Div == 1) {
5594     Div = 0;
5595     return true;
5596   }
5597 
5598   if (Div == 2) {
5599     Div = 3;
5600     return true;
5601   }
5602 
5603   return false;
5604 }
5605 
5606 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5607   if (BoundCtrl == 0) {
5608     BoundCtrl = 1;
5609     return true;
5610   }
5611 
5612   if (BoundCtrl == -1) {
5613     BoundCtrl = 0;
5614     return true;
5615   }
5616 
5617   return false;
5618 }
5619 
5620 // Note: the order in this table matches the order of operands in AsmString.
5621 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5622   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5623   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5624   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5625   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5626   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5627   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5628   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5629   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5630   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5631   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5632   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5633   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5634   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5635   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5636   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5637   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5638   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5639   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5640   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5641   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5642   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5643   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5644   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5645   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5646   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5647   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5648   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5649   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5650   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5651   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5652   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5653   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5654   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5655   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5656   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5657   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5658   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5659   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5660   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5661 };
5662 
5663 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5664   unsigned size = Operands.size();
5665   assert(size > 0);
5666 
5667   OperandMatchResultTy res = parseOptionalOpr(Operands);
5668 
5669   // This is a hack to enable hardcoded mandatory operands which follow
5670   // optional operands.
5671   //
5672   // Current design assumes that all operands after the first optional operand
5673   // are also optional. However implementation of some instructions violates
5674   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5675   //
5676   // To alleviate this problem, we have to (implicitly) parse extra operands
5677   // to make sure autogenerated parser of custom operands never hit hardcoded
5678   // mandatory operands.
5679 
5680   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5681 
5682     // We have parsed the first optional operand.
5683     // Parse as many operands as necessary to skip all mandatory operands.
5684 
5685     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5686       if (res != MatchOperand_Success ||
5687           getLexer().is(AsmToken::EndOfStatement)) break;
5688       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5689       res = parseOptionalOpr(Operands);
5690     }
5691   }
5692 
5693   return res;
5694 }
5695 
5696 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5697   OperandMatchResultTy res;
5698   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5699     // try to parse any optional operand here
5700     if (Op.IsBit) {
5701       res = parseNamedBit(Op.Name, Operands, Op.Type);
5702     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5703       res = parseOModOperand(Operands);
5704     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5705                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5706                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5707       res = parseSDWASel(Operands, Op.Name, Op.Type);
5708     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5709       res = parseSDWADstUnused(Operands);
5710     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5711                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5712                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5713                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5714       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5715                                         Op.ConvertResult);
5716     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5717       res = parseDim(Operands);
5718     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5719       res = parseDfmtNfmt(Operands);
5720     } else {
5721       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5722     }
5723     if (res != MatchOperand_NoMatch) {
5724       return res;
5725     }
5726   }
5727   return MatchOperand_NoMatch;
5728 }
5729 
5730 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5731   StringRef Name = Parser.getTok().getString();
5732   if (Name == "mul") {
5733     return parseIntWithPrefix("mul", Operands,
5734                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5735   }
5736 
5737   if (Name == "div") {
5738     return parseIntWithPrefix("div", Operands,
5739                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5740   }
5741 
5742   return MatchOperand_NoMatch;
5743 }
5744 
5745 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5746   cvtVOP3P(Inst, Operands);
5747 
5748   int Opc = Inst.getOpcode();
5749 
5750   int SrcNum;
5751   const int Ops[] = { AMDGPU::OpName::src0,
5752                       AMDGPU::OpName::src1,
5753                       AMDGPU::OpName::src2 };
5754   for (SrcNum = 0;
5755        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5756        ++SrcNum);
5757   assert(SrcNum > 0);
5758 
5759   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5760   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5761 
5762   if ((OpSel & (1 << SrcNum)) != 0) {
5763     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5764     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5765     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5766   }
5767 }
5768 
5769 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5770       // 1. This operand is input modifiers
5771   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5772       // 2. This is not last operand
5773       && Desc.NumOperands > (OpNum + 1)
5774       // 3. Next operand is register class
5775       && Desc.OpInfo[OpNum + 1].RegClass != -1
5776       // 4. Next register is not tied to any other operand
5777       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5778 }
5779 
5780 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5781 {
5782   OptionalImmIndexMap OptionalIdx;
5783   unsigned Opc = Inst.getOpcode();
5784 
5785   unsigned I = 1;
5786   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5787   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5788     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5789   }
5790 
5791   for (unsigned E = Operands.size(); I != E; ++I) {
5792     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5793     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5794       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5795     } else if (Op.isInterpSlot() ||
5796                Op.isInterpAttr() ||
5797                Op.isAttrChan()) {
5798       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5799     } else if (Op.isImmModifier()) {
5800       OptionalIdx[Op.getImmTy()] = I;
5801     } else {
5802       llvm_unreachable("unhandled operand type");
5803     }
5804   }
5805 
5806   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5807     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5808   }
5809 
5810   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5811     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5812   }
5813 
5814   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5815     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5816   }
5817 }
5818 
5819 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5820                               OptionalImmIndexMap &OptionalIdx) {
5821   unsigned Opc = Inst.getOpcode();
5822 
5823   unsigned I = 1;
5824   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5825   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5826     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5827   }
5828 
5829   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5830     // This instruction has src modifiers
5831     for (unsigned E = Operands.size(); I != E; ++I) {
5832       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5833       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5834         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5835       } else if (Op.isImmModifier()) {
5836         OptionalIdx[Op.getImmTy()] = I;
5837       } else if (Op.isRegOrImm()) {
5838         Op.addRegOrImmOperands(Inst, 1);
5839       } else {
5840         llvm_unreachable("unhandled operand type");
5841       }
5842     }
5843   } else {
5844     // No src modifiers
5845     for (unsigned E = Operands.size(); I != E; ++I) {
5846       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5847       if (Op.isMod()) {
5848         OptionalIdx[Op.getImmTy()] = I;
5849       } else {
5850         Op.addRegOrImmOperands(Inst, 1);
5851       }
5852     }
5853   }
5854 
5855   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5856     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5857   }
5858 
5859   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5860     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5861   }
5862 
5863   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
5864   // it has src2 register operand that is tied to dst operand
5865   // we don't allow modifiers for this operand in assembler so src2_modifiers
5866   // should be 0.
5867   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5868       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5869       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5870       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5871       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5872       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5873       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5874     auto it = Inst.begin();
5875     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5876     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5877     ++it;
5878     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5879   }
5880 }
5881 
5882 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5883   OptionalImmIndexMap OptionalIdx;
5884   cvtVOP3(Inst, Operands, OptionalIdx);
5885 }
5886 
5887 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5888                                const OperandVector &Operands) {
5889   OptionalImmIndexMap OptIdx;
5890   const int Opc = Inst.getOpcode();
5891   const MCInstrDesc &Desc = MII.get(Opc);
5892 
5893   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5894 
5895   cvtVOP3(Inst, Operands, OptIdx);
5896 
5897   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5898     assert(!IsPacked);
5899     Inst.addOperand(Inst.getOperand(0));
5900   }
5901 
5902   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5903   // instruction, and then figure out where to actually put the modifiers
5904 
5905   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5906 
5907   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5908   if (OpSelHiIdx != -1) {
5909     int DefaultVal = IsPacked ? -1 : 0;
5910     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5911                           DefaultVal);
5912   }
5913 
5914   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5915   if (NegLoIdx != -1) {
5916     assert(IsPacked);
5917     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5918     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5919   }
5920 
5921   const int Ops[] = { AMDGPU::OpName::src0,
5922                       AMDGPU::OpName::src1,
5923                       AMDGPU::OpName::src2 };
5924   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5925                          AMDGPU::OpName::src1_modifiers,
5926                          AMDGPU::OpName::src2_modifiers };
5927 
5928   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5929 
5930   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5931   unsigned OpSelHi = 0;
5932   unsigned NegLo = 0;
5933   unsigned NegHi = 0;
5934 
5935   if (OpSelHiIdx != -1) {
5936     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5937   }
5938 
5939   if (NegLoIdx != -1) {
5940     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5941     NegLo = Inst.getOperand(NegLoIdx).getImm();
5942     NegHi = Inst.getOperand(NegHiIdx).getImm();
5943   }
5944 
5945   for (int J = 0; J < 3; ++J) {
5946     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5947     if (OpIdx == -1)
5948       break;
5949 
5950     uint32_t ModVal = 0;
5951 
5952     if ((OpSel & (1 << J)) != 0)
5953       ModVal |= SISrcMods::OP_SEL_0;
5954 
5955     if ((OpSelHi & (1 << J)) != 0)
5956       ModVal |= SISrcMods::OP_SEL_1;
5957 
5958     if ((NegLo & (1 << J)) != 0)
5959       ModVal |= SISrcMods::NEG;
5960 
5961     if ((NegHi & (1 << J)) != 0)
5962       ModVal |= SISrcMods::NEG_HI;
5963 
5964     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5965 
5966     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5967   }
5968 }
5969 
5970 //===----------------------------------------------------------------------===//
5971 // dpp
5972 //===----------------------------------------------------------------------===//
5973 
5974 bool AMDGPUOperand::isDPPCtrl() const {
5975   using namespace AMDGPU::DPP;
5976 
5977   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5978   if (result) {
5979     int64_t Imm = getImm();
5980     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5981            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5982            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5983            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5984            (Imm == DppCtrl::WAVE_SHL1) ||
5985            (Imm == DppCtrl::WAVE_ROL1) ||
5986            (Imm == DppCtrl::WAVE_SHR1) ||
5987            (Imm == DppCtrl::WAVE_ROR1) ||
5988            (Imm == DppCtrl::ROW_MIRROR) ||
5989            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5990            (Imm == DppCtrl::BCAST15) ||
5991            (Imm == DppCtrl::BCAST31);
5992   }
5993   return false;
5994 }
5995 
5996 bool AMDGPUOperand::isS16Imm() const {
5997   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5998 }
5999 
6000 bool AMDGPUOperand::isU16Imm() const {
6001   return isImm() && isUInt<16>(getImm());
6002 }
6003 
6004 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6005   if (!isGFX10())
6006     return MatchOperand_NoMatch;
6007 
6008   SMLoc S = Parser.getTok().getLoc();
6009 
6010   if (getLexer().isNot(AsmToken::Identifier))
6011     return MatchOperand_NoMatch;
6012   if (getLexer().getTok().getString() != "dim")
6013     return MatchOperand_NoMatch;
6014 
6015   Parser.Lex();
6016   if (getLexer().isNot(AsmToken::Colon))
6017     return MatchOperand_ParseFail;
6018 
6019   Parser.Lex();
6020 
6021   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6022   // integer.
6023   std::string Token;
6024   if (getLexer().is(AsmToken::Integer)) {
6025     SMLoc Loc = getLexer().getTok().getEndLoc();
6026     Token = getLexer().getTok().getString();
6027     Parser.Lex();
6028     if (getLexer().getTok().getLoc() != Loc)
6029       return MatchOperand_ParseFail;
6030   }
6031   if (getLexer().isNot(AsmToken::Identifier))
6032     return MatchOperand_ParseFail;
6033   Token += getLexer().getTok().getString();
6034 
6035   StringRef DimId = Token;
6036   if (DimId.startswith("SQ_RSRC_IMG_"))
6037     DimId = DimId.substr(12);
6038 
6039   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6040   if (!DimInfo)
6041     return MatchOperand_ParseFail;
6042 
6043   Parser.Lex();
6044 
6045   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6046                                               AMDGPUOperand::ImmTyDim));
6047   return MatchOperand_Success;
6048 }
6049 
6050 OperandMatchResultTy
6051 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6052   using namespace AMDGPU::DPP;
6053 
6054   SMLoc S = Parser.getTok().getLoc();
6055   StringRef Prefix;
6056   int64_t Int;
6057 
6058   if (getLexer().getKind() == AsmToken::Identifier) {
6059     Prefix = Parser.getTok().getString();
6060   } else {
6061     return MatchOperand_NoMatch;
6062   }
6063 
6064   if (Prefix == "row_mirror") {
6065     Int = DppCtrl::ROW_MIRROR;
6066     Parser.Lex();
6067   } else if (Prefix == "row_half_mirror") {
6068     Int = DppCtrl::ROW_HALF_MIRROR;
6069     Parser.Lex();
6070   } else {
6071     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6072     if (Prefix != "quad_perm"
6073         && Prefix != "row_shl"
6074         && Prefix != "row_shr"
6075         && Prefix != "row_ror"
6076         && Prefix != "wave_shl"
6077         && Prefix != "wave_rol"
6078         && Prefix != "wave_shr"
6079         && Prefix != "wave_ror"
6080         && Prefix != "row_bcast") {
6081       return MatchOperand_NoMatch;
6082     }
6083 
6084     Parser.Lex();
6085     if (getLexer().isNot(AsmToken::Colon))
6086       return MatchOperand_ParseFail;
6087 
6088     if (Prefix == "quad_perm") {
6089       // quad_perm:[%d,%d,%d,%d]
6090       Parser.Lex();
6091       if (getLexer().isNot(AsmToken::LBrac))
6092         return MatchOperand_ParseFail;
6093       Parser.Lex();
6094 
6095       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6096         return MatchOperand_ParseFail;
6097 
6098       for (int i = 0; i < 3; ++i) {
6099         if (getLexer().isNot(AsmToken::Comma))
6100           return MatchOperand_ParseFail;
6101         Parser.Lex();
6102 
6103         int64_t Temp;
6104         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6105           return MatchOperand_ParseFail;
6106         const int shift = i*2 + 2;
6107         Int += (Temp << shift);
6108       }
6109 
6110       if (getLexer().isNot(AsmToken::RBrac))
6111         return MatchOperand_ParseFail;
6112       Parser.Lex();
6113     } else {
6114       // sel:%d
6115       Parser.Lex();
6116       if (getParser().parseAbsoluteExpression(Int))
6117         return MatchOperand_ParseFail;
6118 
6119       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6120         Int |= DppCtrl::ROW_SHL0;
6121       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6122         Int |= DppCtrl::ROW_SHR0;
6123       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6124         Int |= DppCtrl::ROW_ROR0;
6125       } else if (Prefix == "wave_shl" && 1 == Int) {
6126         Int = DppCtrl::WAVE_SHL1;
6127       } else if (Prefix == "wave_rol" && 1 == Int) {
6128         Int = DppCtrl::WAVE_ROL1;
6129       } else if (Prefix == "wave_shr" && 1 == Int) {
6130         Int = DppCtrl::WAVE_SHR1;
6131       } else if (Prefix == "wave_ror" && 1 == Int) {
6132         Int = DppCtrl::WAVE_ROR1;
6133       } else if (Prefix == "row_bcast") {
6134         if (Int == 15) {
6135           Int = DppCtrl::BCAST15;
6136         } else if (Int == 31) {
6137           Int = DppCtrl::BCAST31;
6138         } else {
6139           return MatchOperand_ParseFail;
6140         }
6141       } else {
6142         return MatchOperand_ParseFail;
6143       }
6144     }
6145   }
6146 
6147   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6148   return MatchOperand_Success;
6149 }
6150 
6151 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6152   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6153 }
6154 
6155 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6156   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6157 }
6158 
6159 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6160   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6161 }
6162 
6163 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6164   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6165 }
6166 
6167 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
6168   OptionalImmIndexMap OptionalIdx;
6169 
6170   unsigned I = 1;
6171   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6172   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6173     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6174   }
6175 
6176   for (unsigned E = Operands.size(); I != E; ++I) {
6177     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6178                                             MCOI::TIED_TO);
6179     if (TiedTo != -1) {
6180       assert((unsigned)TiedTo < Inst.getNumOperands());
6181       // handle tied old or src2 for MAC instructions
6182       Inst.addOperand(Inst.getOperand(TiedTo));
6183     }
6184     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6185     // Add the register arguments
6186     if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6187       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6188       // Skip it.
6189       continue;
6190     }
6191     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6192       Op.addRegWithFPInputModsOperands(Inst, 2);
6193     } else if (Op.isDPPCtrl()) {
6194       Op.addImmOperands(Inst, 1);
6195     } else if (Op.isImm()) {
6196       // Handle optional arguments
6197       OptionalIdx[Op.getImmTy()] = I;
6198     } else {
6199       llvm_unreachable("Invalid operand type");
6200     }
6201   }
6202 
6203   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6204   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6205   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6206 }
6207 
6208 //===----------------------------------------------------------------------===//
6209 // sdwa
6210 //===----------------------------------------------------------------------===//
6211 
6212 OperandMatchResultTy
6213 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6214                               AMDGPUOperand::ImmTy Type) {
6215   using namespace llvm::AMDGPU::SDWA;
6216 
6217   SMLoc S = Parser.getTok().getLoc();
6218   StringRef Value;
6219   OperandMatchResultTy res;
6220 
6221   res = parseStringWithPrefix(Prefix, Value);
6222   if (res != MatchOperand_Success) {
6223     return res;
6224   }
6225 
6226   int64_t Int;
6227   Int = StringSwitch<int64_t>(Value)
6228         .Case("BYTE_0", SdwaSel::BYTE_0)
6229         .Case("BYTE_1", SdwaSel::BYTE_1)
6230         .Case("BYTE_2", SdwaSel::BYTE_2)
6231         .Case("BYTE_3", SdwaSel::BYTE_3)
6232         .Case("WORD_0", SdwaSel::WORD_0)
6233         .Case("WORD_1", SdwaSel::WORD_1)
6234         .Case("DWORD", SdwaSel::DWORD)
6235         .Default(0xffffffff);
6236   Parser.Lex(); // eat last token
6237 
6238   if (Int == 0xffffffff) {
6239     return MatchOperand_ParseFail;
6240   }
6241 
6242   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6243   return MatchOperand_Success;
6244 }
6245 
6246 OperandMatchResultTy
6247 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6248   using namespace llvm::AMDGPU::SDWA;
6249 
6250   SMLoc S = Parser.getTok().getLoc();
6251   StringRef Value;
6252   OperandMatchResultTy res;
6253 
6254   res = parseStringWithPrefix("dst_unused", Value);
6255   if (res != MatchOperand_Success) {
6256     return res;
6257   }
6258 
6259   int64_t Int;
6260   Int = StringSwitch<int64_t>(Value)
6261         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6262         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6263         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6264         .Default(0xffffffff);
6265   Parser.Lex(); // eat last token
6266 
6267   if (Int == 0xffffffff) {
6268     return MatchOperand_ParseFail;
6269   }
6270 
6271   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6272   return MatchOperand_Success;
6273 }
6274 
6275 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6276   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6277 }
6278 
6279 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6280   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6281 }
6282 
6283 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6284   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6285 }
6286 
6287 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6288   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6289 }
6290 
6291 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6292                               uint64_t BasicInstType, bool skipVcc) {
6293   using namespace llvm::AMDGPU::SDWA;
6294 
6295   OptionalImmIndexMap OptionalIdx;
6296   bool skippedVcc = false;
6297 
6298   unsigned I = 1;
6299   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6300   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6301     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6302   }
6303 
6304   for (unsigned E = Operands.size(); I != E; ++I) {
6305     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6306     if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6307       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
6308       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6309       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6310       // Skip VCC only if we didn't skip it on previous iteration.
6311       if (BasicInstType == SIInstrFlags::VOP2 &&
6312           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6313         skippedVcc = true;
6314         continue;
6315       } else if (BasicInstType == SIInstrFlags::VOPC &&
6316                  Inst.getNumOperands() == 0) {
6317         skippedVcc = true;
6318         continue;
6319       }
6320     }
6321     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6322       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6323     } else if (Op.isImm()) {
6324       // Handle optional arguments
6325       OptionalIdx[Op.getImmTy()] = I;
6326     } else {
6327       llvm_unreachable("Invalid operand type");
6328     }
6329     skippedVcc = false;
6330   }
6331 
6332   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6333       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6334       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6335     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
6336     switch (BasicInstType) {
6337     case SIInstrFlags::VOP1:
6338       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6339       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6340         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6341       }
6342       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6343       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6344       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6345       break;
6346 
6347     case SIInstrFlags::VOP2:
6348       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6349       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6350         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6351       }
6352       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6353       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6354       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6355       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6356       break;
6357 
6358     case SIInstrFlags::VOPC:
6359       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6360         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6361       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6362       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6363       break;
6364 
6365     default:
6366       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6367     }
6368   }
6369 
6370   // special case v_mac_{f16, f32}:
6371   // it has src2 register operand that is tied to dst operand
6372   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6373       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6374     auto it = Inst.begin();
6375     std::advance(
6376       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6377     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6378   }
6379 }
6380 
6381 /// Force static initialization.
6382 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6383   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6384   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6385 }
6386 
6387 #define GET_REGISTER_MATCHER
6388 #define GET_MATCHER_IMPLEMENTATION
6389 #define GET_MNEMONIC_SPELL_CHECKER
6390 #include "AMDGPUGenAsmMatcher.inc"
6391 
6392 // This fuction should be defined after auto-generated include so that we have
6393 // MatchClassKind enum defined
6394 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6395                                                      unsigned Kind) {
6396   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6397   // But MatchInstructionImpl() expects to meet token and fails to validate
6398   // operand. This method checks if we are given immediate operand but expect to
6399   // get corresponding token.
6400   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6401   switch (Kind) {
6402   case MCK_addr64:
6403     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6404   case MCK_gds:
6405     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6406   case MCK_lds:
6407     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6408   case MCK_glc:
6409     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6410   case MCK_idxen:
6411     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6412   case MCK_offen:
6413     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6414   case MCK_SSrcB32:
6415     // When operands have expression values, they will return true for isToken,
6416     // because it is not possible to distinguish between a token and an
6417     // expression at parse time. MatchInstructionImpl() will always try to
6418     // match an operand as a token, when isToken returns true, and when the
6419     // name of the expression is not a valid token, the match will fail,
6420     // so we need to handle it here.
6421     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6422   case MCK_SSrcF32:
6423     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6424   case MCK_SoppBrTarget:
6425     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6426   case MCK_VReg32OrOff:
6427     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6428   case MCK_InterpSlot:
6429     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6430   case MCK_Attr:
6431     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6432   case MCK_AttrChan:
6433     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6434   default:
6435     return Match_InvalidOperand;
6436   }
6437 }
6438 
6439 //===----------------------------------------------------------------------===//
6440 // endpgm
6441 //===----------------------------------------------------------------------===//
6442 
6443 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6444   SMLoc S = Parser.getTok().getLoc();
6445   int64_t Imm = 0;
6446 
6447   if (!parseExpr(Imm)) {
6448     // The operand is optional, if not present default to 0
6449     Imm = 0;
6450   }
6451 
6452   if (!isUInt<16>(Imm)) {
6453     Error(S, "expected a 16-bit value");
6454     return MatchOperand_ParseFail;
6455   }
6456 
6457   Operands.push_back(
6458       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6459   return MatchOperand_Success;
6460 }
6461 
6462 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6463