1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0u;
106       Operand |= Neg ? SISrcMods::NEG : 0u;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0u;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyDLC,
143     ImmTyGLC,
144     ImmTySLC,
145     ImmTyTFE,
146     ImmTyD16,
147     ImmTyClampSI,
148     ImmTyOModSI,
149     ImmTyDppCtrl,
150     ImmTyDppRowMask,
151     ImmTyDppBankMask,
152     ImmTyDppBoundCtrl,
153     ImmTySdwaDstSel,
154     ImmTySdwaSrc0Sel,
155     ImmTySdwaSrc1Sel,
156     ImmTySdwaDstUnused,
157     ImmTyDMask,
158     ImmTyDim,
159     ImmTyUNorm,
160     ImmTyDA,
161     ImmTyR128A16,
162     ImmTyLWE,
163     ImmTyExpTgt,
164     ImmTyExpCompr,
165     ImmTyExpVM,
166     ImmTyFORMAT,
167     ImmTyHwreg,
168     ImmTyOff,
169     ImmTySendMsg,
170     ImmTyInterpSlot,
171     ImmTyInterpAttr,
172     ImmTyAttrChan,
173     ImmTyOpSel,
174     ImmTyOpSelHi,
175     ImmTyNegLo,
176     ImmTyNegHi,
177     ImmTySwizzle,
178     ImmTyGprIdxMode,
179     ImmTyEndpgm,
180     ImmTyHigh
181   };
182 
183 private:
184   struct TokOp {
185     const char *Data;
186     unsigned Length;
187   };
188 
189   struct ImmOp {
190     int64_t Val;
191     ImmTy Type;
192     bool IsFPImm;
193     Modifiers Mods;
194   };
195 
196   struct RegOp {
197     unsigned RegNo;
198     Modifiers Mods;
199   };
200 
201   union {
202     TokOp Tok;
203     ImmOp Imm;
204     RegOp Reg;
205     const MCExpr *Expr;
206   };
207 
208 public:
209   bool isToken() const override {
210     if (Kind == Token)
211       return true;
212 
213     if (Kind != Expression || !Expr)
214       return false;
215 
216     // When parsing operands, we can't always tell if something was meant to be
217     // a token, like 'gds', or an expression that references a global variable.
218     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
220     return isa<MCSymbolRefExpr>(Expr);
221   }
222 
223   bool isImm() const override {
224     return Kind == Immediate;
225   }
226 
227   bool isInlinableImm(MVT type) const;
228   bool isLiteralImm(MVT type) const;
229 
230   bool isRegKind() const {
231     return Kind == Register;
232   }
233 
234   bool isReg() const override {
235     return isRegKind() && !hasModifiers();
236   }
237 
238   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
239     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
240   }
241 
242   bool isRegOrImmWithInt16InputMods() const {
243     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
244   }
245 
246   bool isRegOrImmWithInt32InputMods() const {
247     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
248   }
249 
250   bool isRegOrImmWithInt64InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
252   }
253 
254   bool isRegOrImmWithFP16InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
256   }
257 
258   bool isRegOrImmWithFP32InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
260   }
261 
262   bool isRegOrImmWithFP64InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
264   }
265 
266   bool isVReg() const {
267     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
268            isRegClass(AMDGPU::VReg_64RegClassID) ||
269            isRegClass(AMDGPU::VReg_96RegClassID) ||
270            isRegClass(AMDGPU::VReg_128RegClassID) ||
271            isRegClass(AMDGPU::VReg_256RegClassID) ||
272            isRegClass(AMDGPU::VReg_512RegClassID);
273   }
274 
275   bool isVReg32() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID);
277   }
278 
279   bool isVReg32OrOff() const {
280     return isOff() || isVReg32();
281   }
282 
283   bool isSDWAOperand(MVT type) const;
284   bool isSDWAFP16Operand() const;
285   bool isSDWAFP32Operand() const;
286   bool isSDWAInt16Operand() const;
287   bool isSDWAInt32Operand() const;
288 
289   bool isImmTy(ImmTy ImmT) const {
290     return isImm() && Imm.Type == ImmT;
291   }
292 
293   bool isImmModifier() const {
294     return isImm() && Imm.Type != ImmTyNone;
295   }
296 
297   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
298   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
299   bool isDMask() const { return isImmTy(ImmTyDMask); }
300   bool isDim() const { return isImmTy(ImmTyDim); }
301   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
302   bool isDA() const { return isImmTy(ImmTyDA); }
303   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
304   bool isLWE() const { return isImmTy(ImmTyLWE); }
305   bool isOff() const { return isImmTy(ImmTyOff); }
306   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
307   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
308   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
309   bool isOffen() const { return isImmTy(ImmTyOffen); }
310   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
311   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
312   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
313   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
314   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
315 
316   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
317   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
318   bool isGDS() const { return isImmTy(ImmTyGDS); }
319   bool isLDS() const { return isImmTy(ImmTyLDS); }
320   bool isDLC() const { return isImmTy(ImmTyDLC); }
321   bool isGLC() const { return isImmTy(ImmTyGLC); }
322   bool isSLC() const { return isImmTy(ImmTySLC); }
323   bool isTFE() const { return isImmTy(ImmTyTFE); }
324   bool isD16() const { return isImmTy(ImmTyD16); }
325   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
326   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
327   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
328   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
329   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
330   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
331   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
332   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
333   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
334   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
335   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
336   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
337   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
338   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
339   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
340   bool isHigh() const { return isImmTy(ImmTyHigh); }
341 
342   bool isMod() const {
343     return isClampSI() || isOModSI();
344   }
345 
346   bool isRegOrImm() const {
347     return isReg() || isImm();
348   }
349 
350   bool isRegClass(unsigned RCID) const;
351 
352   bool isInlineValue() const;
353 
354   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
355     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
356   }
357 
358   bool isSCSrcB16() const {
359     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
360   }
361 
362   bool isSCSrcV2B16() const {
363     return isSCSrcB16();
364   }
365 
366   bool isSCSrcB32() const {
367     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
368   }
369 
370   bool isSCSrcB64() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
372   }
373 
374   bool isSCSrcF16() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
376   }
377 
378   bool isSCSrcV2F16() const {
379     return isSCSrcF16();
380   }
381 
382   bool isSCSrcF32() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
384   }
385 
386   bool isSCSrcF64() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
388   }
389 
390   bool isSSrcB32() const {
391     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
392   }
393 
394   bool isSSrcB16() const {
395     return isSCSrcB16() || isLiteralImm(MVT::i16);
396   }
397 
398   bool isSSrcV2B16() const {
399     llvm_unreachable("cannot happen");
400     return isSSrcB16();
401   }
402 
403   bool isSSrcB64() const {
404     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
405     // See isVSrc64().
406     return isSCSrcB64() || isLiteralImm(MVT::i64);
407   }
408 
409   bool isSSrcF32() const {
410     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
411   }
412 
413   bool isSSrcF64() const {
414     return isSCSrcB64() || isLiteralImm(MVT::f64);
415   }
416 
417   bool isSSrcF16() const {
418     return isSCSrcB16() || isLiteralImm(MVT::f16);
419   }
420 
421   bool isSSrcV2F16() const {
422     llvm_unreachable("cannot happen");
423     return isSSrcF16();
424   }
425 
426   bool isSSrcOrLdsB32() const {
427     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
428            isLiteralImm(MVT::i32) || isExpr();
429   }
430 
431   bool isVCSrcB32() const {
432     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
433   }
434 
435   bool isVCSrcB64() const {
436     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
437   }
438 
439   bool isVCSrcB16() const {
440     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
441   }
442 
443   bool isVCSrcV2B16() const {
444     return isVCSrcB16();
445   }
446 
447   bool isVCSrcF32() const {
448     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
449   }
450 
451   bool isVCSrcF64() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
453   }
454 
455   bool isVCSrcF16() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
457   }
458 
459   bool isVCSrcV2F16() const {
460     return isVCSrcF16();
461   }
462 
463   bool isVSrcB32() const {
464     return isVCSrcF32() || isLiteralImm(MVT::i32);
465   }
466 
467   bool isVSrcB64() const {
468     return isVCSrcF64() || isLiteralImm(MVT::i64);
469   }
470 
471   bool isVSrcB16() const {
472     return isVCSrcF16() || isLiteralImm(MVT::i16);
473   }
474 
475   bool isVSrcV2B16() const {
476     return isVSrcB16() || isLiteralImm(MVT::v2i16);
477   }
478 
479   bool isVSrcF32() const {
480     return isVCSrcF32() || isLiteralImm(MVT::f32);
481   }
482 
483   bool isVSrcF64() const {
484     return isVCSrcF64() || isLiteralImm(MVT::f64);
485   }
486 
487   bool isVSrcF16() const {
488     return isVCSrcF16() || isLiteralImm(MVT::f16);
489   }
490 
491   bool isVSrcV2F16() const {
492     return isVSrcF16() || isLiteralImm(MVT::v2f16);
493   }
494 
495   bool isKImmFP32() const {
496     return isLiteralImm(MVT::f32);
497   }
498 
499   bool isKImmFP16() const {
500     return isLiteralImm(MVT::f16);
501   }
502 
503   bool isMem() const override {
504     return false;
505   }
506 
507   bool isExpr() const {
508     return Kind == Expression;
509   }
510 
511   bool isSoppBrTarget() const {
512     return isExpr() || isImm();
513   }
514 
515   bool isSWaitCnt() const;
516   bool isHwreg() const;
517   bool isSendMsg() const;
518   bool isSwizzle() const;
519   bool isSMRDOffset8() const;
520   bool isSMRDOffset20() const;
521   bool isSMRDLiteralOffset() const;
522   bool isDPPCtrl() const;
523   bool isGPRIdxMode() const;
524   bool isS16Imm() const;
525   bool isU16Imm() const;
526   bool isEndpgm() const;
527 
528   StringRef getExpressionAsToken() const {
529     assert(isExpr());
530     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
531     return S->getSymbol().getName();
532   }
533 
534   StringRef getToken() const {
535     assert(isToken());
536 
537     if (Kind == Expression)
538       return getExpressionAsToken();
539 
540     return StringRef(Tok.Data, Tok.Length);
541   }
542 
543   int64_t getImm() const {
544     assert(isImm());
545     return Imm.Val;
546   }
547 
548   ImmTy getImmTy() const {
549     assert(isImm());
550     return Imm.Type;
551   }
552 
553   unsigned getReg() const override {
554     assert(isRegKind());
555     return Reg.RegNo;
556   }
557 
558   SMLoc getStartLoc() const override {
559     return StartLoc;
560   }
561 
562   SMLoc getEndLoc() const override {
563     return EndLoc;
564   }
565 
566   SMRange getLocRange() const {
567     return SMRange(StartLoc, EndLoc);
568   }
569 
570   Modifiers getModifiers() const {
571     assert(isRegKind() || isImmTy(ImmTyNone));
572     return isRegKind() ? Reg.Mods : Imm.Mods;
573   }
574 
575   void setModifiers(Modifiers Mods) {
576     assert(isRegKind() || isImmTy(ImmTyNone));
577     if (isRegKind())
578       Reg.Mods = Mods;
579     else
580       Imm.Mods = Mods;
581   }
582 
583   bool hasModifiers() const {
584     return getModifiers().hasModifiers();
585   }
586 
587   bool hasFPModifiers() const {
588     return getModifiers().hasFPModifiers();
589   }
590 
591   bool hasIntModifiers() const {
592     return getModifiers().hasIntModifiers();
593   }
594 
595   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
596 
597   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
598 
599   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
600 
601   template <unsigned Bitwidth>
602   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
603 
604   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
605     addKImmFPOperands<16>(Inst, N);
606   }
607 
608   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
609     addKImmFPOperands<32>(Inst, N);
610   }
611 
612   void addRegOperands(MCInst &Inst, unsigned N) const;
613 
614   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
615     if (isRegKind())
616       addRegOperands(Inst, N);
617     else if (isExpr())
618       Inst.addOperand(MCOperand::createExpr(Expr));
619     else
620       addImmOperands(Inst, N);
621   }
622 
623   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
624     Modifiers Mods = getModifiers();
625     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
626     if (isRegKind()) {
627       addRegOperands(Inst, N);
628     } else {
629       addImmOperands(Inst, N, false);
630     }
631   }
632 
633   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
634     assert(!hasIntModifiers());
635     addRegOrImmWithInputModsOperands(Inst, N);
636   }
637 
638   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
639     assert(!hasFPModifiers());
640     addRegOrImmWithInputModsOperands(Inst, N);
641   }
642 
643   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
644     Modifiers Mods = getModifiers();
645     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
646     assert(isRegKind());
647     addRegOperands(Inst, N);
648   }
649 
650   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
651     assert(!hasIntModifiers());
652     addRegWithInputModsOperands(Inst, N);
653   }
654 
655   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
656     assert(!hasFPModifiers());
657     addRegWithInputModsOperands(Inst, N);
658   }
659 
660   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
661     if (isImm())
662       addImmOperands(Inst, N);
663     else {
664       assert(isExpr());
665       Inst.addOperand(MCOperand::createExpr(Expr));
666     }
667   }
668 
669   static void printImmTy(raw_ostream& OS, ImmTy Type) {
670     switch (Type) {
671     case ImmTyNone: OS << "None"; break;
672     case ImmTyGDS: OS << "GDS"; break;
673     case ImmTyLDS: OS << "LDS"; break;
674     case ImmTyOffen: OS << "Offen"; break;
675     case ImmTyIdxen: OS << "Idxen"; break;
676     case ImmTyAddr64: OS << "Addr64"; break;
677     case ImmTyOffset: OS << "Offset"; break;
678     case ImmTyInstOffset: OS << "InstOffset"; break;
679     case ImmTyOffset0: OS << "Offset0"; break;
680     case ImmTyOffset1: OS << "Offset1"; break;
681     case ImmTyDLC: OS << "DLC"; break;
682     case ImmTyGLC: OS << "GLC"; break;
683     case ImmTySLC: OS << "SLC"; break;
684     case ImmTyTFE: OS << "TFE"; break;
685     case ImmTyD16: OS << "D16"; break;
686     case ImmTyFORMAT: OS << "FORMAT"; break;
687     case ImmTyClampSI: OS << "ClampSI"; break;
688     case ImmTyOModSI: OS << "OModSI"; break;
689     case ImmTyDppCtrl: OS << "DppCtrl"; break;
690     case ImmTyDppRowMask: OS << "DppRowMask"; break;
691     case ImmTyDppBankMask: OS << "DppBankMask"; break;
692     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
693     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
694     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
695     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
696     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
697     case ImmTyDMask: OS << "DMask"; break;
698     case ImmTyDim: OS << "Dim"; break;
699     case ImmTyUNorm: OS << "UNorm"; break;
700     case ImmTyDA: OS << "DA"; break;
701     case ImmTyR128A16: OS << "R128A16"; break;
702     case ImmTyLWE: OS << "LWE"; break;
703     case ImmTyOff: OS << "Off"; break;
704     case ImmTyExpTgt: OS << "ExpTgt"; break;
705     case ImmTyExpCompr: OS << "ExpCompr"; break;
706     case ImmTyExpVM: OS << "ExpVM"; break;
707     case ImmTyHwreg: OS << "Hwreg"; break;
708     case ImmTySendMsg: OS << "SendMsg"; break;
709     case ImmTyInterpSlot: OS << "InterpSlot"; break;
710     case ImmTyInterpAttr: OS << "InterpAttr"; break;
711     case ImmTyAttrChan: OS << "AttrChan"; break;
712     case ImmTyOpSel: OS << "OpSel"; break;
713     case ImmTyOpSelHi: OS << "OpSelHi"; break;
714     case ImmTyNegLo: OS << "NegLo"; break;
715     case ImmTyNegHi: OS << "NegHi"; break;
716     case ImmTySwizzle: OS << "Swizzle"; break;
717     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
718     case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
722     }
723   }
724 
725   void print(raw_ostream &OS) const override {
726     switch (Kind) {
727     case Register:
728       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
729       break;
730     case Immediate:
731       OS << '<' << getImm();
732       if (getImmTy() != ImmTyNone) {
733         OS << " type: "; printImmTy(OS, getImmTy());
734       }
735       OS << " mods: " << Imm.Mods << '>';
736       break;
737     case Token:
738       OS << '\'' << getToken() << '\'';
739       break;
740     case Expression:
741       OS << "<expr " << *Expr << '>';
742       break;
743     }
744   }
745 
746   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
747                                       int64_t Val, SMLoc Loc,
748                                       ImmTy Type = ImmTyNone,
749                                       bool IsFPImm = false) {
750     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
751     Op->Imm.Val = Val;
752     Op->Imm.IsFPImm = IsFPImm;
753     Op->Imm.Type = Type;
754     Op->Imm.Mods = Modifiers();
755     Op->StartLoc = Loc;
756     Op->EndLoc = Loc;
757     return Op;
758   }
759 
760   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
761                                         StringRef Str, SMLoc Loc,
762                                         bool HasExplicitEncodingSize = true) {
763     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
764     Res->Tok.Data = Str.data();
765     Res->Tok.Length = Str.size();
766     Res->StartLoc = Loc;
767     Res->EndLoc = Loc;
768     return Res;
769   }
770 
771   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
772                                       unsigned RegNo, SMLoc S,
773                                       SMLoc E) {
774     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
775     Op->Reg.RegNo = RegNo;
776     Op->Reg.Mods = Modifiers();
777     Op->StartLoc = S;
778     Op->EndLoc = E;
779     return Op;
780   }
781 
782   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
783                                        const class MCExpr *Expr, SMLoc S) {
784     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
785     Op->Expr = Expr;
786     Op->StartLoc = S;
787     Op->EndLoc = S;
788     return Op;
789   }
790 };
791 
792 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
794   return OS;
795 }
796 
797 //===----------------------------------------------------------------------===//
798 // AsmParser
799 //===----------------------------------------------------------------------===//
800 
801 // Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
804 class KernelScopeInfo {
805   int SgprIndexUnusedMin = -1;
806   int VgprIndexUnusedMin = -1;
807   MCContext *Ctx = nullptr;
808 
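  // Record that SGPR number i is used and keep the .kernel.sgpr_count symbol
  // equal to the number of SGPRs used so far (highest used index + 1).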
809   void usesSgprAt(int i) {
810     if (i >= SgprIndexUnusedMin) {
811       SgprIndexUnusedMin = ++i;
812       if (Ctx) {
813         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
814         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
815       }
816     }
817   }
818 
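  // Same as usesSgprAt, but for VGPRs and the .kernel.vgpr_count symbol.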
819   void usesVgprAt(int i) {
820     if (i >= VgprIndexUnusedMin) {
821       VgprIndexUnusedMin = ++i;
822       if (Ctx) {
823         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
824         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
825       }
826     }
827   }
828 
829 public:
830   KernelScopeInfo() = default;
831 
832   void initialize(MCContext &Context) {
833     Ctx = &Context;
834     usesSgprAt(SgprIndexUnusedMin = -1);
835     usesVgprAt(VgprIndexUnusedMin = -1);
836   }
837 
838   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
839     switch (RegKind) {
840       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
841       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
842       default: break;
843     }
844   }
845 };
846 
847 class AMDGPUAsmParser : public MCTargetAsmParser {
848   MCAsmParser &Parser;
849 
850   // Number of extra operands parsed after the first optional operand.
851   // This may be necessary to skip hardcoded mandatory operands.
852   static const unsigned MAX_OPR_LOOKAHEAD = 8;
853 
854   unsigned ForcedEncodingSize = 0;
855   bool ForcedDPP = false;
856   bool ForcedSDWA = false;
857   KernelScopeInfo KernelScope;
858 
859   /// @name Auto-generated Match Functions
860   /// {
861 
862 #define GET_ASSEMBLER_HEADER
863 #include "AMDGPUGenAsmMatcher.inc"
864 
865   /// }
866 
867 private:
868   bool ParseAsAbsoluteExpression(uint32_t &Ret);
869   bool OutOfRangeError(SMRange Range);
870   /// Calculate VGPR/SGPR blocks required for given target, reserved
871   /// registers, and user-specified NextFreeXGPR values.
872   ///
873   /// \param Features [in] Target features, used for bug corrections.
874   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
875   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
876   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
877   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
878   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
879   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
880   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
881   /// \param VGPRBlocks [out] Result VGPR block count.
882   /// \param SGPRBlocks [out] Result SGPR block count.
883   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
884                           bool FlatScrUsed, bool XNACKUsed,
885                           unsigned NextFreeVGPR, SMRange VGPRRange,
886                           unsigned NextFreeSGPR, SMRange SGPRRange,
887                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
888   bool ParseDirectiveAMDGCNTarget();
889   bool ParseDirectiveAMDHSAKernel();
890   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
891   bool ParseDirectiveHSACodeObjectVersion();
892   bool ParseDirectiveHSACodeObjectISA();
893   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
894   bool ParseDirectiveAMDKernelCodeT();
895   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
896   bool ParseDirectiveAMDGPUHsaKernel();
897 
898   bool ParseDirectiveISAVersion();
899   bool ParseDirectiveHSAMetadata();
900   bool ParseDirectivePALMetadataBegin();
901   bool ParseDirectivePALMetadata();
902 
903   /// Common code to parse out a block of text (typically YAML) between start and
904   /// end directives.
905   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
906                            const char *AssemblerDirectiveEnd,
907                            std::string &CollectString);
908 
909   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
910                              RegisterKind RegKind, unsigned Reg1,
911                              unsigned RegNum);
912   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
913                            unsigned& RegNum, unsigned& RegWidth,
914                            unsigned *DwordRegIndex);
915   bool isRegister();
916   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
917   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
918   void initializeGprCountSymbol(RegisterKind RegKind);
919   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
920                              unsigned RegWidth);
921   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
922                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
923   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
924                  bool IsGdsHardcoded);
925 
926 public:
927   enum AMDGPUMatchResultTy {
928     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
929   };
930   enum OperandMode {
931     OperandMode_Default,
932     OperandMode_NSA,
933   };
934 
935   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
936 
937   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
938                const MCInstrInfo &MII,
939                const MCTargetOptions &Options)
940       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
941     MCAsmParserExtension::Initialize(Parser);
942 
943     if (getFeatureBits().none()) {
944       // Set default features.
945       copySTI().ToggleFeature("southern-islands");
946     }
947 
948     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
949 
950     {
951       // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
955       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
956       MCContext &Ctx = getContext();
957       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
958         MCSymbol *Sym =
959             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
963         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
964         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
965       } else {
966         MCSymbol *Sym =
967             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
968         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
969         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
970         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
971         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
972         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
973       }
974       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
975         initializeGprCountSymbol(IS_VGPR);
976         initializeGprCountSymbol(IS_SGPR);
977       } else
978         KernelScope.initialize(getContext());
979     }
980   }
981 
982   bool hasXNACK() const {
983     return AMDGPU::hasXNACK(getSTI());
984   }
985 
986   bool hasMIMG_R128() const {
987     return AMDGPU::hasMIMG_R128(getSTI());
988   }
989 
990   bool hasPackedD16() const {
991     return AMDGPU::hasPackedD16(getSTI());
992   }
993 
994   bool isSI() const {
995     return AMDGPU::isSI(getSTI());
996   }
997 
998   bool isCI() const {
999     return AMDGPU::isCI(getSTI());
1000   }
1001 
1002   bool isVI() const {
1003     return AMDGPU::isVI(getSTI());
1004   }
1005 
1006   bool isGFX9() const {
1007     return AMDGPU::isGFX9(getSTI());
1008   }
1009 
1010   bool isGFX10() const {
1011     return AMDGPU::isGFX10(getSTI());
1012   }
1013 
1014   bool hasInv2PiInlineImm() const {
1015     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1016   }
1017 
1018   bool hasFlatOffsets() const {
1019     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1020   }
1021 
1022   bool hasSGPR102_SGPR103() const {
1023     return !isVI() && !isGFX9();
1024   }
1025 
1026   bool hasSGPR104_SGPR105() const {
1027     return isGFX10();
1028   }
1029 
1030   bool hasIntClamp() const {
1031     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1032   }
1033 
1034   AMDGPUTargetStreamer &getTargetStreamer() {
1035     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1036     return static_cast<AMDGPUTargetStreamer &>(TS);
1037   }
1038 
1039   const MCRegisterInfo *getMRI() const {
1040     // We need this const_cast because for some reason getContext() is not const
1041     // in MCAsmParser.
1042     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1043   }
1044 
1045   const MCInstrInfo *getMII() const {
1046     return &MII;
1047   }
1048 
1049   const FeatureBitset &getFeatureBits() const {
1050     return getSTI().getFeatureBits();
1051   }
1052 
1053   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1054   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1055   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1056 
1057   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1058   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1059   bool isForcedDPP() const { return ForcedDPP; }
1060   bool isForcedSDWA() const { return ForcedSDWA; }
1061   ArrayRef<unsigned> getMatchedVariants() const;
1062 
1063   std::unique_ptr<AMDGPUOperand> parseRegister();
1064   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1065   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1066   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1067                                       unsigned Kind) override;
1068   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1069                                OperandVector &Operands, MCStreamer &Out,
1070                                uint64_t &ErrorInfo,
1071                                bool MatchingInlineAsm) override;
1072   bool ParseDirective(AsmToken DirectiveID) override;
1073   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1074                                     OperandMode Mode = OperandMode_Default);
1075   StringRef parseMnemonicSuffix(StringRef Name);
1076   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1077                         SMLoc NameLoc, OperandVector &Operands) override;
1078   //bool ProcessInstruction(MCInst &Inst);
1079 
1080   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1081 
1082   OperandMatchResultTy
1083   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1084                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1085                      bool (*ConvertResult)(int64_t &) = nullptr);
1086 
1087   OperandMatchResultTy parseOperandArrayWithPrefix(
1088     const char *Prefix,
1089     OperandVector &Operands,
1090     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1091     bool (*ConvertResult)(int64_t&) = nullptr);
1092 
1093   OperandMatchResultTy
1094   parseNamedBit(const char *Name, OperandVector &Operands,
1095                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1096   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1097                                              StringRef &Value);
1098 
1099   bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
1100   bool parseSP3NegModifier();
1101   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1102   OperandMatchResultTy parseReg(OperandVector &Operands);
1103   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1104   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1105   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1106   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1107   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1108   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1109   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1110 
1111   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1112   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1113   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1114   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1115 
1116   bool parseCnt(int64_t &IntVal);
1117   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1118   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1119 
1120 private:
1121   struct OperandInfoTy {
1122     int64_t Id;
1123     bool IsSymbolic = false;
1124 
1125     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1126   };
1127 
1128   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1129   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1130 
1131   void errorExpTgt();
1132   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1133 
1134   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1135   bool validateSOPLiteral(const MCInst &Inst) const;
1136   bool validateConstantBusLimitations(const MCInst &Inst);
1137   bool validateEarlyClobberLimitations(const MCInst &Inst);
1138   bool validateIntClampSupported(const MCInst &Inst);
1139   bool validateMIMGAtomicDMask(const MCInst &Inst);
1140   bool validateMIMGGatherDMask(const MCInst &Inst);
1141   bool validateMIMGDataSize(const MCInst &Inst);
1142   bool validateMIMGAddrSize(const MCInst &Inst);
1143   bool validateMIMGD16(const MCInst &Inst);
1144   bool validateMIMGDim(const MCInst &Inst);
1145   bool validateLdsDirect(const MCInst &Inst);
1146   bool validateVOP3Literal(const MCInst &Inst) const;
1147   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1148   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1149   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1150 
1151   bool isId(const StringRef Id) const;
1152   bool isId(const AsmToken &Token, const StringRef Id) const;
1153   bool isToken(const AsmToken::TokenKind Kind) const;
1154   bool trySkipId(const StringRef Id);
1155   bool trySkipToken(const AsmToken::TokenKind Kind);
1156   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1157   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1158   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1159   AsmToken::TokenKind getTokenKind() const;
1160   bool parseExpr(int64_t &Imm);
1161   StringRef getTokenStr() const;
1162   AsmToken peekToken();
1163   AsmToken getToken() const;
1164   SMLoc getLoc() const;
1165   void lex();
1166 
1167 public:
1168   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1169   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1170 
1171   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1172   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1173   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1174   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1175   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1176 
1177   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1178                             const unsigned MinVal,
1179                             const unsigned MaxVal,
1180                             const StringRef ErrMsg);
1181   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1182   bool parseSwizzleOffset(int64_t &Imm);
1183   bool parseSwizzleMacro(int64_t &Imm);
1184   bool parseSwizzleQuadPerm(int64_t &Imm);
1185   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1186   bool parseSwizzleBroadcast(int64_t &Imm);
1187   bool parseSwizzleSwap(int64_t &Imm);
1188   bool parseSwizzleReverse(int64_t &Imm);
1189 
1190   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1191   int64_t parseGPRIdxMacro();
1192 
1193   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1194   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1195   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1196   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1197   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1198 
1199   AMDGPUOperand::Ptr defaultDLC() const;
1200   AMDGPUOperand::Ptr defaultGLC() const;
1201   AMDGPUOperand::Ptr defaultSLC() const;
1202 
1203   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1204   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1205   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1206   AMDGPUOperand::Ptr defaultOffsetU12() const;
1207   AMDGPUOperand::Ptr defaultOffsetS13() const;
1208 
1209   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1210 
1211   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1212                OptionalImmIndexMap &OptionalIdx);
1213   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1214   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1215   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1216 
1217   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1218 
1219   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1220                bool IsAtomic = false);
1221   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1222 
1223   OperandMatchResultTy parseDim(OperandVector &Operands);
1224   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1225   AMDGPUOperand::Ptr defaultRowMask() const;
1226   AMDGPUOperand::Ptr defaultBankMask() const;
1227   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1228   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1229 
1230   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1231                                     AMDGPUOperand::ImmTy Type);
1232   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1233   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1234   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1235   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1236   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1237   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1238                 uint64_t BasicInstType, bool skipVcc = false);
1239 
1240   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1241   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1242 };
1243 
1244 struct OptionalOperand {
1245   const char *Name;
1246   AMDGPUOperand::ImmTy Type;
1247   bool IsBit;
1248   bool (*ConvertResult)(int64_t&);
1249 };
1250 
1251 } // end anonymous namespace
1252 
// May be called with an integer type of equivalent bitwidth.
1254 static const fltSemantics *getFltSemantics(unsigned Size) {
1255   switch (Size) {
1256   case 4:
1257     return &APFloat::IEEEsingle();
1258   case 8:
1259     return &APFloat::IEEEdouble();
1260   case 2:
1261     return &APFloat::IEEEhalf();
1262   default:
1263     llvm_unreachable("unsupported fp type");
1264   }
1265 }
1266 
1267 static const fltSemantics *getFltSemantics(MVT VT) {
1268   return getFltSemantics(VT.getSizeInBits() / 8);
1269 }
1270 
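// Map an operand type to the floating-point semantics used when converting a
// literal for that operand.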
1271 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1272   switch (OperandType) {
1273   case AMDGPU::OPERAND_REG_IMM_INT32:
1274   case AMDGPU::OPERAND_REG_IMM_FP32:
1275   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1276   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1277     return &APFloat::IEEEsingle();
1278   case AMDGPU::OPERAND_REG_IMM_INT64:
1279   case AMDGPU::OPERAND_REG_IMM_FP64:
1280   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1281   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1282     return &APFloat::IEEEdouble();
1283   case AMDGPU::OPERAND_REG_IMM_INT16:
1284   case AMDGPU::OPERAND_REG_IMM_FP16:
1285   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1286   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1287   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1288   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1289   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1290   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1291     return &APFloat::IEEEhalf();
1292   default:
1293     llvm_unreachable("unsupported fp type");
1294   }
1295 }
1296 
1297 //===----------------------------------------------------------------------===//
1298 // Operand
1299 //===----------------------------------------------------------------------===//
1300 
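// Return true if FPLiteral can be converted to the floating-point type of VT,
// allowing precision loss but not overflow or underflow. Note that FPLiteral
// is converted in place.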
1301 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1302   bool Lost;
1303 
  // Convert the literal to the floating-point semantics of VT.
1305   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1306                                                APFloat::rmNearestTiesToEven,
1307                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1309   if (Status != APFloat::opOK &&
1310       Lost &&
1311       ((Status & APFloat::opOverflow)  != 0 ||
1312        (Status & APFloat::opUnderflow) != 0)) {
1313     return false;
1314   }
1315 
1316   return true;
1317 }
1318 
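// Return true if Val fits in Size bits as either a signed or an unsigned
// integer, i.e. truncation to Size bits does not change its value.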
1319 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1320   return isUIntN(Size, Val) || isIntN(Size, Val);
1321 }
1322 
1323 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1324 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1329   if (isInlineValue()) {
1330     return true;
1331   }
1332 
1333   if (!isImmTy(ImmTyNone)) {
1334     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1335     return false;
1336   }
  // TODO: We should avoid using host float here. It would be better to check
  // the float bit values, which is what a few other places do. We've had bot
  // failures before due to weird NaN support on mips hosts.
1340 
1341   APInt Literal(64, Imm.Val);
1342 
1343   if (Imm.IsFPImm) { // We got fp literal token
1344     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1345       return AMDGPU::isInlinableLiteral64(Imm.Val,
1346                                           AsmParser->hasInv2PiInlineImm());
1347     }
1348 
1349     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1350     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1351       return false;
1352 
1353     if (type.getScalarSizeInBits() == 16) {
1354       return AMDGPU::isInlinableLiteral16(
1355         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1356         AsmParser->hasInv2PiInlineImm());
1357     }
1358 
1359     // Check if single precision literal is inlinable
1360     return AMDGPU::isInlinableLiteral32(
1361       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1362       AsmParser->hasInv2PiInlineImm());
1363   }
1364 
1365   // We got int literal token.
1366   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1367     return AMDGPU::isInlinableLiteral64(Imm.Val,
1368                                         AsmParser->hasInv2PiInlineImm());
1369   }
1370 
1371   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1372     return false;
1373   }
1374 
1375   if (type.getScalarSizeInBits() == 16) {
1376     return AMDGPU::isInlinableLiteral16(
1377       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1378       AsmParser->hasInv2PiInlineImm());
1379   }
1380 
1381   return AMDGPU::isInlinableLiteral32(
1382     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1383     AsmParser->hasInv2PiInlineImm());
1384 }
1385 
1386 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1387   // Check that this immediate can be added as literal
1388   if (!isImmTy(ImmTyNone)) {
1389     return false;
1390   }
1391 
1392   if (!Imm.IsFPImm) {
1393     // We got int literal token.
1394 
1395     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to integer literals while preserving
      // the same semantics for VOP1/2/C and VOP3, because of integer
      // truncation. To avoid ambiguity, disable these cases.
1399       return false;
1400     }
1401 
1402     unsigned Size = type.getSizeInBits();
1403     if (Size == 64)
1404       Size = 32;
1405 
1406     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1407     // types.
1408     return isSafeTruncation(Imm.Val, Size);
1409   }
1410 
1411   // We got fp literal token
1412   if (type == MVT::f64) { // Expected 64-bit fp operand
    // We accept such literals, but the low 32 bits of the value will be set
    // to zero when the literal is encoded (see addLiteralImmOperand).
1414     return true;
1415   }
1416 
1417   if (type == MVT::i64) { // Expected 64-bit int operand
1418     // We don't allow fp literals in 64-bit integer instructions. It is
1419     // unclear how we should encode them.
1420     return false;
1421   }
1422 
1423   // We allow fp literals with f16x2 operands assuming that the specified
1424   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1426   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1427                      (type == MVT::v2i16)? MVT::i16 : type;
1428 
1429   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1430   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1431 }
1432 
1433 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1434   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1435 }
1436 
1437 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1438   if (AsmParser->isVI())
1439     return isVReg32();
1440   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1441     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1442   else
1443     return false;
1444 }
1445 
1446 bool AMDGPUOperand::isSDWAFP16Operand() const {
1447   return isSDWAOperand(MVT::f16);
1448 }
1449 
1450 bool AMDGPUOperand::isSDWAFP32Operand() const {
1451   return isSDWAOperand(MVT::f32);
1452 }
1453 
1454 bool AMDGPUOperand::isSDWAInt16Operand() const {
1455   return isSDWAOperand(MVT::i16);
1456 }
1457 
1458 bool AMDGPUOperand::isSDWAInt32Operand() const {
1459   return isSDWAOperand(MVT::i32);
1460 }
1461 
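// Apply abs/neg input modifiers directly to the bit pattern of a literal of
// the given size in bytes: abs clears the sign bit, neg flips it.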
1462 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1463 {
1464   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1465   assert(Size == 2 || Size == 4 || Size == 8);
1466 
1467   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1468 
1469   if (Imm.Mods.Abs) {
1470     Val &= ~FpSignMask;
1471   }
1472   if (Imm.Mods.Neg) {
1473     Val ^= FpSignMask;
1474   }
1475 
1476   return Val;
1477 }
1478 
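// Add this immediate to Inst. If the target operand can take a literal
// (i.e. it is an SI src operand), go through addLiteralImmOperand so that FP
// input modifiers and literal encoding rules are applied; otherwise add the
// raw value.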
1479 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1480   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1481                              Inst.getNumOperands())) {
1482     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
1484                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1485   } else {
1486     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1487     Inst.addOperand(MCOperand::createImm(Imm.Val));
1488   }
1489 }
1490 
1491 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1492   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1493   auto OpNum = Inst.getNumOperands();
1494   // Check that this operand accepts literals
1495   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1496 
1497   if (ApplyModifiers) {
1498     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1499     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1500     Val = applyInputFPModifiers(Val, Size);
1501   }
1502 
1503   APInt Literal(64, Val);
1504   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1505 
1506   if (Imm.IsFPImm) { // We got fp literal token
1507     switch (OpTy) {
1508     case AMDGPU::OPERAND_REG_IMM_INT64:
1509     case AMDGPU::OPERAND_REG_IMM_FP64:
1510     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1511     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1512       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1513                                        AsmParser->hasInv2PiInlineImm())) {
1514         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1515         return;
1516       }
1517 
1518       // Non-inlineable
1519       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1520         // For fp operands we check if low 32 bits are zeros
1521         if (Literal.getLoBits(32) != 0) {
1522           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1523           "Can't encode literal as exact 64-bit floating-point operand. "
1524           "Low 32-bits will be set to zero");
1525         }
1526 
1527         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1528         return;
1529       }
1530 
1531       // We don't allow fp literals in 64-bit integer instructions. It is
1532       // unclear how we should encode them. This case should be checked earlier
1533       // in predicate methods (isLiteralImm())
1534       llvm_unreachable("fp literal in 64-bit integer instruction.");
1535 
1536     case AMDGPU::OPERAND_REG_IMM_INT32:
1537     case AMDGPU::OPERAND_REG_IMM_FP32:
1538     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1539     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1540     case AMDGPU::OPERAND_REG_IMM_INT16:
1541     case AMDGPU::OPERAND_REG_IMM_FP16:
1542     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1543     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1544     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1545     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1546     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1547     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1548       bool lost;
1549       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the floating-point semantics of the operand.
1551       FPLiteral.convert(*getOpFltSemantics(OpTy),
1552                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().
1555 
1556       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1557       Inst.addOperand(MCOperand::createImm(ImmVal));
1558       return;
1559     }
1560     default:
1561       llvm_unreachable("invalid operand size");
1562     }
1563 
1564     return;
1565   }
1566 
1567   // We got int literal token.
1568   // Only sign extend inline immediates.
1569   switch (OpTy) {
1570   case AMDGPU::OPERAND_REG_IMM_INT32:
1571   case AMDGPU::OPERAND_REG_IMM_FP32:
1572   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1573   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1574   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1575   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1576     if (isSafeTruncation(Val, 32) &&
1577         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1578                                      AsmParser->hasInv2PiInlineImm())) {
1579       Inst.addOperand(MCOperand::createImm(Val));
1580       return;
1581     }
1582 
1583     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1584     return;
1585 
1586   case AMDGPU::OPERAND_REG_IMM_INT64:
1587   case AMDGPU::OPERAND_REG_IMM_FP64:
1588   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1589   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1590     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1591       Inst.addOperand(MCOperand::createImm(Val));
1592       return;
1593     }
1594 
1595     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1596     return;
1597 
1598   case AMDGPU::OPERAND_REG_IMM_INT16:
1599   case AMDGPU::OPERAND_REG_IMM_FP16:
1600   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1601   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1602     if (isSafeTruncation(Val, 16) &&
1603         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1604                                      AsmParser->hasInv2PiInlineImm())) {
1605       Inst.addOperand(MCOperand::createImm(Val));
1606       return;
1607     }
1608 
1609     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1610     return;
1611 
1612   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1613   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1614     assert(isSafeTruncation(Val, 16));
1615     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1616                                         AsmParser->hasInv2PiInlineImm()));
1617 
1618     Inst.addOperand(MCOperand::createImm(Val));
1619     return;
1620   }
1621   default:
1622     llvm_unreachable("invalid operand size");
1623   }
1624 }
1625 
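// Add a k-immediate operand of the given bit width, i.e. a literal constant
// embedded in the instruction encoding (used, for example, by the
// v_madmk/v_madak family). Integer tokens are truncated to Bitwidth bits;
// fp tokens are first converted to the matching IEEE format.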
1626 template <unsigned Bitwidth>
1627 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1628   APInt Literal(64, Imm.Val);
1629 
1630   if (!Imm.IsFPImm) {
1631     // We got int literal token.
1632     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1633     return;
1634   }
1635 
1636   bool Lost;
1637   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1638   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1639                     APFloat::rmNearestTiesToEven, &Lost);
1640   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1641 }
1642 
1643 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1644   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1645 }
1646 
1647 static bool isInlineValue(unsigned Reg) {
1648   switch (Reg) {
1649   case AMDGPU::SRC_SHARED_BASE:
1650   case AMDGPU::SRC_SHARED_LIMIT:
1651   case AMDGPU::SRC_PRIVATE_BASE:
1652   case AMDGPU::SRC_PRIVATE_LIMIT:
1653   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1654     return true;
1655   default:
1656     return false;
1657   }
1658 }
1659 
1660 bool AMDGPUOperand::isInlineValue() const {
1661   return isRegKind() && ::isInlineValue(getReg());
1662 }
1663 
1664 //===----------------------------------------------------------------------===//
1665 // AsmParser
1666 //===----------------------------------------------------------------------===//
1667 
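// Map a register kind and a width (in 32-bit registers) to the corresponding
// register class ID, or return -1 if no such class exists.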
1668 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1669   if (Is == IS_VGPR) {
1670     switch (RegWidth) {
1671       default: return -1;
1672       case 1: return AMDGPU::VGPR_32RegClassID;
1673       case 2: return AMDGPU::VReg_64RegClassID;
1674       case 3: return AMDGPU::VReg_96RegClassID;
1675       case 4: return AMDGPU::VReg_128RegClassID;
1676       case 8: return AMDGPU::VReg_256RegClassID;
1677       case 16: return AMDGPU::VReg_512RegClassID;
1678     }
1679   } else if (Is == IS_TTMP) {
1680     switch (RegWidth) {
1681       default: return -1;
1682       case 1: return AMDGPU::TTMP_32RegClassID;
1683       case 2: return AMDGPU::TTMP_64RegClassID;
1684       case 4: return AMDGPU::TTMP_128RegClassID;
1685       case 8: return AMDGPU::TTMP_256RegClassID;
1686       case 16: return AMDGPU::TTMP_512RegClassID;
1687     }
1688   } else if (Is == IS_SGPR) {
1689     switch (RegWidth) {
1690       default: return -1;
1691       case 1: return AMDGPU::SGPR_32RegClassID;
1692       case 2: return AMDGPU::SGPR_64RegClassID;
1693       case 4: return AMDGPU::SGPR_128RegClassID;
1694       case 8: return AMDGPU::SGPR_256RegClassID;
1695       case 16: return AMDGPU::SGPR_512RegClassID;
1696     }
1697   }
1698   return -1;
1699 }
1700 
1701 static unsigned getSpecialRegForName(StringRef RegName) {
1702   return StringSwitch<unsigned>(RegName)
1703     .Case("exec", AMDGPU::EXEC)
1704     .Case("vcc", AMDGPU::VCC)
1705     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1706     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1707     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1708     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1709     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1710     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1711     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1712     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1713     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1714     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1715     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1716     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1717     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1718     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1719     .Case("m0", AMDGPU::M0)
1720     .Case("scc", AMDGPU::SCC)
1721     .Case("tba", AMDGPU::TBA)
1722     .Case("tma", AMDGPU::TMA)
1723     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1724     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1725     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1726     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1727     .Case("vcc_lo", AMDGPU::VCC_LO)
1728     .Case("vcc_hi", AMDGPU::VCC_HI)
1729     .Case("exec_lo", AMDGPU::EXEC_LO)
1730     .Case("exec_hi", AMDGPU::EXEC_HI)
1731     .Case("tma_lo", AMDGPU::TMA_LO)
1732     .Case("tma_hi", AMDGPU::TMA_HI)
1733     .Case("tba_lo", AMDGPU::TBA_LO)
1734     .Case("tba_hi", AMDGPU::TBA_HI)
1735     .Case("null", AMDGPU::SGPR_NULL)
1736     .Default(0);
1737 }
1738 
1739 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1740                                     SMLoc &EndLoc) {
1741   auto R = parseRegister();
1742   if (!R) return true;
1743   assert(R->isReg());
1744   RegNo = R->getReg();
1745   StartLoc = R->getStartLoc();
1746   EndLoc = R->getEndLoc();
1747   return false;
1748 }
1749 
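// Try to append register Reg1 to the group currently described by Reg and
// RegWidth while parsing a list such as [s0,s1,s2,s3]. Matching halves of
// special registers are merged into the full register (e.g. exec_lo followed
// by exec_hi yields exec); VGPR/SGPR/TTMP registers must be consecutive.
// Returns false if Reg1 cannot extend the group.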
1750 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1751                                             RegisterKind RegKind, unsigned Reg1,
1752                                             unsigned RegNum) {
1753   switch (RegKind) {
1754   case IS_SPECIAL:
1755     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1756       Reg = AMDGPU::EXEC;
1757       RegWidth = 2;
1758       return true;
1759     }
1760     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1761       Reg = AMDGPU::FLAT_SCR;
1762       RegWidth = 2;
1763       return true;
1764     }
1765     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1766       Reg = AMDGPU::XNACK_MASK;
1767       RegWidth = 2;
1768       return true;
1769     }
1770     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1771       Reg = AMDGPU::VCC;
1772       RegWidth = 2;
1773       return true;
1774     }
1775     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1776       Reg = AMDGPU::TBA;
1777       RegWidth = 2;
1778       return true;
1779     }
1780     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1781       Reg = AMDGPU::TMA;
1782       RegWidth = 2;
1783       return true;
1784     }
1785     return false;
1786   case IS_VGPR:
1787   case IS_SGPR:
1788   case IS_TTMP:
1789     if (Reg1 != Reg + RegWidth) {
1790       return false;
1791     }
1792     RegWidth++;
1793     return true;
1794   default:
1795     llvm_unreachable("unexpected register kind");
1796   }
1797 }
1798 
1799 static const StringRef Registers[] = {
1800   { "v" },
1801   { "s" },
1802   { "ttmp" },
1803 };
1804 
1805 bool
1806 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1807                             const AsmToken &NextToken) const {
1808 
1809   // A list of consecutive registers: [s0,s1,s2,s3]
1810   if (Token.is(AsmToken::LBrac))
1811     return true;
1812 
1813   if (!Token.is(AsmToken::Identifier))
1814     return false;
1815 
1816   // A single register like s0 or a range of registers like s[0:1]
1817 
1818   StringRef RegName = Token.getString();
1819 
1820   for (StringRef Reg : Registers) {
1821     if (RegName.startswith(Reg)) {
1822       if (Reg.size() < RegName.size()) {
1823         unsigned RegNum;
1824         // A single register with an index: rXX
1825         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1826           return true;
1827       } else {
1828         // A range of registers: r[XX:YY].
1829         if (NextToken.is(AsmToken::LBrac))
1830           return true;
1831       }
1832     }
1833   }
1834 
1835   return getSpecialRegForName(RegName);
1836 }
1837 
1838 bool
1839 AMDGPUAsmParser::isRegister()
1840 {
1841   return isRegister(getToken(), peekToken());
1842 }
1843 
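// Parse a single register reference. Accepted forms include special register
// names (vcc, exec, m0, ...), an indexed register (v0, s15, ttmp3), a range
// (v[8:11], s[0:1]) and a list of consecutive registers ([v0,v1,v2,v3]).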
1844 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1845                                           unsigned &RegNum, unsigned &RegWidth,
1846                                           unsigned *DwordRegIndex) {
1847   if (DwordRegIndex) { *DwordRegIndex = 0; }
1848   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1849   if (getLexer().is(AsmToken::Identifier)) {
1850     StringRef RegName = Parser.getTok().getString();
1851     if ((Reg = getSpecialRegForName(RegName))) {
1852       Parser.Lex();
1853       RegKind = IS_SPECIAL;
1854     } else {
1855       unsigned RegNumIndex = 0;
1856       if (RegName[0] == 'v') {
1857         RegNumIndex = 1;
1858         RegKind = IS_VGPR;
1859       } else if (RegName[0] == 's') {
1860         RegNumIndex = 1;
1861         RegKind = IS_SGPR;
1862       } else if (RegName.startswith("ttmp")) {
1863         RegNumIndex = strlen("ttmp");
1864         RegKind = IS_TTMP;
1865       } else {
1866         return false;
1867       }
1868       if (RegName.size() > RegNumIndex) {
1869         // Single 32-bit register: vXX.
1870         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1871           return false;
1872         Parser.Lex();
1873         RegWidth = 1;
1874       } else {
1875         // Range of registers: v[XX:YY]. ":YY" is optional.
1876         Parser.Lex();
1877         int64_t RegLo, RegHi;
1878         if (getLexer().isNot(AsmToken::LBrac))
1879           return false;
1880         Parser.Lex();
1881 
1882         if (getParser().parseAbsoluteExpression(RegLo))
1883           return false;
1884 
1885         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1886         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1887           return false;
1888         Parser.Lex();
1889 
1890         if (isRBrace) {
1891           RegHi = RegLo;
1892         } else {
1893           if (getParser().parseAbsoluteExpression(RegHi))
1894             return false;
1895 
1896           if (getLexer().isNot(AsmToken::RBrac))
1897             return false;
1898           Parser.Lex();
1899         }
1900         RegNum = (unsigned) RegLo;
1901         RegWidth = (RegHi - RegLo) + 1;
1902       }
1903     }
1904   } else if (getLexer().is(AsmToken::LBrac)) {
1905     // List of consecutive registers: [s0,s1,s2,s3]
1906     Parser.Lex();
1907     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1908       return false;
1909     if (RegWidth != 1)
1910       return false;
1911     RegisterKind RegKind1;
1912     unsigned Reg1, RegNum1, RegWidth1;
1913     do {
1914       if (getLexer().is(AsmToken::Comma)) {
1915         Parser.Lex();
1916       } else if (getLexer().is(AsmToken::RBrac)) {
1917         Parser.Lex();
1918         break;
1919       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1920         if (RegWidth1 != 1) {
1921           return false;
1922         }
1923         if (RegKind1 != RegKind) {
1924           return false;
1925         }
1926         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1927           return false;
1928         }
1929       } else {
1930         return false;
1931       }
1932     } while (true);
1933   } else {
1934     return false;
1935   }
1936   switch (RegKind) {
1937   case IS_SPECIAL:
1938     RegNum = 0;
1939     RegWidth = 1;
1940     break;
1941   case IS_VGPR:
1942   case IS_SGPR:
1943   case IS_TTMP:
1944   {
1945     unsigned Size = 1;
1946     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1947       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1948       Size = std::min(RegWidth, 4u);
1949     }
1950     if (RegNum % Size != 0)
1951       return false;
1952     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1953     RegNum = RegNum / Size;
1954     int RCID = getRegClass(RegKind, RegWidth);
1955     if (RCID == -1)
1956       return false;
1957     const MCRegisterClass RC = TRI->getRegClass(RCID);
1958     if (RegNum >= RC.getNumRegs())
1959       return false;
1960     Reg = RC.getRegister(RegNum);
1961     break;
1962   }
1963 
1964   default:
1965     llvm_unreachable("unexpected register kind");
1966   }
1967 
1968   if (!subtargetHasRegister(*TRI, Reg))
1969     return false;
1970   return true;
1971 }
1972 
1973 Optional<StringRef>
1974 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1975   switch (RegKind) {
1976   case IS_VGPR:
1977     return StringRef(".amdgcn.next_free_vgpr");
1978   case IS_SGPR:
1979     return StringRef(".amdgcn.next_free_sgpr");
1980   default:
1981     return None;
1982   }
1983 }
1984 
1985 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1986   auto SymbolName = getGprCountSymbolName(RegKind);
1987   assert(SymbolName && "initializing invalid register kind");
1988   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1989   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1990 }
1991 
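// Record a use of registers [DwordRegIndex, DwordRegIndex + RegWidth) by
// raising the corresponding .amdgcn.next_free_{v,s}gpr symbol if it is
// currently smaller. Returns false only when the symbol is not a variable
// with an absolute value, which is reported as an error.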
1992 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1993                                             unsigned DwordRegIndex,
1994                                             unsigned RegWidth) {
1995   // Symbols are only defined for GCN targets
1996   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1997     return true;
1998 
1999   auto SymbolName = getGprCountSymbolName(RegKind);
2000   if (!SymbolName)
2001     return true;
2002   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2003 
2004   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2005   int64_t OldCount;
2006 
2007   if (!Sym->isVariable())
2008     return !Error(getParser().getTok().getLoc(),
2009                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2010   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2011     return !Error(
2012         getParser().getTok().getLoc(),
2013         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2014 
2015   if (OldCount <= NewMax)
2016     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2017 
2018   return true;
2019 }
2020 
2021 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2022   const auto &Tok = Parser.getTok();
2023   SMLoc StartLoc = Tok.getLoc();
2024   SMLoc EndLoc = Tok.getEndLoc();
2025   RegisterKind RegKind;
2026   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2027 
2028   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    // FIXME: improve error messages (bug 41303).
2030     Error(StartLoc, "not a valid operand.");
2031     return nullptr;
2032   }
2033   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2034     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2035       return nullptr;
2036   } else
2037     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2038   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2039 }
2040 
2041 bool
2042 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2043   if (HasSP3AbsModifier) {
2044     // This is a workaround for handling expressions
2045     // as arguments of SP3 'abs' modifier, for example:
2046     //     |1.0|
2047     //     |-1|
2048     //     |1+x|
    // This syntax is not compatible with the syntax of standard
    // MC expressions (due to the trailing '|').
2051 
2052     SMLoc EndLoc;
2053     const MCExpr *Expr;
2054     SMLoc StartLoc = getLoc();
2055 
2056     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2057       return true;
2058     }
2059 
2060     if (!Expr->evaluateAsAbsolute(Val))
2061       return Error(StartLoc, "expected absolute expression");
2062 
2063     return false;
2064   }
2065 
2066   return getParser().parseAbsoluteExpression(Val);
2067 }
2068 
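// Parse an immediate operand: either a floating-point literal with an
// optional leading '-' (e.g. -1.5) or an integer absolute expression
// (e.g. 0x3c00 or 2+2).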
2069 OperandMatchResultTy
2070 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2071   // TODO: add syntactic sugar for 1/(2*PI)
2072 
2073   const auto& Tok = getToken();
2074   const auto& NextTok = peekToken();
2075   bool IsReal = Tok.is(AsmToken::Real);
2076   SMLoc S = Tok.getLoc();
2077   bool Negate = false;
2078 
2079   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2080     lex();
2081     IsReal = true;
2082     Negate = true;
2083   }
2084 
2085   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed here.
2089 
2090     StringRef Num = getTokenStr();
2091     lex();
2092 
2093     APFloat RealVal(APFloat::IEEEdouble());
2094     auto roundMode = APFloat::rmNearestTiesToEven;
2095     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2096       return MatchOperand_ParseFail;
2097     }
2098     if (Negate)
2099       RealVal.changeSign();
2100 
2101     Operands.push_back(
2102       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2103                                AMDGPUOperand::ImmTyNone, true));
2104 
2105     return MatchOperand_Success;
2106 
2107     // FIXME: Should enable arbitrary expressions here
2108   } else if (Tok.is(AsmToken::Integer) ||
2109              (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){
2110 
2111     int64_t IntVal;
2112     if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
2113       return MatchOperand_ParseFail;
2114 
2115     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2116     return MatchOperand_Success;
2117   }
2118 
2119   return MatchOperand_NoMatch;
2120 }
2121 
2122 OperandMatchResultTy
2123 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2124   if (!isRegister())
2125     return MatchOperand_NoMatch;
2126 
2127   if (auto R = parseRegister()) {
2128     assert(R->isReg());
2129     Operands.push_back(std::move(R));
2130     return MatchOperand_Success;
2131   }
2132   return MatchOperand_ParseFail;
2133 }
2134 
2135 OperandMatchResultTy
2136 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2137   auto res = parseReg(Operands);
2138   return (res == MatchOperand_NoMatch)?
2139          parseImm(Operands, HasSP3AbsMod) :
2140          res;
2141 }
2142 
2143 // Check if the current token is an SP3 'neg' modifier.
2144 // Currently this modifier is allowed in the following context:
2145 //
2146 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2147 // 2. Before an 'abs' modifier: -abs(...)
2148 // 3. Before an SP3 'abs' modifier: -|...|
2149 //
2150 // In all other cases "-" is handled as a part
2151 // of an expression that follows the sign.
2152 //
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2164 //
2165 bool
2166 AMDGPUAsmParser::parseSP3NegModifier() {
2167 
2168   AsmToken NextToken[2];
2169   peekTokens(NextToken);
2170 
2171   if (isToken(AsmToken::Minus) &&
2172       (isRegister(NextToken[0], NextToken[1]) ||
2173        NextToken[0].is(AsmToken::Pipe) ||
2174        isId(NextToken[0], "abs"))) {
2175     lex();
2176     return true;
2177   }
2178 
2179   return false;
2180 }
2181 
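// Parse a register or immediate together with optional floating-point input
// modifiers. Both the named forms neg(...) and abs(...) and the SP3 forms
// -x and |x| are accepted, and they may be combined, e.g. -abs(v0) or -|v1|.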
2182 OperandMatchResultTy
2183 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2184                                               bool AllowImm) {
2185   bool Neg, SP3Neg;
2186   bool Abs, SP3Abs;
2187   SMLoc Loc;
2188 
2189   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2190   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2191     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2192     return MatchOperand_ParseFail;
2193   }
2194 
2195   SP3Neg = parseSP3NegModifier();
2196 
2197   Loc = getLoc();
2198   Neg = trySkipId("neg");
2199   if (Neg && SP3Neg) {
2200     Error(Loc, "expected register or immediate");
2201     return MatchOperand_ParseFail;
2202   }
2203   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2204     return MatchOperand_ParseFail;
2205 
2206   Abs = trySkipId("abs");
2207   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2208     return MatchOperand_ParseFail;
2209 
2210   Loc = getLoc();
2211   SP3Abs = trySkipToken(AsmToken::Pipe);
2212   if (Abs && SP3Abs) {
2213     Error(Loc, "expected register or immediate");
2214     return MatchOperand_ParseFail;
2215   }
2216 
2217   OperandMatchResultTy Res;
2218   if (AllowImm) {
2219     Res = parseRegOrImm(Operands, SP3Abs);
2220   } else {
2221     Res = parseReg(Operands);
2222   }
2223   if (Res != MatchOperand_Success) {
2224     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2225   }
2226 
2227   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2228     return MatchOperand_ParseFail;
2229   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2230     return MatchOperand_ParseFail;
2231   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2232     return MatchOperand_ParseFail;
2233 
2234   AMDGPUOperand::Modifiers Mods;
2235   Mods.Abs = Abs || SP3Abs;
2236   Mods.Neg = Neg || SP3Neg;
2237 
2238   if (Mods.hasFPModifiers()) {
2239     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2240     Op.setModifiers(Mods);
2241   }
2242   return MatchOperand_Success;
2243 }
2244 
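// Parse a register or immediate with an optional integer input modifier,
// i.e. the sign-extension modifier sext(...), e.g. sext(v0).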
2245 OperandMatchResultTy
2246 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2247                                                bool AllowImm) {
2248   bool Sext = trySkipId("sext");
2249   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2250     return MatchOperand_ParseFail;
2251 
2252   OperandMatchResultTy Res;
2253   if (AllowImm) {
2254     Res = parseRegOrImm(Operands);
2255   } else {
2256     Res = parseReg(Operands);
2257   }
2258   if (Res != MatchOperand_Success) {
2259     return Sext? MatchOperand_ParseFail : Res;
2260   }
2261 
2262   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2263     return MatchOperand_ParseFail;
2264 
2265   AMDGPUOperand::Modifiers Mods;
2266   Mods.Sext = Sext;
2267 
2268   if (Mods.hasIntModifiers()) {
2269     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2270     Op.setModifiers(Mods);
2271   }
2272 
2273   return MatchOperand_Success;
2274 }
2275 
2276 OperandMatchResultTy
2277 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2278   return parseRegOrImmWithFPInputMods(Operands, false);
2279 }
2280 
2281 OperandMatchResultTy
2282 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2283   return parseRegOrImmWithIntInputMods(Operands, false);
2284 }
2285 
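// Parse either the keyword "off" (represented as an ImmTyOff immediate of 0)
// or a register operand.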
2286 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2287   auto Loc = getLoc();
2288   if (trySkipId("off")) {
2289     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2290                                                 AMDGPUOperand::ImmTyOff, false));
2291     return MatchOperand_Success;
2292   }
2293 
2294   if (!isRegister())
2295     return MatchOperand_NoMatch;
2296 
2297   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2298   if (Reg) {
2299     Operands.push_back(std::move(Reg));
2300     return MatchOperand_Success;
2301   }
2302 
2303   return MatchOperand_ParseFail;
}
2306 
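// Reject matches that contradict an encoding explicitly forced by the
// mnemonic suffix (_e32/_e64/_dpp/_sdwa), then apply a few target-specific
// operand restrictions that the generated matcher cannot express.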
2307 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2308   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2309 
2310   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2311       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2312       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2313       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2314     return Match_InvalidOperand;
2315 
2316   if ((TSFlags & SIInstrFlags::VOP3) &&
2317       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2318       getForcedEncodingSize() != 64)
2319     return Match_PreferE32;
2320 
2321   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2322       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2323     // v_mac_f32/16 allow only dst_sel == DWORD;
2324     auto OpNum =
2325         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2326     const auto &Op = Inst.getOperand(OpNum);
2327     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2328       return Match_InvalidOperand;
2329     }
2330   }
2331 
2332   if (TSFlags & SIInstrFlags::FLAT) {
2333     // FIXME: Produces error without correct column reported.
2334     auto Opcode = Inst.getOpcode();
2335     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2336 
2337     const auto &Op = Inst.getOperand(OpNum);
2338     if (!hasFlatOffsets() && Op.getImm() != 0)
2339       return Match_InvalidOperand;
2340 
    // GFX10: The address offset is a 12-bit signed byte offset. For the FLAT
    // segment it must be non-negative; the MSB is ignored and forced to zero.
2343     if (isGFX10()) {
2344       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2345         if (!isInt<12>(Op.getImm()))
2346           return Match_InvalidOperand;
2347       } else {
2348         if (!isUInt<11>(Op.getImm()))
2349           return Match_InvalidOperand;
2350       }
2351     }
2352   }
2353 
2354   return Match_Success;
2355 }
2356 
2357 // What asm variants we should check
2358 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2359   if (getForcedEncodingSize() == 32) {
2360     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2361     return makeArrayRef(Variants);
2362   }
2363 
2364   if (isForcedVOP3()) {
2365     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2366     return makeArrayRef(Variants);
2367   }
2368 
2369   if (isForcedSDWA()) {
2370     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2371                                         AMDGPUAsmVariants::SDWA9};
2372     return makeArrayRef(Variants);
2373   }
2374 
2375   if (isForcedDPP()) {
2376     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2377     return makeArrayRef(Variants);
2378   }
2379 
2380   static const unsigned Variants[] = {
2381     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2382     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2383   };
2384 
2385   return makeArrayRef(Variants);
2386 }
2387 
2388 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2389   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2390   const unsigned Num = Desc.getNumImplicitUses();
2391   for (unsigned i = 0; i < Num; ++i) {
2392     unsigned Reg = Desc.ImplicitUses[i];
2393     switch (Reg) {
2394     case AMDGPU::FLAT_SCR:
2395     case AMDGPU::VCC:
2396     case AMDGPU::VCC_LO:
2397     case AMDGPU::VCC_HI:
2398     case AMDGPU::M0:
2399     case AMDGPU::SGPR_NULL:
2400       return Reg;
2401     default:
2402       break;
2403     }
2404   }
2405   return AMDGPU::NoRegister;
2406 }
2407 
2408 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
2410 // Note that there are no cases when a GFX7 opcode violates
2411 // constant bus limitations due to the use of an f16 constant.
2412 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2413                                        unsigned OpIdx) const {
2414   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2415 
2416   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2417     return false;
2418   }
2419 
2420   const MCOperand &MO = Inst.getOperand(OpIdx);
2421 
2422   int64_t Val = MO.getImm();
2423   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2424 
2425   switch (OpSize) { // expected operand size
2426   case 8:
2427     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2428   case 4:
2429     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2430   case 2: {
2431     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2432     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2433         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2434         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2435         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2436       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2437     } else {
2438       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2439     }
2440   }
2441   default:
2442     llvm_unreachable("invalid operand size");
2443   }
2444 }
2445 
2446 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2447   const MCOperand &MO = Inst.getOperand(OpIdx);
2448   if (MO.isImm()) {
2449     return !isInlineConstant(Inst, OpIdx);
2450   }
2451   return !MO.isReg() ||
2452          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2453 }
2454 
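// VALU instructions may read only a limited number of scalar values (SGPRs
// and literal constants) through the constant bus: at most one on pre-GFX10
// targets and at most two on GFX10. Count the distinct scalar values used by
// this instruction and check that limit.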
2455 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2456   const unsigned Opcode = Inst.getOpcode();
2457   const MCInstrDesc &Desc = MII.get(Opcode);
2458   unsigned ConstantBusUseCount = 0;
2459   unsigned NumLiterals = 0;
2460   unsigned LiteralSize;
2461 
2462   if (Desc.TSFlags &
2463       (SIInstrFlags::VOPC |
2464        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2465        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2466        SIInstrFlags::SDWA)) {
2467     // Check special imm operands (used by madmk, etc)
2468     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2469       ++ConstantBusUseCount;
2470     }
2471 
2472     SmallDenseSet<unsigned> SGPRsUsed;
2473     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2474     if (SGPRUsed != AMDGPU::NoRegister) {
2475       SGPRsUsed.insert(SGPRUsed);
2476       ++ConstantBusUseCount;
2477     }
2478 
2479     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2480     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2481     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2482 
2483     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2484 
2485     for (int OpIdx : OpIndices) {
2486       if (OpIdx == -1) break;
2487 
2488       const MCOperand &MO = Inst.getOperand(OpIdx);
2489       if (usesConstantBus(Inst, OpIdx)) {
2490         if (MO.isReg()) {
2491           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
2493           //   s0, s[0:1]
2494           //   flat_scratch_lo, flat_scratch
2495           //   flat_scratch_lo, flat_scratch_hi
2496           // are theoretically valid but they are disabled anyway.
2497           // Note that this code mimics SIInstrInfo::verifyInstruction
2498           if (!SGPRsUsed.count(Reg)) {
2499             SGPRsUsed.insert(Reg);
2500             ++ConstantBusUseCount;
2501           }
2502           SGPRUsed = Reg;
2503         } else { // Expression or a literal
2504 
2505           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2506             continue; // special operand like VINTERP attr_chan
2507 
2508           // An instruction may use only one literal.
2509           // This has been validated on the previous step.
2510           // See validateVOP3Literal.
2511           // This literal may be used as more than one operand.
2512           // If all these operands are of the same size,
2513           // this literal counts as one scalar value.
2514           // Otherwise it counts as 2 scalar values.
2515           // See "GFX10 Shader Programming", section 3.6.2.3.
2516 
2517           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2518           if (Size < 4) Size = 4;
2519 
2520           if (NumLiterals == 0) {
2521             NumLiterals = 1;
2522             LiteralSize = Size;
2523           } else if (LiteralSize != Size) {
2524             NumLiterals = 2;
2525           }
2526         }
2527       }
2528     }
2529   }
2530   ConstantBusUseCount += NumLiterals;
2531 
2532   if (isGFX10())
2533     return ConstantBusUseCount <= 2;
2534 
2535   return ConstantBusUseCount <= 1;
2536 }
2537 
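// For instructions whose destination operand is marked earlyclobber, the
// destination register must not overlap any of the source registers.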
2538 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2539   const unsigned Opcode = Inst.getOpcode();
2540   const MCInstrDesc &Desc = MII.get(Opcode);
2541 
2542   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2543   if (DstIdx == -1 ||
2544       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2545     return true;
2546   }
2547 
2548   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2549 
2550   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2551   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2552   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2553 
2554   assert(DstIdx != -1);
2555   const MCOperand &Dst = Inst.getOperand(DstIdx);
2556   assert(Dst.isReg());
2557   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2558 
2559   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2560 
2561   for (int SrcIdx : SrcIndices) {
2562     if (SrcIdx == -1) break;
2563     const MCOperand &Src = Inst.getOperand(SrcIdx);
2564     if (Src.isReg()) {
2565       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2566       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2567         return false;
2568       }
2569     }
2570   }
2571 
2572   return true;
2573 }
2574 
2575 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2576 
2577   const unsigned Opc = Inst.getOpcode();
2578   const MCInstrDesc &Desc = MII.get(Opc);
2579 
2580   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2581     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2582     assert(ClampIdx != -1);
2583     return Inst.getOperand(ClampIdx).getImm() == 0;
2584   }
2585 
2586   return true;
2587 }
2588 
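// Check that the size of the vdata register matches what dmask and tfe
// require: popcount(dmask) dwords (always 4 for gather4), halved and rounded
// up when d16 is set on targets with packed d16, plus one extra dword when
// tfe is set. For example, dmask=0x7 with tfe requires a 4-dword (128-bit)
// vdata register.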
2589 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2590 
2591   const unsigned Opc = Inst.getOpcode();
2592   const MCInstrDesc &Desc = MII.get(Opc);
2593 
2594   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2595     return true;
2596 
2597   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2598   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2599   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2600 
2601   assert(VDataIdx != -1);
2602   assert(DMaskIdx != -1);
2603   assert(TFEIdx != -1);
2604 
2605   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2606   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2607   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2608   if (DMask == 0)
2609     DMask = 1;
2610 
2611   unsigned DataSize =
2612     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2613   if (hasPackedD16()) {
2614     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2615     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2616       DataSize = (DataSize + 1) / 2;
2617   }
2618 
2619   return (VDataSize / 4) == DataSize + TFESize;
2620 }
2621 
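// GFX10 only: check that the number of address registers matches what the
// dim operand and the opcode require (extra arguments + gradients +
// coordinates + lod/clamp/mip). For non-NSA encodings the expected count is
// rounded up to 8 or 16 dwords when it exceeds 4.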
2622 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2623   const unsigned Opc = Inst.getOpcode();
2624   const MCInstrDesc &Desc = MII.get(Opc);
2625 
2626   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2627     return true;
2628 
2629   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2630   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2631       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2632   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2633   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2634   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2635 
2636   assert(VAddr0Idx != -1);
2637   assert(SrsrcIdx != -1);
2638   assert(DimIdx != -1);
2639   assert(SrsrcIdx > VAddr0Idx);
2640 
2641   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2642   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2643   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2644   unsigned VAddrSize =
2645       IsNSA ? SrsrcIdx - VAddr0Idx
2646             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2647 
2648   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2649                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2650                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2651                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2652   if (!IsNSA) {
2653     if (AddrSize > 8)
2654       AddrSize = 16;
2655     else if (AddrSize > 4)
2656       AddrSize = 8;
2657   }
2658 
2659   return VAddrSize == AddrSize;
2660 }
2661 
2662 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2663 
2664   const unsigned Opc = Inst.getOpcode();
2665   const MCInstrDesc &Desc = MII.get(Opc);
2666 
2667   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2668     return true;
2669   if (!Desc.mayLoad() || !Desc.mayStore())
2670     return true; // Not atomic
2671 
2672   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2673   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2674 
2675   // This is an incomplete check because image_atomic_cmpswap
2676   // may only use 0x3 and 0xf while other atomic operations
2677   // may use 0x1 and 0x3. However these limitations are
2678   // verified when we check that dmask matches dst size.
2679   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2680 }
2681 
2682 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2683 
2684   const unsigned Opc = Inst.getOpcode();
2685   const MCInstrDesc &Desc = MII.get(Opc);
2686 
2687   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2688     return true;
2689 
2690   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2691   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2692 
2693   // GATHER4 instructions use dmask in a different fashion compared to
2694   // other MIMG instructions. The only useful DMASK values are
2695   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2696   // (red,red,red,red) etc.) The ISA document doesn't mention
2697   // this.
2698   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2699 }
2700 
2701 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2702 
2703   const unsigned Opc = Inst.getOpcode();
2704   const MCInstrDesc &Desc = MII.get(Opc);
2705 
2706   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2707     return true;
2708 
2709   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2710   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2711     if (isCI() || isSI())
2712       return false;
2713   }
2714 
2715   return true;
2716 }
2717 
2718 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2719   const unsigned Opc = Inst.getOpcode();
2720   const MCInstrDesc &Desc = MII.get(Opc);
2721 
2722   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2723     return true;
2724 
2725   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2726   if (DimIdx < 0)
2727     return true;
2728 
2729   long Imm = Inst.getOperand(DimIdx).getImm();
2730   if (Imm < 0 || Imm >= 8)
2731     return false;
2732 
2733   return true;
2734 }
2735 
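// Return true for "reversed" opcodes, i.e. those whose src0 and src1 operands
// are swapped relative to the base instruction (v_subrev_*, v_lshlrev_*, ...).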
2736 static bool IsRevOpcode(const unsigned Opcode)
2737 {
2738   switch (Opcode) {
2739   case AMDGPU::V_SUBREV_F32_e32:
2740   case AMDGPU::V_SUBREV_F32_e64:
2741   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2742   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2743   case AMDGPU::V_SUBREV_F32_e32_vi:
2744   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2745   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2746   case AMDGPU::V_SUBREV_F32_e64_vi:
2747 
2748   case AMDGPU::V_SUBREV_I32_e32:
2749   case AMDGPU::V_SUBREV_I32_e64:
2750   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2751   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2752 
2753   case AMDGPU::V_SUBBREV_U32_e32:
2754   case AMDGPU::V_SUBBREV_U32_e64:
2755   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2756   case AMDGPU::V_SUBBREV_U32_e32_vi:
2757   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2758   case AMDGPU::V_SUBBREV_U32_e64_vi:
2759 
2760   case AMDGPU::V_SUBREV_U32_e32:
2761   case AMDGPU::V_SUBREV_U32_e64:
2762   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2763   case AMDGPU::V_SUBREV_U32_e32_vi:
2764   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2765   case AMDGPU::V_SUBREV_U32_e64_vi:
2766 
2767   case AMDGPU::V_SUBREV_F16_e32:
2768   case AMDGPU::V_SUBREV_F16_e64:
2769   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2770   case AMDGPU::V_SUBREV_F16_e32_vi:
2771   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2772   case AMDGPU::V_SUBREV_F16_e64_vi:
2773 
2774   case AMDGPU::V_SUBREV_U16_e32:
2775   case AMDGPU::V_SUBREV_U16_e64:
2776   case AMDGPU::V_SUBREV_U16_e32_vi:
2777   case AMDGPU::V_SUBREV_U16_e64_vi:
2778 
2779   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2780   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2781   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2782 
2783   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2784   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2785 
2786   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2787   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2788 
2789   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2790   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2791 
2792   case AMDGPU::V_LSHRREV_B32_e32:
2793   case AMDGPU::V_LSHRREV_B32_e64:
2794   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2795   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2796   case AMDGPU::V_LSHRREV_B32_e32_vi:
2797   case AMDGPU::V_LSHRREV_B32_e64_vi:
2798   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2799   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2800 
2801   case AMDGPU::V_ASHRREV_I32_e32:
2802   case AMDGPU::V_ASHRREV_I32_e64:
2803   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2804   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2805   case AMDGPU::V_ASHRREV_I32_e32_vi:
2806   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2807   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2808   case AMDGPU::V_ASHRREV_I32_e64_vi:
2809 
2810   case AMDGPU::V_LSHLREV_B32_e32:
2811   case AMDGPU::V_LSHLREV_B32_e64:
2812   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2813   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2814   case AMDGPU::V_LSHLREV_B32_e32_vi:
2815   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2816   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2817   case AMDGPU::V_LSHLREV_B32_e64_vi:
2818 
2819   case AMDGPU::V_LSHLREV_B16_e32:
2820   case AMDGPU::V_LSHLREV_B16_e64:
2821   case AMDGPU::V_LSHLREV_B16_e32_vi:
2822   case AMDGPU::V_LSHLREV_B16_e64_vi:
2823   case AMDGPU::V_LSHLREV_B16_gfx10:
2824 
2825   case AMDGPU::V_LSHRREV_B16_e32:
2826   case AMDGPU::V_LSHRREV_B16_e64:
2827   case AMDGPU::V_LSHRREV_B16_e32_vi:
2828   case AMDGPU::V_LSHRREV_B16_e64_vi:
2829   case AMDGPU::V_LSHRREV_B16_gfx10:
2830 
2831   case AMDGPU::V_ASHRREV_I16_e32:
2832   case AMDGPU::V_ASHRREV_I16_e64:
2833   case AMDGPU::V_ASHRREV_I16_e32_vi:
2834   case AMDGPU::V_ASHRREV_I16_e64_vi:
2835   case AMDGPU::V_ASHRREV_I16_gfx10:
2836 
2837   case AMDGPU::V_LSHLREV_B64:
2838   case AMDGPU::V_LSHLREV_B64_gfx10:
2839   case AMDGPU::V_LSHLREV_B64_vi:
2840 
2841   case AMDGPU::V_LSHRREV_B64:
2842   case AMDGPU::V_LSHRREV_B64_gfx10:
2843   case AMDGPU::V_LSHRREV_B64_vi:
2844 
2845   case AMDGPU::V_ASHRREV_I64:
2846   case AMDGPU::V_ASHRREV_I64_gfx10:
2847   case AMDGPU::V_ASHRREV_I64_vi:
2848 
2849   case AMDGPU::V_PK_LSHLREV_B16:
2850   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2851   case AMDGPU::V_PK_LSHLREV_B16_vi:
2852 
2853   case AMDGPU::V_PK_LSHRREV_B16:
2854   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2855   case AMDGPU::V_PK_LSHRREV_B16_vi:
2856   case AMDGPU::V_PK_ASHRREV_I16:
2857   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2858   case AMDGPU::V_PK_ASHRREV_I16_vi:
2859     return true;
2860   default:
2861     return false;
2862   }
2863 }
2864 
2865 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2866 
2867   using namespace SIInstrFlags;
2868   const unsigned Opcode = Inst.getOpcode();
2869   const MCInstrDesc &Desc = MII.get(Opcode);
2870 
  // The lds_direct register is defined so that it can be used
  // with 9-bit source operands only. Ignore encodings which do not
  // accept such operands.
2873   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2874     return true;
2875 
2876   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2877   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2878   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2879 
2880   const int SrcIndices[] = { Src1Idx, Src2Idx };
2881 
2882   // lds_direct cannot be specified as either src1 or src2.
2883   for (int SrcIdx : SrcIndices) {
2884     if (SrcIdx == -1) break;
2885     const MCOperand &Src = Inst.getOperand(SrcIdx);
2886     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2887       return false;
2888     }
2889   }
2890 
2891   if (Src0Idx == -1)
2892     return true;
2893 
2894   const MCOperand &Src = Inst.getOperand(Src0Idx);
2895   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2896     return true;
2897 
2898   // lds_direct is specified as src0. Check additional limitations.
2899   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2900 }
2901 
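// SOP2/SOPC instructions may encode at most one 32-bit literal. Two source
// operands may still use a literal as long as they reference the same value.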
2902 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2903   unsigned Opcode = Inst.getOpcode();
2904   const MCInstrDesc &Desc = MII.get(Opcode);
2905   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2906     return true;
2907 
2908   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2909   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2910 
2911   const int OpIndices[] = { Src0Idx, Src1Idx };
2912 
2913   unsigned NumLiterals = 0;
2914   uint32_t LiteralValue;
2915 
2916   for (int OpIdx : OpIndices) {
2917     if (OpIdx == -1) break;
2918 
2919     const MCOperand &MO = Inst.getOperand(OpIdx);
2920     if (MO.isImm() &&
2921         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2922         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2923         !isInlineConstant(Inst, OpIdx)) {
2924       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2925       if (NumLiterals == 0 || LiteralValue != Value) {
2926         LiteralValue = Value;
2927         ++NumLiterals;
2928       }
2929     }
2930   }
2931 
2932   return NumLiterals <= 1;
2933 }
2934 
// A VOP3 literal is only allowed on GFX10+, and only one can be used.
2936 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
2937   unsigned Opcode = Inst.getOpcode();
2938   const MCInstrDesc &Desc = MII.get(Opcode);
2939   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
2940     return true;
2941 
2942   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2943   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2944   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2945 
2946   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2947 
2948   unsigned NumLiterals = 0;
2949   uint32_t LiteralValue;
2950 
2951   for (int OpIdx : OpIndices) {
2952     if (OpIdx == -1) break;
2953 
2954     const MCOperand &MO = Inst.getOperand(OpIdx);
2955     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
2956       continue;
2957 
2958     if (!isInlineConstant(Inst, OpIdx)) {
2959       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2960       if (NumLiterals == 0 || LiteralValue != Value) {
2961         LiteralValue = Value;
2962         ++NumLiterals;
2963       }
2964     }
2965   }
2966 
2967   return !NumLiterals ||
2968          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
2969 }
2970 
2971 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2972                                           const SMLoc &IDLoc) {
2973   if (!validateLdsDirect(Inst)) {
2974     Error(IDLoc,
2975       "invalid use of lds_direct");
2976     return false;
2977   }
2978   if (!validateSOPLiteral(Inst)) {
2979     Error(IDLoc,
2980       "only one literal operand is allowed");
2981     return false;
2982   }
2983   if (!validateVOP3Literal(Inst)) {
2984     Error(IDLoc,
2985       "invalid literal operand");
2986     return false;
2987   }
2988   if (!validateConstantBusLimitations(Inst)) {
2989     Error(IDLoc,
2990       "invalid operand (violates constant bus restrictions)");
2991     return false;
2992   }
2993   if (!validateEarlyClobberLimitations(Inst)) {
2994     Error(IDLoc,
2995       "destination must be different than all sources");
2996     return false;
2997   }
2998   if (!validateIntClampSupported(Inst)) {
2999     Error(IDLoc,
3000       "integer clamping is not supported on this GPU");
3001     return false;
3002   }
  // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
3004   if (!validateMIMGD16(Inst)) {
3005     Error(IDLoc,
3006       "d16 modifier is not supported on this GPU");
3007     return false;
3008   }
3009   if (!validateMIMGDim(Inst)) {
3010     Error(IDLoc, "dim modifier is required on this GPU");
3011     return false;
3012   }
3013   if (!validateMIMGDataSize(Inst)) {
3014     Error(IDLoc,
3015       "image data size does not match dmask and tfe");
3016     return false;
3017   }
3018   if (!validateMIMGAddrSize(Inst)) {
3019     Error(IDLoc,
3020       "image address size does not match dim and a16");
3021     return false;
3022   }
3023   if (!validateMIMGAtomicDMask(Inst)) {
3024     Error(IDLoc,
3025       "invalid atomic image dmask");
3026     return false;
3027   }
3028   if (!validateMIMGGatherDMask(Inst)) {
3029     Error(IDLoc,
3030       "invalid image_gather dmask: only one bit must be set");
3031     return false;
3032   }
3033 
3034   return true;
3035 }
3036 
3037 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3038                                             const FeatureBitset &FBS,
3039                                             unsigned VariantID = 0);
3040 
3041 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3042                                               OperandVector &Operands,
3043                                               MCStreamer &Out,
3044                                               uint64_t &ErrorInfo,
3045                                               bool MatchingInlineAsm) {
3046   MCInst Inst;
3047   unsigned Result = Match_Success;
3048   for (auto Variant : getMatchedVariants()) {
3049     uint64_t EI;
3050     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3051                                   Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3055     if ((R == Match_Success) ||
3056         (R == Match_PreferE32) ||
3057         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3058         (R == Match_InvalidOperand && Result != Match_MissingFeature
3059                                    && Result != Match_PreferE32) ||
3060         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3061                                    && Result != Match_MissingFeature
3062                                    && Result != Match_PreferE32)) {
3063       Result = R;
3064       ErrorInfo = EI;
3065     }
3066     if (R == Match_Success)
3067       break;
3068   }
3069 
3070   switch (Result) {
3071   default: break;
3072   case Match_Success:
3073     if (!validateInstruction(Inst, IDLoc)) {
3074       return true;
3075     }
3076     Inst.setLoc(IDLoc);
3077     Out.EmitInstruction(Inst, getSTI());
3078     return false;
3079 
3080   case Match_MissingFeature:
3081     return Error(IDLoc, "instruction not supported on this GPU");
3082 
3083   case Match_MnemonicFail: {
3084     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3085     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3086         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3087     return Error(IDLoc, "invalid instruction" + Suggestion,
3088                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3089   }
3090 
3091   case Match_InvalidOperand: {
3092     SMLoc ErrorLoc = IDLoc;
3093     if (ErrorInfo != ~0ULL) {
3094       if (ErrorInfo >= Operands.size()) {
3095         return Error(IDLoc, "too few operands for instruction");
3096       }
3097       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3098       if (ErrorLoc == SMLoc())
3099         ErrorLoc = IDLoc;
3100     }
3101     return Error(ErrorLoc, "invalid operand for instruction");
3102   }
3103 
3104   case Match_PreferE32:
3105     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3106                         "should be encoded as e32");
3107   }
3108   llvm_unreachable("Implement any new match types added!");
3109 }
3110 
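/// Parse an absolute expression and return it truncated to a uint32_t.
/// Returns true on failure, i.e. when the next token is neither an integer
/// nor an identifier, or when the expression cannot be evaluated.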
3111 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3112   int64_t Tmp = -1;
3113   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3114     return true;
3115   }
3116   if (getParser().parseAbsoluteExpression(Tmp)) {
3117     return true;
3118   }
3119   Ret = static_cast<uint32_t>(Tmp);
3120   return false;
3121 }
3122 
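/// Parse the "major, minor" version pair used by the HSA code object
/// directives.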
3123 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3124                                                uint32_t &Minor) {
3125   if (ParseAsAbsoluteExpression(Major))
3126     return TokError("invalid major version");
3127 
3128   if (getLexer().isNot(AsmToken::Comma))
3129     return TokError("minor version number required, comma expected");
3130   Lex();
3131 
3132   if (ParseAsAbsoluteExpression(Minor))
3133     return TokError("invalid minor version");
3134 
3135   return false;
3136 }
3137 
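/// Handle the .amdgcn_target directive. The quoted target string must match
/// the ISA version string derived from the current subtarget.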
3138 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3139   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3140     return TokError("directive only supported for amdgcn architecture");
3141 
3142   std::string Target;
3143 
3144   SMLoc TargetStart = getTok().getLoc();
3145   if (getParser().parseEscapedString(Target))
3146     return true;
3147   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3148 
3149   std::string ExpectedTarget;
3150   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3151   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3152 
3153   if (Target != ExpectedTargetOS.str())
3154     return getParser().Error(TargetRange.Start, "target must match options",
3155                              TargetRange);
3156 
3157   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3158   return false;
3159 }
3160 
3161 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3162   return getParser().Error(Range.Start, "value out of range", Range);
3163 }
3164 
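/// Convert the raw next-free VGPR/SGPR counts into the granulated block
/// counts stored in the kernel descriptor, accounting for VCC, flat scratch
/// and XNACK usage as well as subtarget-specific SGPR limits.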
3165 bool AMDGPUAsmParser::calculateGPRBlocks(
3166     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3167     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
3168     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
3169     unsigned &SGPRBlocks) {
3170   // TODO(scott.linder): These calculations are duplicated from
3171   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3172   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3173 
3174   unsigned NumVGPRs = NextFreeVGPR;
3175   unsigned NumSGPRs = NextFreeSGPR;
3176 
3177   if (Version.Major >= 10)
3178     NumSGPRs = 0;
3179   else {
3180     unsigned MaxAddressableNumSGPRs =
3181         IsaInfo::getAddressableNumSGPRs(&getSTI());
3182 
3183     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3184         NumSGPRs > MaxAddressableNumSGPRs)
3185       return OutOfRangeError(SGPRRange);
3186 
3187     NumSGPRs +=
3188         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3189 
3190     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3191         NumSGPRs > MaxAddressableNumSGPRs)
3192       return OutOfRangeError(SGPRRange);
3193 
3194     if (Features.test(FeatureSGPRInitBug))
3195       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3196   }
3197 
3198   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
3199   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3200 
3201   return false;
3202 }
3203 
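/// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block: each .amdhsa_*
/// directive fills in a field of a kernel_descriptor_t, which is then emitted
/// through the target streamer.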
3204 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3205   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3206     return TokError("directive only supported for amdgcn architecture");
3207 
3208   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3209     return TokError("directive only supported for amdhsa OS");
3210 
3211   StringRef KernelName;
3212   if (getParser().parseIdentifier(KernelName))
3213     return true;
3214 
3215   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3216 
3217   StringSet<> Seen;
3218 
3219   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3220 
3221   SMRange VGPRRange;
3222   uint64_t NextFreeVGPR = 0;
3223   SMRange SGPRRange;
3224   uint64_t NextFreeSGPR = 0;
3225   unsigned UserSGPRCount = 0;
3226   bool ReserveVCC = true;
3227   bool ReserveFlatScr = true;
3228   bool ReserveXNACK = hasXNACK();
3229 
3230   while (true) {
3231     while (getLexer().is(AsmToken::EndOfStatement))
3232       Lex();
3233 
3234     if (getLexer().isNot(AsmToken::Identifier))
3235       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3236 
3237     StringRef ID = getTok().getIdentifier();
3238     SMRange IDRange = getTok().getLocRange();
3239     Lex();
3240 
3241     if (ID == ".end_amdhsa_kernel")
3242       break;
3243 
3244     if (Seen.find(ID) != Seen.end())
3245       return TokError(".amdhsa_ directives cannot be repeated");
3246     Seen.insert(ID);
3247 
3248     SMLoc ValStart = getTok().getLoc();
3249     int64_t IVal;
3250     if (getParser().parseAbsoluteExpression(IVal))
3251       return true;
3252     SMLoc ValEnd = getTok().getLoc();
3253     SMRange ValRange = SMRange(ValStart, ValEnd);
3254 
3255     if (IVal < 0)
3256       return OutOfRangeError(ValRange);
3257 
3258     uint64_t Val = IVal;
3259 
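// Check that VALUE fits into the ENTRY bitfield and store it into FIELD.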
3260 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3261   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3262     return OutOfRangeError(RANGE);                                             \
3263   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3264 
3265     if (ID == ".amdhsa_group_segment_fixed_size") {
3266       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3267         return OutOfRangeError(ValRange);
3268       KD.group_segment_fixed_size = Val;
3269     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3270       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3271         return OutOfRangeError(ValRange);
3272       KD.private_segment_fixed_size = Val;
3273     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3274       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3275                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3276                        Val, ValRange);
3277       UserSGPRCount += 4;
3278     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3279       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3280                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3281                        ValRange);
3282       UserSGPRCount += 2;
3283     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3284       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3285                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3286                        ValRange);
3287       UserSGPRCount += 2;
3288     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3289       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3290                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3291                        Val, ValRange);
3292       UserSGPRCount += 2;
3293     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3294       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3295                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3296                        ValRange);
3297       UserSGPRCount += 2;
3298     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3299       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3300                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3301                        ValRange);
3302       UserSGPRCount += 2;
3303     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3304       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3305                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3306                        Val, ValRange);
3307       UserSGPRCount += 1;
3308     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3309       PARSE_BITS_ENTRY(
3310           KD.compute_pgm_rsrc2,
3311           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3312           ValRange);
3313     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3314       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3315                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3316                        ValRange);
3317     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3318       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3319                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3320                        ValRange);
3321     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3322       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3323                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3324                        ValRange);
3325     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3326       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3327                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3328                        ValRange);
3329     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3330       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3331                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3332                        ValRange);
3333     } else if (ID == ".amdhsa_next_free_vgpr") {
3334       VGPRRange = ValRange;
3335       NextFreeVGPR = Val;
3336     } else if (ID == ".amdhsa_next_free_sgpr") {
3337       SGPRRange = ValRange;
3338       NextFreeSGPR = Val;
3339     } else if (ID == ".amdhsa_reserve_vcc") {
3340       if (!isUInt<1>(Val))
3341         return OutOfRangeError(ValRange);
3342       ReserveVCC = Val;
3343     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3344       if (IVersion.Major < 7)
3345         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3346                                  IDRange);
3347       if (!isUInt<1>(Val))
3348         return OutOfRangeError(ValRange);
3349       ReserveFlatScr = Val;
3350     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3351       if (IVersion.Major < 8)
3352         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3353                                  IDRange);
3354       if (!isUInt<1>(Val))
3355         return OutOfRangeError(ValRange);
3356       ReserveXNACK = Val;
3357     } else if (ID == ".amdhsa_float_round_mode_32") {
3358       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3359                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3360     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3361       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3362                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3363     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3364       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3365                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3366     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3367       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3368                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3369                        ValRange);
3370     } else if (ID == ".amdhsa_dx10_clamp") {
3371       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3372                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3373     } else if (ID == ".amdhsa_ieee_mode") {
3374       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3375                        Val, ValRange);
3376     } else if (ID == ".amdhsa_fp16_overflow") {
3377       if (IVersion.Major < 9)
3378         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3379                                  IDRange);
3380       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3381                        ValRange);
3382     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3383       if (IVersion.Major < 10)
3384         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3385                                  IDRange);
3386       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3387                        ValRange);
3388     } else if (ID == ".amdhsa_memory_ordered") {
3389       if (IVersion.Major < 10)
3390         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3391                                  IDRange);
3392       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3393                        ValRange);
3394     } else if (ID == ".amdhsa_forward_progress") {
3395       if (IVersion.Major < 10)
3396         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3397                                  IDRange);
3398       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3399                        ValRange);
3400     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3401       PARSE_BITS_ENTRY(
3402           KD.compute_pgm_rsrc2,
3403           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3404           ValRange);
3405     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3406       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3407                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3408                        Val, ValRange);
3409     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3410       PARSE_BITS_ENTRY(
3411           KD.compute_pgm_rsrc2,
3412           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3413           ValRange);
3414     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3415       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3416                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3417                        Val, ValRange);
3418     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3419       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3420                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3421                        Val, ValRange);
3422     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3423       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3424                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3425                        Val, ValRange);
3426     } else if (ID == ".amdhsa_exception_int_div_zero") {
3427       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3428                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3429                        Val, ValRange);
3430     } else {
3431       return getParser().Error(IDRange.Start,
3432                                "unknown .amdhsa_kernel directive", IDRange);
3433     }
3434 
3435 #undef PARSE_BITS_ENTRY
3436   }
3437 
3438   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3439     return TokError(".amdhsa_next_free_vgpr directive is required");
3440 
3441   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3442     return TokError(".amdhsa_next_free_sgpr directive is required");
3443 
3444   unsigned VGPRBlocks;
3445   unsigned SGPRBlocks;
3446   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3447                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3448                          SGPRRange, VGPRBlocks, SGPRBlocks))
3449     return true;
3450 
3451   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3452           VGPRBlocks))
3453     return OutOfRangeError(VGPRRange);
3454   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3455                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3456 
3457   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3458           SGPRBlocks))
3459     return OutOfRangeError(SGPRRange);
3460   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3461                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3462                   SGPRBlocks);
3463 
3464   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3465     return TokError("too many user SGPRs enabled");
3466   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3467                   UserSGPRCount);
3468 
3469   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3470       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3471       ReserveFlatScr, ReserveXNACK);
3472   return false;
3473 }
3474 
3475 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3476   uint32_t Major;
3477   uint32_t Minor;
3478 
3479   if (ParseDirectiveMajorMinor(Major, Minor))
3480     return true;
3481 
3482   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3483   return false;
3484 }
3485 
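/// Handle the .hsa_code_object_isa directive. With no arguments, the ISA
/// version of the current subtarget is emitted; otherwise explicit major,
/// minor, stepping, vendor and arch values are parsed.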
3486 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3487   uint32_t Major;
3488   uint32_t Minor;
3489   uint32_t Stepping;
3490   StringRef VendorName;
3491   StringRef ArchName;
3492 
3493   // If this directive has no arguments, then use the ISA version for the
3494   // targeted GPU.
3495   if (getLexer().is(AsmToken::EndOfStatement)) {
3496     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3497     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3498                                                       ISA.Stepping,
3499                                                       "AMD", "AMDGPU");
3500     return false;
3501   }
3502 
3503   if (ParseDirectiveMajorMinor(Major, Minor))
3504     return true;
3505 
3506   if (getLexer().isNot(AsmToken::Comma))
3507     return TokError("stepping version number required, comma expected");
3508   Lex();
3509 
3510   if (ParseAsAbsoluteExpression(Stepping))
3511     return TokError("invalid stepping version");
3512 
3513   if (getLexer().isNot(AsmToken::Comma))
3514     return TokError("vendor name required, comma expected");
3515   Lex();
3516 
3517   if (getLexer().isNot(AsmToken::String))
3518     return TokError("invalid vendor name");
3519 
3520   VendorName = getLexer().getTok().getStringContents();
3521   Lex();
3522 
3523   if (getLexer().isNot(AsmToken::Comma))
3524     return TokError("arch name required, comma expected");
3525   Lex();
3526 
3527   if (getLexer().isNot(AsmToken::String))
3528     return TokError("invalid arch name");
3529 
3530   ArchName = getLexer().getTok().getStringContents();
3531   Lex();
3532 
3533   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3534                                                     VendorName, ArchName);
3535   return false;
3536 }
3537 
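/// Parse a single named field of an amd_kernel_code_t block and apply it to
/// Header. Bits that only exist on GFX10 are rejected on older targets.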
3538 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3539                                                amd_kernel_code_t &Header) {
3540   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3541   // assembly for backwards compatibility.
3542   if (ID == "max_scratch_backing_memory_byte_size") {
3543     Parser.eatToEndOfStatement();
3544     return false;
3545   }
3546 
3547   SmallString<40> ErrStr;
3548   raw_svector_ostream Err(ErrStr);
3549   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3550     return TokError(Err.str());
3551   }
3552   Lex();
3553 
3554   if (ID == "enable_wgp_mode") {
3555     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3556       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3557   }
3558 
3559   if (ID == "enable_mem_ordered") {
3560     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3561       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3562   }
3563 
3564   if (ID == "enable_fwd_progress") {
3565     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3566       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3567   }
3568 
3569   return false;
3570 }
3571 
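/// Parse an .amd_kernel_code_t ... .end_amd_kernel_code_t block and emit the
/// resulting header through the target streamer.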
3572 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3573   amd_kernel_code_t Header;
3574   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3575 
3576   while (true) {
3577     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3578     // will set the current token to EndOfStatement.
3579     while(getLexer().is(AsmToken::EndOfStatement))
3580       Lex();
3581 
3582     if (getLexer().isNot(AsmToken::Identifier))
3583       return TokError("expected value identifier or .end_amd_kernel_code_t");
3584 
3585     StringRef ID = getLexer().getTok().getIdentifier();
3586     Lex();
3587 
3588     if (ID == ".end_amd_kernel_code_t")
3589       break;
3590 
3591     if (ParseAMDKernelCodeTValue(ID, Header))
3592       return true;
3593   }
3594 
3595   getTargetStreamer().EmitAMDKernelCodeT(Header);
3596 
3597   return false;
3598 }
3599 
3600 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3601   if (getLexer().isNot(AsmToken::Identifier))
3602     return TokError("expected symbol name");
3603 
3604   StringRef KernelName = Parser.getTok().getString();
3605 
3606   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3607                                            ELF::STT_AMDGPU_HSA_KERNEL);
3608   Lex();
3609   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3610     KernelScope.initialize(getContext());
3611   return false;
3612 }
3613 
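/// Handle the .amd_amdgpu_isa directive. The ISA string in the assembly must
/// match the one derived from the subtarget (triple and mcpu).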
3614 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3615   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3616     return Error(getParser().getTok().getLoc(),
3617                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3618                  "architectures");
3619   }
3620 
3621   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3622 
3623   std::string ISAVersionStringFromSTI;
3624   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3625   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3626 
3627   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3628     return Error(getParser().getTok().getLoc(),
3629                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3630                  "arguments specified through the command line");
3631   }
3632 
3633   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3634   Lex();
3635 
3636   return false;
3637 }
3638 
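/// Parse the HSA metadata text delimited by the version-specific begin/end
/// directives and pass it to the target streamer.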
3639 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3640   const char *AssemblerDirectiveBegin;
3641   const char *AssemblerDirectiveEnd;
3642   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3643       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3644           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3645                             HSAMD::V3::AssemblerDirectiveEnd)
3646           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3647                             HSAMD::AssemblerDirectiveEnd);
3648 
3649   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3650     return Error(getParser().getTok().getLoc(),
3651                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3652                  "not available on non-amdhsa OSes")).str());
3653   }
3654 
3655   std::string HSAMetadataString;
3656   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3657                           HSAMetadataString))
3658     return true;
3659 
3660   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3661     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3662       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3663   } else {
3664     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3665       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3666   }
3667 
3668   return false;
3669 }
3670 
3671 /// Common code to parse out a block of text (typically YAML) between start and
3672 /// end directives.
3673 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3674                                           const char *AssemblerDirectiveEnd,
3675                                           std::string &CollectString) {
3676 
3677   raw_string_ostream CollectStream(CollectString);
3678 
3679   getLexer().setSkipSpace(false);
3680 
3681   bool FoundEnd = false;
3682   while (!getLexer().is(AsmToken::Eof)) {
3683     while (getLexer().is(AsmToken::Space)) {
3684       CollectStream << getLexer().getTok().getString();
3685       Lex();
3686     }
3687 
3688     if (getLexer().is(AsmToken::Identifier)) {
3689       StringRef ID = getLexer().getTok().getIdentifier();
3690       if (ID == AssemblerDirectiveEnd) {
3691         Lex();
3692         FoundEnd = true;
3693         break;
3694       }
3695     }
3696 
3697     CollectStream << Parser.parseStringToEndOfStatement()
3698                   << getContext().getAsmInfo()->getSeparatorString();
3699 
3700     Parser.eatToEndOfStatement();
3701   }
3702 
3703   getLexer().setSkipSpace(true);
3704 
3705   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3706     return TokError(Twine("expected directive ") +
3707                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3708   }
3709 
3710   CollectStream.flush();
3711   return false;
3712 }
3713 
3714 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3715 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3716   std::string String;
3717   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3718                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3719     return true;
3720 
3721   auto PALMetadata = getTargetStreamer().getPALMetadata();
3722   if (!PALMetadata->setFromString(String))
3723     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3724   return false;
3725 }
3726 
3727 /// Parse the assembler directive for old linear-format PAL metadata.
3728 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3729   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3730     return Error(getParser().getTok().getLoc(),
3731                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3732                  "not available on non-amdpal OSes")).str());
3733   }
3734 
3735   auto PALMetadata = getTargetStreamer().getPALMetadata();
3736   PALMetadata->setLegacy();
3737   for (;;) {
3738     uint32_t Key, Value;
3739     if (ParseAsAbsoluteExpression(Key)) {
3740       return TokError(Twine("invalid value in ") +
3741                       Twine(PALMD::AssemblerDirective));
3742     }
3743     if (getLexer().isNot(AsmToken::Comma)) {
3744       return TokError(Twine("expected an even number of values in ") +
3745                       Twine(PALMD::AssemblerDirective));
3746     }
3747     Lex();
3748     if (ParseAsAbsoluteExpression(Value)) {
3749       return TokError(Twine("invalid value in ") +
3750                       Twine(PALMD::AssemblerDirective));
3751     }
3752     PALMetadata->setRegister(Key, Value);
3753     if (getLexer().isNot(AsmToken::Comma))
3754       break;
3755     Lex();
3756   }
3757   return false;
3758 }
3759 
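/// Dispatch target-specific assembler directives to their handlers based on
/// the code object version; returns true for directives not handled here.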
3760 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3761   StringRef IDVal = DirectiveID.getString();
3762 
3763   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3764     if (IDVal == ".amdgcn_target")
3765       return ParseDirectiveAMDGCNTarget();
3766 
3767     if (IDVal == ".amdhsa_kernel")
3768       return ParseDirectiveAMDHSAKernel();
3769 
3770     // TODO: Restructure/combine with PAL metadata directive.
3771     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3772       return ParseDirectiveHSAMetadata();
3773   } else {
3774     if (IDVal == ".hsa_code_object_version")
3775       return ParseDirectiveHSACodeObjectVersion();
3776 
3777     if (IDVal == ".hsa_code_object_isa")
3778       return ParseDirectiveHSACodeObjectISA();
3779 
3780     if (IDVal == ".amd_kernel_code_t")
3781       return ParseDirectiveAMDKernelCodeT();
3782 
3783     if (IDVal == ".amdgpu_hsa_kernel")
3784       return ParseDirectiveAMDGPUHsaKernel();
3785 
3786     if (IDVal == ".amd_amdgpu_isa")
3787       return ParseDirectiveISAVersion();
3788 
3789     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3790       return ParseDirectiveHSAMetadata();
3791   }
3792 
3793   if (IDVal == PALMD::AssemblerDirectiveBegin)
3794     return ParseDirectivePALMetadataBegin();
3795 
3796   if (IDVal == PALMD::AssemblerDirective)
3797     return ParseDirectivePALMetadata();
3798 
3799   return true;
3800 }
3801 
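/// Check whether a physical register actually exists on the current
/// subtarget; some registers (e.g. ttmp[12:15], xnack_mask, flat_scratch,
/// sgpr102-105) are only present on certain generations.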
3802 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3803                                            unsigned RegNo) const {
3804 
3805   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3806        R.isValid(); ++R) {
3807     if (*R == RegNo)
3808       return isGFX9() || isGFX10();
3809   }
3810 
3811   // GFX10 has 2 more SGPRs: 104 and 105.
3812   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3813        R.isValid(); ++R) {
3814     if (*R == RegNo)
3815       return hasSGPR104_SGPR105();
3816   }
3817 
3818   switch (RegNo) {
3819   case AMDGPU::TBA:
3820   case AMDGPU::TBA_LO:
3821   case AMDGPU::TBA_HI:
3822   case AMDGPU::TMA:
3823   case AMDGPU::TMA_LO:
3824   case AMDGPU::TMA_HI:
3825     return !isGFX9() && !isGFX10();
3826   case AMDGPU::XNACK_MASK:
3827   case AMDGPU::XNACK_MASK_LO:
3828   case AMDGPU::XNACK_MASK_HI:
3829     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3830   case AMDGPU::SGPR_NULL:
3831     return isGFX10();
3832   default:
3833     break;
3834   }
3835 
3836   if (isInlineValue(RegNo))
3837     return !isCI() && !isSI() && !isVI();
3838 
3839   if (isCI())
3840     return true;
3841 
3842   if (isSI() || isGFX10()) {
3843     // No flat_scr on SI.
3844     // On GFX10 flat scratch is not a valid register operand and can only be
3845     // accessed with s_setreg/s_getreg.
3846     switch (RegNo) {
3847     case AMDGPU::FLAT_SCR:
3848     case AMDGPU::FLAT_SCR_LO:
3849     case AMDGPU::FLAT_SCR_HI:
3850       return false;
3851     default:
3852       return true;
3853     }
3854   }
3855 
3856   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3857   // SI/CI have.
3858   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3859        R.isValid(); ++R) {
3860     if (*R == RegNo)
3861       return hasSGPR102_SGPR103();
3862   }
3863 
3864   return true;
3865 }
3866 
3867 OperandMatchResultTy
3868 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
3869                               OperandMode Mode) {
3870   // Try to parse with a custom parser
3871   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3872 
3873   // If we successfully parsed the operand, or if there was an error parsing,
3874   // we are done.
3875   //
3876   // If we are parsing after we reach EndOfStatement then this means we
3877   // are appending default values to the Operands list.  This is only done
3878   // by custom parsers, so we shouldn't continue on to the generic parsing.
3879   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3880       getLexer().is(AsmToken::EndOfStatement))
3881     return ResTy;
3882 
3883   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
3884     unsigned Prefix = Operands.size();
3885     SMLoc LBraceLoc = getTok().getLoc();
3886     Parser.Lex(); // eat the '['
3887 
3888     for (;;) {
3889       ResTy = parseReg(Operands);
3890       if (ResTy != MatchOperand_Success)
3891         return ResTy;
3892 
3893       if (getLexer().is(AsmToken::RBrac))
3894         break;
3895 
3896       if (getLexer().isNot(AsmToken::Comma))
3897         return MatchOperand_ParseFail;
3898       Parser.Lex();
3899     }
3900 
3901     if (Operands.size() - Prefix > 1) {
3902       Operands.insert(Operands.begin() + Prefix,
3903                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
3904       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
3905                                                     getTok().getLoc()));
3906     }
3907 
3908     Parser.Lex(); // eat the ']'
3909     return MatchOperand_Success;
3910   }
3911 
3912   ResTy = parseRegOrImm(Operands);
3913 
3914   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
3915     return ResTy;
3916 
3917   const auto &Tok = Parser.getTok();
3918   SMLoc S = Tok.getLoc();
3919 
3920   const MCExpr *Expr = nullptr;
3921   if (!Parser.parseExpression(Expr)) {
3922     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3923     return MatchOperand_Success;
3924   }
3925 
3926   // Possibly this is an instruction flag like 'gds'.
3927   if (Tok.getKind() == AsmToken::Identifier) {
3928     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3929     Parser.Lex();
3930     return MatchOperand_Success;
3931   }
3932 
3933   return MatchOperand_NoMatch;
3934 }
3935 
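/// Strip a forced-encoding suffix (_e32, _e64, _dpp or _sdwa) from the
/// mnemonic and record the corresponding forced encoding for matching.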
3936 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3937   // Clear any forced encodings from the previous instruction.
3938   setForcedEncodingSize(0);
3939   setForcedDPP(false);
3940   setForcedSDWA(false);
3941 
3942   if (Name.endswith("_e64")) {
3943     setForcedEncodingSize(64);
3944     return Name.substr(0, Name.size() - 4);
3945   } else if (Name.endswith("_e32")) {
3946     setForcedEncodingSize(32);
3947     return Name.substr(0, Name.size() - 4);
3948   } else if (Name.endswith("_dpp")) {
3949     setForcedDPP(true);
3950     return Name.substr(0, Name.size() - 4);
3951   } else if (Name.endswith("_sdwa")) {
3952     setForcedSDWA(true);
3953     return Name.substr(0, Name.size() - 5);
3954   }
3955   return Name;
3956 }
3957 
3958 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3959                                        StringRef Name,
3960                                        SMLoc NameLoc, OperandVector &Operands) {
3961   // Add the instruction mnemonic
3962   Name = parseMnemonicSuffix(Name);
3963   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3964 
3965   bool IsMIMG = Name.startswith("image_");
3966 
3967   while (!getLexer().is(AsmToken::EndOfStatement)) {
3968     OperandMode Mode = OperandMode_Default;
3969     if (IsMIMG && isGFX10() && Operands.size() == 2)
3970       Mode = OperandMode_NSA;
3971     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
3972 
3973     // Eat the comma or space if there is one.
3974     if (getLexer().is(AsmToken::Comma))
3975       Parser.Lex();
3976 
3977     switch (Res) {
3978       case MatchOperand_Success: break;
3979       case MatchOperand_ParseFail:
3980         Error(getLexer().getLoc(), "failed parsing operand.");
3981         while (!getLexer().is(AsmToken::EndOfStatement)) {
3982           Parser.Lex();
3983         }
3984         return true;
3985       case MatchOperand_NoMatch:
3986         Error(getLexer().getLoc(), "not a valid operand.");
3987         while (!getLexer().is(AsmToken::EndOfStatement)) {
3988           Parser.Lex();
3989         }
3990         return true;
3991     }
3992   }
3993 
3994   return false;
3995 }
3996 
3997 //===----------------------------------------------------------------------===//
3998 // Utility functions
3999 //===----------------------------------------------------------------------===//
4000 
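/// Parse an integer operand of the form "Prefix:value", where the value may
/// be preceded by a minus sign.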
4001 OperandMatchResultTy
4002 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
4003   switch(getLexer().getKind()) {
4004     default: return MatchOperand_NoMatch;
4005     case AsmToken::Identifier: {
4006       StringRef Name = Parser.getTok().getString();
4007       if (!Name.equals(Prefix)) {
4008         return MatchOperand_NoMatch;
4009       }
4010 
4011       Parser.Lex();
4012       if (getLexer().isNot(AsmToken::Colon))
4013         return MatchOperand_ParseFail;
4014 
4015       Parser.Lex();
4016 
4017       bool IsMinus = false;
4018       if (getLexer().getKind() == AsmToken::Minus) {
4019         Parser.Lex();
4020         IsMinus = true;
4021       }
4022 
4023       if (getLexer().isNot(AsmToken::Integer))
4024         return MatchOperand_ParseFail;
4025 
4026       if (getParser().parseAbsoluteExpression(Int))
4027         return MatchOperand_ParseFail;
4028 
4029       if (IsMinus)
4030         Int = -Int;
4031       break;
4032     }
4033   }
4034   return MatchOperand_Success;
4035 }
4036 
4037 OperandMatchResultTy
4038 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4039                                     AMDGPUOperand::ImmTy ImmTy,
4040                                     bool (*ConvertResult)(int64_t&)) {
4041   SMLoc S = Parser.getTok().getLoc();
4042   int64_t Value = 0;
4043 
4044   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4045   if (Res != MatchOperand_Success)
4046     return Res;
4047 
4048   if (ConvertResult && !ConvertResult(Value)) {
4049     return MatchOperand_ParseFail;
4050   }
4051 
4052   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4053   return MatchOperand_Success;
4054 }
4055 
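/// Parse an operand of the form "Prefix:[v,v,...]" with up to four 0/1
/// elements that are packed into a single immediate (used for operands such
/// as op_sel).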
4056 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
4057   const char *Prefix,
4058   OperandVector &Operands,
4059   AMDGPUOperand::ImmTy ImmTy,
4060   bool (*ConvertResult)(int64_t&)) {
4061   StringRef Name = Parser.getTok().getString();
4062   if (!Name.equals(Prefix))
4063     return MatchOperand_NoMatch;
4064 
4065   Parser.Lex();
4066   if (getLexer().isNot(AsmToken::Colon))
4067     return MatchOperand_ParseFail;
4068 
4069   Parser.Lex();
4070   if (getLexer().isNot(AsmToken::LBrac))
4071     return MatchOperand_ParseFail;
4072   Parser.Lex();
4073 
4074   unsigned Val = 0;
4075   SMLoc S = Parser.getTok().getLoc();
4076 
4077   // FIXME: How to verify the number of elements matches the number of src
4078   // operands?
4079   for (int I = 0; I < 4; ++I) {
4080     if (I != 0) {
4081       if (getLexer().is(AsmToken::RBrac))
4082         break;
4083 
4084       if (getLexer().isNot(AsmToken::Comma))
4085         return MatchOperand_ParseFail;
4086       Parser.Lex();
4087     }
4088 
4089     if (getLexer().isNot(AsmToken::Integer))
4090       return MatchOperand_ParseFail;
4091 
4092     int64_t Op;
4093     if (getParser().parseAbsoluteExpression(Op))
4094       return MatchOperand_ParseFail;
4095 
4096     if (Op != 0 && Op != 1)
4097       return MatchOperand_ParseFail;
4098     Val |= (Op << I);
4099   }
4100 
4101   Parser.Lex();
4102   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4103   return MatchOperand_Success;
4104 }
4105 
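/// Parse a named single-bit modifier such as "glc"; the "no"-prefixed form
/// (e.g. "noglc") clears the bit, and a missing modifier at the end of the
/// statement defaults to 0.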
4106 OperandMatchResultTy
4107 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4108                                AMDGPUOperand::ImmTy ImmTy) {
4109   int64_t Bit = 0;
4110   SMLoc S = Parser.getTok().getLoc();
4111 
4112   // If we are at the end of the statement, this is a default argument, so
4113   // leave Bit at its default value.
4114   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4115     switch(getLexer().getKind()) {
4116       case AsmToken::Identifier: {
4117         StringRef Tok = Parser.getTok().getString();
4118         if (Tok == Name) {
4119           if (Tok == "r128" && isGFX9())
4120             Error(S, "r128 modifier is not supported on this GPU");
4121           if (Tok == "a16" && !isGFX9())
4122             Error(S, "a16 modifier is not supported on this GPU");
4123           Bit = 1;
4124           Parser.Lex();
4125         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4126           Bit = 0;
4127           Parser.Lex();
4128         } else {
4129           return MatchOperand_NoMatch;
4130         }
4131         break;
4132       }
4133       default:
4134         return MatchOperand_NoMatch;
4135     }
4136   }
4137 
4138   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4139     return MatchOperand_ParseFail;
4140 
4141   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4142   return MatchOperand_Success;
4143 }
4144 
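/// Append an optional immediate operand to Inst: use the source operand
/// recorded in OptionalIdx if it was present, otherwise use Default.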
4145 static void addOptionalImmOperand(
4146   MCInst& Inst, const OperandVector& Operands,
4147   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4148   AMDGPUOperand::ImmTy ImmT,
4149   int64_t Default = 0) {
4150   auto i = OptionalIdx.find(ImmT);
4151   if (i != OptionalIdx.end()) {
4152     unsigned Idx = i->second;
4153     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4154   } else {
4155     Inst.addOperand(MCOperand::createImm(Default));
4156   }
4157 }
4158 
4159 OperandMatchResultTy
4160 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4161   if (getLexer().isNot(AsmToken::Identifier)) {
4162     return MatchOperand_NoMatch;
4163   }
4164   StringRef Tok = Parser.getTok().getString();
4165   if (Tok != Prefix) {
4166     return MatchOperand_NoMatch;
4167   }
4168 
4169   Parser.Lex();
4170   if (getLexer().isNot(AsmToken::Colon)) {
4171     return MatchOperand_ParseFail;
4172   }
4173 
4174   Parser.Lex();
4175   if (getLexer().isNot(AsmToken::Identifier)) {
4176     return MatchOperand_ParseFail;
4177   }
4178 
4179   Value = Parser.getTok().getString();
4180   return MatchOperand_Success;
4181 }
4182 
4183 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4184 // values to live in a joint format operand in the MCInst encoding.
4185 OperandMatchResultTy
4186 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4187   SMLoc S = Parser.getTok().getLoc();
4188   int64_t Dfmt = 0, Nfmt = 0;
4189   // dfmt and nfmt can appear in either order, and each is optional.
4190   bool GotDfmt = false, GotNfmt = false;
4191   while (!GotDfmt || !GotNfmt) {
4192     if (!GotDfmt) {
4193       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4194       if (Res != MatchOperand_NoMatch) {
4195         if (Res != MatchOperand_Success)
4196           return Res;
4197         if (Dfmt >= 16) {
4198           Error(Parser.getTok().getLoc(), "out of range dfmt");
4199           return MatchOperand_ParseFail;
4200         }
4201         GotDfmt = true;
4202         Parser.Lex();
4203         continue;
4204       }
4205     }
4206     if (!GotNfmt) {
4207       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4208       if (Res != MatchOperand_NoMatch) {
4209         if (Res != MatchOperand_Success)
4210           return Res;
4211         if (Nfmt >= 8) {
4212           Error(Parser.getTok().getLoc(), "out of range nfmt");
4213           return MatchOperand_ParseFail;
4214         }
4215         GotNfmt = true;
4216         Parser.Lex();
4217         continue;
4218       }
4219     }
4220     break;
4221   }
4222   if (!GotDfmt && !GotNfmt)
4223     return MatchOperand_NoMatch;
4224   auto Format = Dfmt | Nfmt << 4;
4225   Operands.push_back(
4226       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4227   return MatchOperand_Success;
4228 }
4229 
4230 //===----------------------------------------------------------------------===//
4231 // ds
4232 //===----------------------------------------------------------------------===//
4233 
4234 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4235                                     const OperandVector &Operands) {
4236   OptionalImmIndexMap OptionalIdx;
4237 
4238   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4239     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4240 
4241     // Add the register arguments
4242     if (Op.isReg()) {
4243       Op.addRegOperands(Inst, 1);
4244       continue;
4245     }
4246 
4247     // Handle optional arguments
4248     OptionalIdx[Op.getImmTy()] = i;
4249   }
4250 
4251   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4252   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4253   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4254 
4255   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4256 }
4257 
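/// Convert parsed DS operands to an MCInst: the offset is added (as a swizzle
/// immediate for ds_swizzle_b32), gds is added unless it is hardcoded in the
/// mnemonic, and the implicit m0 use is appended last.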
4258 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4259                                 bool IsGdsHardcoded) {
4260   OptionalImmIndexMap OptionalIdx;
4261 
4262   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4263     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4264 
4265     // Add the register arguments
4266     if (Op.isReg()) {
4267       Op.addRegOperands(Inst, 1);
4268       continue;
4269     }
4270 
4271     if (Op.isToken() && Op.getToken() == "gds") {
4272       IsGdsHardcoded = true;
4273       continue;
4274     }
4275 
4276     // Handle optional arguments
4277     OptionalIdx[Op.getImmTy()] = i;
4278   }
4279 
4280   AMDGPUOperand::ImmTy OffsetType =
4281     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4282      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4283      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4284                                                       AMDGPUOperand::ImmTyOffset;
4285 
4286   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4287 
4288   if (!IsGdsHardcoded) {
4289     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4290   }
4291   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4292 }
4293 
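/// Convert parsed export (exp) operands: rewrite the sources for the
/// compressed form when 'compr' is present, derive the enable mask from which
/// sources are not 'off', and append the vm, compr and en operands.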
4294 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4295   OptionalImmIndexMap OptionalIdx;
4296 
4297   unsigned OperandIdx[4];
4298   unsigned EnMask = 0;
4299   int SrcIdx = 0;
4300 
4301   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4302     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4303 
4304     // Add the register arguments
4305     if (Op.isReg()) {
4306       assert(SrcIdx < 4);
4307       OperandIdx[SrcIdx] = Inst.size();
4308       Op.addRegOperands(Inst, 1);
4309       ++SrcIdx;
4310       continue;
4311     }
4312 
4313     if (Op.isOff()) {
4314       assert(SrcIdx < 4);
4315       OperandIdx[SrcIdx] = Inst.size();
4316       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4317       ++SrcIdx;
4318       continue;
4319     }
4320 
4321     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4322       Op.addImmOperands(Inst, 1);
4323       continue;
4324     }
4325 
4326     if (Op.isToken() && Op.getToken() == "done")
4327       continue;
4328 
4329     // Handle optional arguments
4330     OptionalIdx[Op.getImmTy()] = i;
4331   }
4332 
4333   assert(SrcIdx == 4);
4334 
4335   bool Compr = false;
4336   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4337     Compr = true;
4338     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4339     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4340     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4341   }
4342 
4343   for (auto i = 0; i < SrcIdx; ++i) {
4344     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4345       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4346     }
4347   }
4348 
4349   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4350   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4351 
4352   Inst.addOperand(MCOperand::createImm(EnMask));
4353 }
4354 
4355 //===----------------------------------------------------------------------===//
4356 // s_waitcnt
4357 //===----------------------------------------------------------------------===//
4358 
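/// Encode a single counter field of an s_waitcnt mask. Returns true if the
/// value does not fit; when Saturate is set, an out-of-range value is clamped
/// to the field maximum instead of failing.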
4359 static bool
4360 encodeCnt(
4361   const AMDGPU::IsaVersion ISA,
4362   int64_t &IntVal,
4363   int64_t CntVal,
4364   bool Saturate,
4365   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4366   unsigned (*decode)(const IsaVersion &Version, unsigned))
4367 {
4368   bool Failed = false;
4369 
4370   IntVal = encode(ISA, IntVal, CntVal);
4371   if (CntVal != decode(ISA, IntVal)) {
4372     if (Saturate) {
4373       IntVal = encode(ISA, IntVal, -1);
4374     } else {
4375       Failed = true;
4376     }
4377   }
4378   return Failed;
4379 }
4380 
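/// Parse one "name(value)" term of an s_waitcnt expression (vmcnt, expcnt or
/// lgkmcnt, optionally with a _sat suffix) and fold it into IntVal. Terms may
/// be separated by '&' or ','. Returns true on error.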
4381 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4382   StringRef CntName = Parser.getTok().getString();
4383   int64_t CntVal;
4384 
4385   Parser.Lex();
4386   if (getLexer().isNot(AsmToken::LParen))
4387     return true;
4388 
4389   Parser.Lex();
4390   if (getLexer().isNot(AsmToken::Integer))
4391     return true;
4392 
4393   SMLoc ValLoc = Parser.getTok().getLoc();
4394   if (getParser().parseAbsoluteExpression(CntVal))
4395     return true;
4396 
4397   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4398 
4399   bool Failed = true;
4400   bool Sat = CntName.endswith("_sat");
4401 
4402   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4403     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4404   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4405     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4406   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4407     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4408   }
4409 
4410   if (Failed) {
4411     Error(ValLoc, "too large value for " + CntName);
4412     return true;
4413   }
4414 
4415   if (getLexer().isNot(AsmToken::RParen)) {
4416     return true;
4417   }
4418 
4419   Parser.Lex();
4420   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4421     const AsmToken NextToken = getLexer().peekTok();
4422     if (NextToken.is(AsmToken::Identifier)) {
4423       Parser.Lex();
4424     }
4425   }
4426 
4427   return false;
4428 }
4429 
4430 OperandMatchResultTy
4431 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4432   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4433   int64_t Waitcnt = getWaitcntBitMask(ISA);
4434   SMLoc S = Parser.getTok().getLoc();
4435 
4436   switch(getLexer().getKind()) {
4437     default: return MatchOperand_ParseFail;
4438     case AsmToken::Integer:
4439       // The operand can be an integer value.
4440       if (getParser().parseAbsoluteExpression(Waitcnt))
4441         return MatchOperand_ParseFail;
4442       break;
4443 
4444     case AsmToken::Identifier:
4445       do {
4446         if (parseCnt(Waitcnt))
4447           return MatchOperand_ParseFail;
4448       } while(getLexer().isNot(AsmToken::EndOfStatement));
4449       break;
4450   }
4451   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4452   return MatchOperand_Success;
4453 }
4454 
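/// Parse the hwreg(id [, offset, width]) construct used by s_setreg/s_getreg;
/// the register id may be symbolic or numeric. Returns true on parse failure.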
4455 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4456                                           int64_t &Width) {
4457   using namespace llvm::AMDGPU::Hwreg;
4458 
4459   if (Parser.getTok().getString() != "hwreg")
4460     return true;
4461   Parser.Lex();
4462 
4463   if (getLexer().isNot(AsmToken::LParen))
4464     return true;
4465   Parser.Lex();
4466 
4467   if (getLexer().is(AsmToken::Identifier)) {
4468     HwReg.IsSymbolic = true;
4469     HwReg.Id = ID_UNKNOWN_;
4470     const StringRef tok = Parser.getTok().getString();
4471     int Last = ID_SYMBOLIC_LAST_;
4472     if (isSI() || isCI() || isVI())
4473       Last = ID_SYMBOLIC_FIRST_GFX9_;
4474     else if (isGFX9())
4475       Last = ID_SYMBOLIC_FIRST_GFX10_;
4476     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4477       if (tok == IdSymbolic[i]) {
4478         HwReg.Id = i;
4479         break;
4480       }
4481     }
4482     Parser.Lex();
4483   } else {
4484     HwReg.IsSymbolic = false;
4485     if (getLexer().isNot(AsmToken::Integer))
4486       return true;
4487     if (getParser().parseAbsoluteExpression(HwReg.Id))
4488       return true;
4489   }
4490 
4491   if (getLexer().is(AsmToken::RParen)) {
4492     Parser.Lex();
4493     return false;
4494   }
4495 
4496   // Optional parameters: bit offset and field width.
4497   if (getLexer().isNot(AsmToken::Comma))
4498     return true;
4499   Parser.Lex();
4500 
4501   if (getLexer().isNot(AsmToken::Integer))
4502     return true;
4503   if (getParser().parseAbsoluteExpression(Offset))
4504     return true;
4505 
4506   if (getLexer().isNot(AsmToken::Comma))
4507     return true;
4508   Parser.Lex();
4509 
4510   if (getLexer().isNot(AsmToken::Integer))
4511     return true;
4512   if (getParser().parseAbsoluteExpression(Width))
4513     return true;
4514 
4515   if (getLexer().isNot(AsmToken::RParen))
4516     return true;
4517   Parser.Lex();
4518 
4519   return false;
4520 }
4521 
4522 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4523   using namespace llvm::AMDGPU::Hwreg;
4524 
4525   int64_t Imm16Val = 0;
4526   SMLoc S = Parser.getTok().getLoc();
4527 
4528   switch(getLexer().getKind()) {
4529     default: return MatchOperand_NoMatch;
4530     case AsmToken::Integer:
4531       // The operand can be an integer value.
4532       if (getParser().parseAbsoluteExpression(Imm16Val))
4533         return MatchOperand_NoMatch;
4534       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4535         Error(S, "invalid immediate: only 16-bit values are legal");
4536         // Do not return an error code, but create an imm operand anyway and proceed
4537         // to the next operand, if any. That avoids unnecessary error messages.
4538       }
4539       break;
4540 
4541     case AsmToken::Identifier: {
4542         OperandInfoTy HwReg(ID_UNKNOWN_);
4543         int64_t Offset = OFFSET_DEFAULT_;
4544         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4545         if (parseHwregConstruct(HwReg, Offset, Width))
4546           return MatchOperand_ParseFail;
4547         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4548           if (HwReg.IsSymbolic)
4549             Error(S, "invalid symbolic name of hardware register");
4550           else
4551             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4552         }
4553         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4554           Error(S, "invalid bit offset: only 5-bit values are legal");
4555         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4556           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4557         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4558       }
4559       break;
4560   }
4561   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4562   return MatchOperand_Success;
4563 }
4564 
4565 bool AMDGPUOperand::isSWaitCnt() const {
4566   return isImm();
4567 }
4568 
4569 bool AMDGPUOperand::isHwreg() const {
4570   return isImmTy(ImmTyHwreg);
4571 }
4572 
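/// Parse the sendmsg(msg [, operation [, stream]]) construct. The message and
/// operation may be symbolic or numeric; GS messages other than nop accept an
/// optional stream id. Returns true on parse failure.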
4573 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4574   using namespace llvm::AMDGPU::SendMsg;
4575 
4576   if (Parser.getTok().getString() != "sendmsg")
4577     return true;
4578   Parser.Lex();
4579 
4580   if (getLexer().isNot(AsmToken::LParen))
4581     return true;
4582   Parser.Lex();
4583 
4584   if (getLexer().is(AsmToken::Identifier)) {
4585     Msg.IsSymbolic = true;
4586     Msg.Id = ID_UNKNOWN_;
4587     const std::string tok = Parser.getTok().getString();
4588     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4589       switch(i) {
4590         default: continue; // Omit gaps.
4591         case ID_GS_ALLOC_REQ:
4592           if (isSI() || isCI() || isVI())
4593             continue;
4594           break;
4595         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4596         case ID_SYSMSG: break;
4597       }
4598       if (tok == IdSymbolic[i]) {
4599         Msg.Id = i;
4600         break;
4601       }
4602     }
4603     Parser.Lex();
4604   } else {
4605     Msg.IsSymbolic = false;
4606     if (getLexer().isNot(AsmToken::Integer))
4607       return true;
4608     if (getParser().parseAbsoluteExpression(Msg.Id))
4609       return true;
4610     if (getLexer().is(AsmToken::Integer))
4611       if (getParser().parseAbsoluteExpression(Msg.Id))
4612         Msg.Id = ID_UNKNOWN_;
4613   }
4614   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4615     return false;
4616 
4617   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4618     if (getLexer().isNot(AsmToken::RParen))
4619       return true;
4620     Parser.Lex();
4621     return false;
4622   }
4623 
4624   if (getLexer().isNot(AsmToken::Comma))
4625     return true;
4626   Parser.Lex();
4627 
4628   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4629   Operation.Id = ID_UNKNOWN_;
4630   if (getLexer().is(AsmToken::Identifier)) {
4631     Operation.IsSymbolic = true;
4632     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4633     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4634     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4635     const StringRef Tok = Parser.getTok().getString();
4636     for (int i = F; i < L; ++i) {
4637       if (Tok == S[i]) {
4638         Operation.Id = i;
4639         break;
4640       }
4641     }
4642     Parser.Lex();
4643   } else {
4644     Operation.IsSymbolic = false;
4645     if (getLexer().isNot(AsmToken::Integer))
4646       return true;
4647     if (getParser().parseAbsoluteExpression(Operation.Id))
4648       return true;
4649   }
4650 
4651   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4652     // Stream id is optional.
4653     if (getLexer().is(AsmToken::RParen)) {
4654       Parser.Lex();
4655       return false;
4656     }
4657 
4658     if (getLexer().isNot(AsmToken::Comma))
4659       return true;
4660     Parser.Lex();
4661 
4662     if (getLexer().isNot(AsmToken::Integer))
4663       return true;
4664     if (getParser().parseAbsoluteExpression(StreamId))
4665       return true;
4666   }
4667 
4668   if (getLexer().isNot(AsmToken::RParen))
4669     return true;
4670   Parser.Lex();
4671   return false;
4672 }
4673 
4674 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4675   if (getLexer().getKind() != AsmToken::Identifier)
4676     return MatchOperand_NoMatch;
4677 
4678   StringRef Str = Parser.getTok().getString();
4679   int Slot = StringSwitch<int>(Str)
4680     .Case("p10", 0)
4681     .Case("p20", 1)
4682     .Case("p0", 2)
4683     .Default(-1);
4684 
4685   SMLoc S = Parser.getTok().getLoc();
4686   if (Slot == -1)
4687     return MatchOperand_ParseFail;
4688 
4689   Parser.Lex();
4690   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4691                                               AMDGPUOperand::ImmTyInterpSlot));
4692   return MatchOperand_Success;
4693 }
4694 
4695 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4696   if (getLexer().getKind() != AsmToken::Identifier)
4697     return MatchOperand_NoMatch;
4698 
4699   StringRef Str = Parser.getTok().getString();
4700   if (!Str.startswith("attr"))
4701     return MatchOperand_NoMatch;
4702 
4703   StringRef Chan = Str.take_back(2);
4704   int AttrChan = StringSwitch<int>(Chan)
4705     .Case(".x", 0)
4706     .Case(".y", 1)
4707     .Case(".z", 2)
4708     .Case(".w", 3)
4709     .Default(-1);
4710   if (AttrChan == -1)
4711     return MatchOperand_ParseFail;
4712 
4713   Str = Str.drop_back(2).drop_front(4);
4714 
4715   uint8_t Attr;
4716   if (Str.getAsInteger(10, Attr))
4717     return MatchOperand_ParseFail;
4718 
4719   SMLoc S = Parser.getTok().getLoc();
4720   Parser.Lex();
4721   if (Attr > 63) {
4722     Error(S, "out of bounds attr");
4723     return MatchOperand_Success;
4724   }
4725 
4726   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4727 
4728   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4729                                               AMDGPUOperand::ImmTyInterpAttr));
4730   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4731                                               AMDGPUOperand::ImmTyAttrChan));
4732   return MatchOperand_Success;
4733 }
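// The interpolation operands parsed above typically appear as, e.g.
// (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v2, p10, attr3.w
// "p10"/"p20"/"p0" select the interpolation slot; "attrN.{x,y,z,w}" selects
// the parameter and channel.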
4734 
4735 void AMDGPUAsmParser::errorExpTgt() {
4736   Error(Parser.getTok().getLoc(), "invalid exp target");
4737 }
4738 
4739 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4740                                                       uint8_t &Val) {
4741   if (Str == "null") {
4742     Val = 9;
4743     return MatchOperand_Success;
4744   }
4745 
4746   if (Str.startswith("mrt")) {
4747     Str = Str.drop_front(3);
4748     if (Str == "z") { // == mrtz
4749       Val = 8;
4750       return MatchOperand_Success;
4751     }
4752 
4753     if (Str.getAsInteger(10, Val))
4754       return MatchOperand_ParseFail;
4755 
4756     if (Val > 7)
4757       errorExpTgt();
4758 
4759     return MatchOperand_Success;
4760   }
4761 
4762   if (Str.startswith("pos")) {
4763     Str = Str.drop_front(3);
4764     if (Str.getAsInteger(10, Val))
4765       return MatchOperand_ParseFail;
4766 
4767     if (Val > 4 || (Val == 4 && !isGFX10()))
4768       errorExpTgt();
4769 
4770     Val += 12;
4771     return MatchOperand_Success;
4772   }
4773 
4774   if (isGFX10() && Str == "prim") {
4775     Val = 20;
4776     return MatchOperand_Success;
4777   }
4778 
4779   if (Str.startswith("param")) {
4780     Str = Str.drop_front(5);
4781     if (Str.getAsInteger(10, Val))
4782       return MatchOperand_ParseFail;
4783 
4784     if (Val >= 32)
4785       errorExpTgt();
4786 
4787     Val += 32;
4788     return MatchOperand_Success;
4789   }
4790 
4791   if (Str.startswith("invalid_target_")) {
4792     Str = Str.drop_front(15);
4793     if (Str.getAsInteger(10, Val))
4794       return MatchOperand_ParseFail;
4795 
4796     errorExpTgt();
4797     return MatchOperand_Success;
4798   }
4799 
4800   return MatchOperand_NoMatch;
4801 }
4802 
4803 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4804   uint8_t Val;
4805   StringRef Str = Parser.getTok().getString();
4806 
4807   auto Res = parseExpTgtImpl(Str, Val);
4808   if (Res != MatchOperand_Success)
4809     return Res;
4810 
4811   SMLoc S = Parser.getTok().getLoc();
4812   Parser.Lex();
4813 
4814   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4815                                               AMDGPUOperand::ImmTyExpTgt));
4816   return MatchOperand_Success;
4817 }
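// Export target spellings accepted by parseExpTgtImpl (illustrative):
//   exp mrt0 v0, v0, v0, v0      // color targets mrt0..mrt7
//   exp mrtz v0, off, off, off   // depth target
//   exp pos0 v0, v1, v2, v3      // position targets pos0..pos3 (pos4: GFX10 only)
//   exp param0 v0, v1, v2, v3    // parameter targets param0..param31
//   exp prim v0, off, off, off   // GFX10 primitive export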
4818 
4819 OperandMatchResultTy
4820 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4821   using namespace llvm::AMDGPU::SendMsg;
4822 
4823   int64_t Imm16Val = 0;
4824   SMLoc S = Parser.getTok().getLoc();
4825 
4826   switch(getLexer().getKind()) {
4827   default:
4828     return MatchOperand_NoMatch;
4829   case AsmToken::Integer:
4830     // The operand can be an integer value.
4831     if (getParser().parseAbsoluteExpression(Imm16Val))
4832       return MatchOperand_NoMatch;
4833     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4834       Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return an error code, but create an imm operand anyway and
      // proceed to the next operand, if any. That avoids unnecessary error
      // messages.
4837     }
4838     break;
4839   case AsmToken::Identifier: {
4840       OperandInfoTy Msg(ID_UNKNOWN_);
4841       OperandInfoTy Operation(OP_UNKNOWN_);
4842       int64_t StreamId = STREAM_ID_DEFAULT_;
4843       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4844         return MatchOperand_ParseFail;
4845       do {
4846         // Validate and encode message ID.
4847         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4848                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4849                 || Msg.Id == ID_SYSMSG)) {
4850           if (Msg.IsSymbolic)
4851             Error(S, "invalid/unsupported symbolic name of message");
4852           else
4853             Error(S, "invalid/unsupported code of message");
4854           break;
4855         }
4856         Imm16Val = (Msg.Id << ID_SHIFT_);
4857         // Validate and encode operation ID.
4858         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4859           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4860             if (Operation.IsSymbolic)
4861               Error(S, "invalid symbolic name of GS_OP");
4862             else
4863               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4864             break;
4865           }
4866           if (Operation.Id == OP_GS_NOP
4867               && Msg.Id != ID_GS_DONE) {
4868             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4869             break;
4870           }
4871           Imm16Val |= (Operation.Id << OP_SHIFT_);
4872         }
4873         if (Msg.Id == ID_SYSMSG) {
4874           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4875             if (Operation.IsSymbolic)
4876               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4877             else
4878               Error(S, "invalid/unsupported code of SYSMSG_OP");
4879             break;
4880           }
4881           Imm16Val |= (Operation.Id << OP_SHIFT_);
4882         }
4883         // Validate and encode stream ID.
4884         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4885           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4886             Error(S, "invalid stream id: only 2-bit values are legal");
4887             break;
4888           }
4889           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4890         }
4891       } while (false);
4892     }
4893     break;
4894   }
4895   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4896   return MatchOperand_Success;
4897 }
4898 
4899 bool AMDGPUOperand::isSendMsg() const {
4900   return isImmTy(ImmTySendMsg);
4901 }
4902 
4903 //===----------------------------------------------------------------------===//
4904 // parser helpers
4905 //===----------------------------------------------------------------------===//
4906 
4907 bool
4908 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4909   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4910 }
4911 
4912 bool
4913 AMDGPUAsmParser::isId(const StringRef Id) const {
4914   return isId(getToken(), Id);
4915 }
4916 
4917 bool
4918 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4919   return getTokenKind() == Kind;
4920 }
4921 
4922 bool
4923 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4924   if (isId(Id)) {
4925     lex();
4926     return true;
4927   }
4928   return false;
4929 }
4930 
4931 bool
4932 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4933   if (isToken(Kind)) {
4934     lex();
4935     return true;
4936   }
4937   return false;
4938 }
4939 
4940 bool
4941 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4942                            const StringRef ErrMsg) {
4943   if (!trySkipToken(Kind)) {
4944     Error(getLoc(), ErrMsg);
4945     return false;
4946   }
4947   return true;
4948 }
4949 
4950 bool
4951 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4952   return !getParser().parseAbsoluteExpression(Imm);
4953 }
4954 
4955 bool
4956 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4957   if (isToken(AsmToken::String)) {
4958     Val = getToken().getStringContents();
4959     lex();
4960     return true;
4961   } else {
4962     Error(getLoc(), ErrMsg);
4963     return false;
4964   }
4965 }
4966 
4967 AsmToken
4968 AMDGPUAsmParser::getToken() const {
4969   return Parser.getTok();
4970 }
4971 
4972 AsmToken
4973 AMDGPUAsmParser::peekToken() {
4974   return getLexer().peekTok();
4975 }
4976 
4977 void
4978 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4979   auto TokCount = getLexer().peekTokens(Tokens);
4980 
4981   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4982     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4983 }
4984 
4985 AsmToken::TokenKind
4986 AMDGPUAsmParser::getTokenKind() const {
4987   return getLexer().getKind();
4988 }
4989 
4990 SMLoc
4991 AMDGPUAsmParser::getLoc() const {
4992   return getToken().getLoc();
4993 }
4994 
4995 StringRef
4996 AMDGPUAsmParser::getTokenStr() const {
4997   return getToken().getString();
4998 }
4999 
5000 void
5001 AMDGPUAsmParser::lex() {
5002   Parser.Lex();
5003 }
5004 
5005 //===----------------------------------------------------------------------===//
5006 // swizzle
5007 //===----------------------------------------------------------------------===//
5008 
5009 LLVM_READNONE
5010 static unsigned
5011 encodeBitmaskPerm(const unsigned AndMask,
5012                   const unsigned OrMask,
5013                   const unsigned XorMask) {
5014   using namespace llvm::AMDGPU::Swizzle;
5015 
5016   return BITMASK_PERM_ENC |
5017          (AndMask << BITMASK_AND_SHIFT) |
5018          (OrMask  << BITMASK_OR_SHIFT)  |
5019          (XorMask << BITMASK_XOR_SHIFT);
5020 }
5021 
5022 bool
5023 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5024                                       const unsigned MinVal,
5025                                       const unsigned MaxVal,
5026                                       const StringRef ErrMsg) {
5027   for (unsigned i = 0; i < OpNum; ++i) {
5028     if (!skipToken(AsmToken::Comma, "expected a comma")){
5029       return false;
5030     }
5031     SMLoc ExprLoc = Parser.getTok().getLoc();
5032     if (!parseExpr(Op[i])) {
5033       return false;
5034     }
5035     if (Op[i] < MinVal || Op[i] > MaxVal) {
5036       Error(ExprLoc, ErrMsg);
5037       return false;
5038     }
5039   }
5040 
5041   return true;
5042 }
5043 
5044 bool
5045 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5046   using namespace llvm::AMDGPU::Swizzle;
5047 
5048   int64_t Lane[LANE_NUM];
5049   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5050                            "expected a 2-bit lane id")) {
5051     Imm = QUAD_PERM_ENC;
5052     for (unsigned I = 0; I < LANE_NUM; ++I) {
5053       Imm |= Lane[I] << (LANE_SHIFT * I);
5054     }
5055     return true;
5056   }
5057   return false;
5058 }
5059 
5060 bool
5061 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5062   using namespace llvm::AMDGPU::Swizzle;
5063 
5064   SMLoc S = Parser.getTok().getLoc();
5065   int64_t GroupSize;
5066   int64_t LaneIdx;
5067 
5068   if (!parseSwizzleOperands(1, &GroupSize,
5069                             2, 32,
5070                             "group size must be in the interval [2,32]")) {
5071     return false;
5072   }
5073   if (!isPowerOf2_64(GroupSize)) {
5074     Error(S, "group size must be a power of two");
5075     return false;
5076   }
5077   if (parseSwizzleOperands(1, &LaneIdx,
5078                            0, GroupSize - 1,
5079                            "lane id must be in the interval [0,group size - 1]")) {
5080     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5081     return true;
5082   }
5083   return false;
5084 }
5085 
5086 bool
5087 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5088   using namespace llvm::AMDGPU::Swizzle;
5089 
5090   SMLoc S = Parser.getTok().getLoc();
5091   int64_t GroupSize;
5092 
5093   if (!parseSwizzleOperands(1, &GroupSize,
5094       2, 32, "group size must be in the interval [2,32]")) {
5095     return false;
5096   }
5097   if (!isPowerOf2_64(GroupSize)) {
5098     Error(S, "group size must be a power of two");
5099     return false;
5100   }
5101 
5102   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5103   return true;
5104 }
5105 
5106 bool
5107 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5108   using namespace llvm::AMDGPU::Swizzle;
5109 
5110   SMLoc S = Parser.getTok().getLoc();
5111   int64_t GroupSize;
5112 
5113   if (!parseSwizzleOperands(1, &GroupSize,
5114       1, 16, "group size must be in the interval [1,16]")) {
5115     return false;
5116   }
5117   if (!isPowerOf2_64(GroupSize)) {
5118     Error(S, "group size must be a power of two");
5119     return false;
5120   }
5121 
5122   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5123   return true;
5124 }
5125 
5126 bool
5127 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5128   using namespace llvm::AMDGPU::Swizzle;
5129 
5130   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5131     return false;
5132   }
5133 
5134   StringRef Ctl;
5135   SMLoc StrLoc = Parser.getTok().getLoc();
5136   if (!parseString(Ctl)) {
5137     return false;
5138   }
5139   if (Ctl.size() != BITMASK_WIDTH) {
5140     Error(StrLoc, "expected a 5-character mask");
5141     return false;
5142   }
5143 
5144   unsigned AndMask = 0;
5145   unsigned OrMask = 0;
5146   unsigned XorMask = 0;
5147 
5148   for (size_t i = 0; i < Ctl.size(); ++i) {
5149     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5150     switch(Ctl[i]) {
5151     default:
5152       Error(StrLoc, "invalid mask");
5153       return false;
5154     case '0':
5155       break;
5156     case '1':
5157       OrMask |= Mask;
5158       break;
5159     case 'p':
5160       AndMask |= Mask;
5161       break;
5162     case 'i':
5163       AndMask |= Mask;
5164       XorMask |= Mask;
5165       break;
5166     }
5167   }
5168 
5169   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5170   return true;
5171 }
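// Worked example (illustrative): for swizzle(BITMASK_PERM, "00p11") the loop
// above yields AndMask = 0b00100, OrMask = 0b00011, XorMask = 0b00000, i.e.
// each lane reads from roughly ((lane_id & 0b00100) | 0b00011) ^ 0b00000.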
5172 
5173 bool
5174 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5175 
5176   SMLoc OffsetLoc = Parser.getTok().getLoc();
5177 
5178   if (!parseExpr(Imm)) {
5179     return false;
5180   }
5181   if (!isUInt<16>(Imm)) {
5182     Error(OffsetLoc, "expected a 16-bit offset");
5183     return false;
5184   }
5185   return true;
5186 }
5187 
5188 bool
5189 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5190   using namespace llvm::AMDGPU::Swizzle;
5191 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5193 
5194     SMLoc ModeLoc = Parser.getTok().getLoc();
5195     bool Ok = false;
5196 
5197     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5198       Ok = parseSwizzleQuadPerm(Imm);
5199     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5200       Ok = parseSwizzleBitmaskPerm(Imm);
5201     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5202       Ok = parseSwizzleBroadcast(Imm);
5203     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5204       Ok = parseSwizzleSwap(Imm);
5205     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5206       Ok = parseSwizzleReverse(Imm);
5207     } else {
5208       Error(ModeLoc, "expected a swizzle mode");
5209     }
5210 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5212   }
5213 
5214   return false;
5215 }
5216 
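// Swizzle operand examples accepted by parseSwizzleOp below (illustrative;
// mode names come from the Swizzle IdSymbolic table used above):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "00p11")
//   ds_swizzle_b32 v0, v1 offset:0x8000   // a raw 16-bit offset also works
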
5217 OperandMatchResultTy
5218 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5219   SMLoc S = Parser.getTok().getLoc();
5220   int64_t Imm = 0;
5221 
5222   if (trySkipId("offset")) {
5223 
5224     bool Ok = false;
5225     if (skipToken(AsmToken::Colon, "expected a colon")) {
5226       if (trySkipId("swizzle")) {
5227         Ok = parseSwizzleMacro(Imm);
5228       } else {
5229         Ok = parseSwizzleOffset(Imm);
5230       }
5231     }
5232 
5233     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5234 
5235     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5236   } else {
5237     // Swizzle "offset" operand is optional.
5238     // If it is omitted, try parsing other optional operands.
5239     return parseOptionalOpr(Operands);
5240   }
5241 }
5242 
5243 bool
5244 AMDGPUOperand::isSwizzle() const {
5245   return isImmTy(ImmTySwizzle);
5246 }
5247 
5248 //===----------------------------------------------------------------------===//
5249 // VGPR Index Mode
5250 //===----------------------------------------------------------------------===//
5251 
5252 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5253 
5254   using namespace llvm::AMDGPU::VGPRIndexMode;
5255 
5256   if (trySkipToken(AsmToken::RParen)) {
5257     return OFF;
5258   }
5259 
5260   int64_t Imm = 0;
5261 
5262   while (true) {
5263     unsigned Mode = 0;
5264     SMLoc S = Parser.getTok().getLoc();
5265 
5266     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5267       if (trySkipId(IdSymbolic[ModeId])) {
5268         Mode = 1 << ModeId;
5269         break;
5270       }
5271     }
5272 
5273     if (Mode == 0) {
5274       Error(S, (Imm == 0)?
5275                "expected a VGPR index mode or a closing parenthesis" :
5276                "expected a VGPR index mode");
5277       break;
5278     }
5279 
5280     if (Imm & Mode) {
5281       Error(S, "duplicate VGPR index mode");
5282       break;
5283     }
5284     Imm |= Mode;
5285 
5286     if (trySkipToken(AsmToken::RParen))
5287       break;
5288     if (!skipToken(AsmToken::Comma,
5289                    "expected a comma or a closing parenthesis"))
5290       break;
5291   }
5292 
5293   return Imm;
5294 }
5295 
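// GPR index mode examples accepted by parseGPRIdxMode below (illustrative;
// mode names come from llvm::AMDGPU::VGPRIndexMode::IdSymbolic):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)
//   s_set_gpr_idx_on s0, 12        // a raw 4-bit immediate is also accepted
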
5296 OperandMatchResultTy
5297 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5298 
5299   int64_t Imm = 0;
5300   SMLoc S = Parser.getTok().getLoc();
5301 
5302   if (getLexer().getKind() == AsmToken::Identifier &&
5303       Parser.getTok().getString() == "gpr_idx" &&
5304       getLexer().peekTok().is(AsmToken::LParen)) {
5305 
5306     Parser.Lex();
5307     Parser.Lex();
5308 
    // If parsing fails, parseGPRIdxMacro reports an error but we do not return
    // an error code, to avoid excessive error messages.
5311     Imm = parseGPRIdxMacro();
5312 
5313   } else {
5314     if (getParser().parseAbsoluteExpression(Imm))
5315       return MatchOperand_NoMatch;
5316     if (Imm < 0 || !isUInt<4>(Imm)) {
5317       Error(S, "invalid immediate: only 4-bit values are legal");
5318     }
5319   }
5320 
5321   Operands.push_back(
5322       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5323   return MatchOperand_Success;
5324 }
5325 
5326 bool AMDGPUOperand::isGPRIdxMode() const {
5327   return isImmTy(ImmTyGprIdxMode);
5328 }
5329 
5330 //===----------------------------------------------------------------------===//
5331 // sopp branch targets
5332 //===----------------------------------------------------------------------===//
5333 
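// A branch target is either an absolute integer expression (e.g. "s_branch 8")
// or a label reference (e.g. "s_branch loop_end"); the latter is emitted as a
// symbol-reference expression. Examples illustrative.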
5334 OperandMatchResultTy
5335 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5336   SMLoc S = Parser.getTok().getLoc();
5337 
5338   switch (getLexer().getKind()) {
5339     default: return MatchOperand_ParseFail;
5340     case AsmToken::Integer: {
5341       int64_t Imm;
5342       if (getParser().parseAbsoluteExpression(Imm))
5343         return MatchOperand_ParseFail;
5344       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5345       return MatchOperand_Success;
5346     }
5347 
5348     case AsmToken::Identifier:
5349       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5350           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5351                                   Parser.getTok().getString()), getContext()), S));
5352       Parser.Lex();
5353       return MatchOperand_Success;
5354   }
5355 }
5356 
5357 //===----------------------------------------------------------------------===//
5358 // mubuf
5359 //===----------------------------------------------------------------------===//
5360 
5361 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5362   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5363 }
5364 
5365 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5366   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5367 }
5368 
5369 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5370   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5371 }
5372 
5373 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5374                                const OperandVector &Operands,
5375                                bool IsAtomic,
5376                                bool IsAtomicReturn,
5377                                bool IsLds) {
5378   bool IsLdsOpcode = IsLds;
5379   bool HasLdsModifier = false;
5380   OptionalImmIndexMap OptionalIdx;
5381   assert(IsAtomicReturn ? IsAtomic : true);
5382   unsigned FirstOperandIdx = 1;
5383 
5384   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5385     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5386 
5387     // Add the register arguments
5388     if (Op.isReg()) {
5389       Op.addRegOperands(Inst, 1);
5390       // Insert a tied src for atomic return dst.
5391       // This cannot be postponed as subsequent calls to
5392       // addImmOperands rely on correct number of MC operands.
5393       if (IsAtomicReturn && i == FirstOperandIdx)
5394         Op.addRegOperands(Inst, 1);
5395       continue;
5396     }
5397 
5398     // Handle the case where soffset is an immediate
5399     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5400       Op.addImmOperands(Inst, 1);
5401       continue;
5402     }
5403 
5404     HasLdsModifier |= Op.isLDS();
5405 
5406     // Handle tokens like 'offen' which are sometimes hard-coded into the
5407     // asm string.  There are no MCInst operands for these.
5408     if (Op.isToken()) {
5409       continue;
5410     }
5411     assert(Op.isImm());
5412 
5413     // Handle optional arguments
5414     OptionalIdx[Op.getImmTy()] = i;
5415   }
5416 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // the optional modifiers, and the llvm asm matcher regards the 'lds'
  // modifier as an optional one. As a result, an lds version of an
  // opcode may be selected even if the source has no 'lds' modifier.
5424   if (IsLdsOpcode && !HasLdsModifier) {
5425     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5426     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5427       Inst.setOpcode(NoLdsOpcode);
5428       IsLdsOpcode = false;
5429     }
5430   }
5431 
5432   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5433   if (!IsAtomic) { // glc is hard-coded.
5434     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5435   }
5436   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5437 
5438   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5439     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5440   }
5441 
5442   if (isGFX10())
5443     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5444 }
5445 
5446 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5447   OptionalImmIndexMap OptionalIdx;
5448 
5449   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5450     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5451 
5452     // Add the register arguments
5453     if (Op.isReg()) {
5454       Op.addRegOperands(Inst, 1);
5455       continue;
5456     }
5457 
5458     // Handle the case where soffset is an immediate
5459     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5460       Op.addImmOperands(Inst, 1);
5461       continue;
5462     }
5463 
5464     // Handle tokens like 'offen' which are sometimes hard-coded into the
5465     // asm string.  There are no MCInst operands for these.
5466     if (Op.isToken()) {
5467       continue;
5468     }
5469     assert(Op.isImm());
5470 
5471     // Handle optional arguments
5472     OptionalIdx[Op.getImmTy()] = i;
5473   }
5474 
5475   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5476                         AMDGPUOperand::ImmTyOffset);
5477   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5478   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5479   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5480   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5481 
5482   if (isGFX10())
5483     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5484 }
5485 
5486 //===----------------------------------------------------------------------===//
5487 // mimg
5488 //===----------------------------------------------------------------------===//
5489 
5490 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5491                               bool IsAtomic) {
5492   unsigned I = 1;
5493   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5494   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5495     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5496   }
5497 
5498   if (IsAtomic) {
5499     // Add src, same as dst
5500     assert(Desc.getNumDefs() == 1);
5501     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5502   }
5503 
5504   OptionalImmIndexMap OptionalIdx;
5505 
5506   for (unsigned E = Operands.size(); I != E; ++I) {
5507     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5508 
5509     // Add the register arguments
5510     if (Op.isReg()) {
5511       Op.addRegOperands(Inst, 1);
5512     } else if (Op.isImmModifier()) {
5513       OptionalIdx[Op.getImmTy()] = I;
5514     } else if (!Op.isToken()) {
5515       llvm_unreachable("unexpected operand type");
5516     }
5517   }
5518 
5519   bool IsGFX10 = isGFX10();
5520 
5521   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5522   if (IsGFX10)
5523     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5524   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5525   if (IsGFX10)
5526     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5527   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5528   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5529   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5530   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5531   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5532   if (!IsGFX10)
5533     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5534   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5535 }
5536 
5537 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5538   cvtMIMG(Inst, Operands, true);
5539 }
5540 
5541 //===----------------------------------------------------------------------===//
5542 // smrd
5543 //===----------------------------------------------------------------------===//
5544 
5545 bool AMDGPUOperand::isSMRDOffset8() const {
5546   return isImm() && isUInt<8>(getImm());
5547 }
5548 
5549 bool AMDGPUOperand::isSMRDOffset20() const {
5550   return isImm() && isUInt<20>(getImm());
5551 }
5552 
5553 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset is larger than 8 bits.
5556   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5557 }
5558 
5559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5560   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5561 }
5562 
5563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5564   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5565 }
5566 
5567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5568   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5569 }
5570 
5571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5572   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5573 }
5574 
5575 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5576   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5577 }
5578 
5579 //===----------------------------------------------------------------------===//
5580 // vop3
5581 //===----------------------------------------------------------------------===//
5582 
5583 static bool ConvertOmodMul(int64_t &Mul) {
5584   if (Mul != 1 && Mul != 2 && Mul != 4)
5585     return false;
5586 
5587   Mul >>= 1;
5588   return true;
5589 }
5590 
5591 static bool ConvertOmodDiv(int64_t &Div) {
5592   if (Div == 1) {
5593     Div = 0;
5594     return true;
5595   }
5596 
5597   if (Div == 2) {
5598     Div = 3;
5599     return true;
5600   }
5601 
5602   return false;
5603 }
5604 
5605 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5606   if (BoundCtrl == 0) {
5607     BoundCtrl = 1;
5608     return true;
5609   }
5610 
5611   if (BoundCtrl == -1) {
5612     BoundCtrl = 0;
5613     return true;
5614   }
5615 
5616   return false;
5617 }
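// The converters above map assembly-level values to their encodings
// (illustrative):
//   omod: "mul:1" -> 0, "mul:2" -> 1, "mul:4" -> 2, "div:2" -> 3
//   dpp:  "bound_ctrl:0" -> 1 (the unset sentinel -1 encodes as 0)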
5618 
5619 // Note: the order in this table matches the order of operands in AsmString.
5620 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5621   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5622   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5623   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5624   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5625   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5626   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5627   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5628   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5629   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5630   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5631   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5632   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5633   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5634   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5635   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5636   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5637   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5638   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5639   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5640   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5641   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5642   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5643   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5644   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5645   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5646   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5647   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5648   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5649   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5650   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5651   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5652   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5653   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5654   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5655   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5656   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5657   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5658   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5659   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5660 };
5661 
5662 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5663   unsigned size = Operands.size();
5664   assert(size > 0);
5665 
5666   OperandMatchResultTy res = parseOptionalOpr(Operands);
5667 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
5678 
5679   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5680 
5681     // We have parsed the first optional operand.
5682     // Parse as many operands as necessary to skip all mandatory operands.
5683 
5684     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5685       if (res != MatchOperand_Success ||
5686           getLexer().is(AsmToken::EndOfStatement)) break;
5687       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5688       res = parseOptionalOpr(Operands);
5689     }
5690   }
5691 
5692   return res;
5693 }
5694 
5695 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5696   OperandMatchResultTy res;
5697   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5698     // try to parse any optional operand here
5699     if (Op.IsBit) {
5700       res = parseNamedBit(Op.Name, Operands, Op.Type);
5701     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5702       res = parseOModOperand(Operands);
5703     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5704                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5705                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5706       res = parseSDWASel(Operands, Op.Name, Op.Type);
5707     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5708       res = parseSDWADstUnused(Operands);
5709     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5710                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5711                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5712                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5713       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5714                                         Op.ConvertResult);
5715     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5716       res = parseDim(Operands);
5717     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5718       res = parseDfmtNfmt(Operands);
5719     } else {
5720       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5721     }
5722     if (res != MatchOperand_NoMatch) {
5723       return res;
5724     }
5725   }
5726   return MatchOperand_NoMatch;
5727 }
5728 
5729 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5730   StringRef Name = Parser.getTok().getString();
5731   if (Name == "mul") {
5732     return parseIntWithPrefix("mul", Operands,
5733                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5734   }
5735 
5736   if (Name == "div") {
5737     return parseIntWithPrefix("div", Operands,
5738                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5739   }
5740 
5741   return MatchOperand_NoMatch;
5742 }
5743 
5744 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5745   cvtVOP3P(Inst, Operands);
5746 
5747   int Opc = Inst.getOpcode();
5748 
5749   int SrcNum;
5750   const int Ops[] = { AMDGPU::OpName::src0,
5751                       AMDGPU::OpName::src1,
5752                       AMDGPU::OpName::src2 };
5753   for (SrcNum = 0;
5754        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5755        ++SrcNum);
5756   assert(SrcNum > 0);
5757 
5758   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5759   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5760 
5761   if ((OpSel & (1 << SrcNum)) != 0) {
5762     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5763     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5764     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5765   }
5766 }
5767 
5768 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5777 }
5778 
5779 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5780 {
5781   OptionalImmIndexMap OptionalIdx;
5782   unsigned Opc = Inst.getOpcode();
5783 
5784   unsigned I = 1;
5785   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5786   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5787     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5788   }
5789 
5790   for (unsigned E = Operands.size(); I != E; ++I) {
5791     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5792     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5793       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5794     } else if (Op.isInterpSlot() ||
5795                Op.isInterpAttr() ||
5796                Op.isAttrChan()) {
5797       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5798     } else if (Op.isImmModifier()) {
5799       OptionalIdx[Op.getImmTy()] = I;
5800     } else {
5801       llvm_unreachable("unhandled operand type");
5802     }
5803   }
5804 
5805   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5806     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5807   }
5808 
5809   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5810     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5811   }
5812 
5813   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5814     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5815   }
5816 }
5817 
5818 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5819                               OptionalImmIndexMap &OptionalIdx) {
5820   unsigned Opc = Inst.getOpcode();
5821 
5822   unsigned I = 1;
5823   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5824   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5825     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5826   }
5827 
5828   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5829     // This instruction has src modifiers
5830     for (unsigned E = Operands.size(); I != E; ++I) {
5831       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5832       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5833         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5834       } else if (Op.isImmModifier()) {
5835         OptionalIdx[Op.getImmTy()] = I;
5836       } else if (Op.isRegOrImm()) {
5837         Op.addRegOrImmOperands(Inst, 1);
5838       } else {
5839         llvm_unreachable("unhandled operand type");
5840       }
5841     }
5842   } else {
5843     // No src modifiers
5844     for (unsigned E = Operands.size(); I != E; ++I) {
5845       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5846       if (Op.isMod()) {
5847         OptionalIdx[Op.getImmTy()] = I;
5848       } else {
5849         Op.addRegOrImmOperands(Inst, 1);
5850       }
5851     }
5852   }
5853 
5854   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5855     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5856   }
5857 
5858   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5859     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5860   }
5861 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
5866   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5867       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5868       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5869       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5870       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5871       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5872       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5873     auto it = Inst.begin();
5874     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5875     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5876     ++it;
5877     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5878   }
5879 }
5880 
5881 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5882   OptionalImmIndexMap OptionalIdx;
5883   cvtVOP3(Inst, Operands, OptionalIdx);
5884 }
5885 
5886 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5887                                const OperandVector &Operands) {
5888   OptionalImmIndexMap OptIdx;
5889   const int Opc = Inst.getOpcode();
5890   const MCInstrDesc &Desc = MII.get(Opc);
5891 
5892   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5893 
5894   cvtVOP3(Inst, Operands, OptIdx);
5895 
5896   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5897     assert(!IsPacked);
5898     Inst.addOperand(Inst.getOperand(0));
5899   }
5900 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
5903 
5904   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5905 
5906   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5907   if (OpSelHiIdx != -1) {
5908     int DefaultVal = IsPacked ? -1 : 0;
5909     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5910                           DefaultVal);
5911   }
5912 
5913   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5914   if (NegLoIdx != -1) {
5915     assert(IsPacked);
5916     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5917     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5918   }
5919 
5920   const int Ops[] = { AMDGPU::OpName::src0,
5921                       AMDGPU::OpName::src1,
5922                       AMDGPU::OpName::src2 };
5923   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5924                          AMDGPU::OpName::src1_modifiers,
5925                          AMDGPU::OpName::src2_modifiers };
5926 
5927   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5928 
5929   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5930   unsigned OpSelHi = 0;
5931   unsigned NegLo = 0;
5932   unsigned NegHi = 0;
5933 
5934   if (OpSelHiIdx != -1) {
5935     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5936   }
5937 
5938   if (NegLoIdx != -1) {
5939     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5940     NegLo = Inst.getOperand(NegLoIdx).getImm();
5941     NegHi = Inst.getOperand(NegHiIdx).getImm();
5942   }
5943 
5944   for (int J = 0; J < 3; ++J) {
5945     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5946     if (OpIdx == -1)
5947       break;
5948 
5949     uint32_t ModVal = 0;
5950 
5951     if ((OpSel & (1 << J)) != 0)
5952       ModVal |= SISrcMods::OP_SEL_0;
5953 
5954     if ((OpSelHi & (1 << J)) != 0)
5955       ModVal |= SISrcMods::OP_SEL_1;
5956 
5957     if ((NegLo & (1 << J)) != 0)
5958       ModVal |= SISrcMods::NEG;
5959 
5960     if ((NegHi & (1 << J)) != 0)
5961       ModVal |= SISrcMods::NEG_HI;
5962 
5963     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5964 
5965     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5966   }
5967 }
5968 
5969 //===----------------------------------------------------------------------===//
5970 // dpp
5971 //===----------------------------------------------------------------------===//
5972 
5973 bool AMDGPUOperand::isDPPCtrl() const {
5974   using namespace AMDGPU::DPP;
5975 
5976   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5977   if (result) {
5978     int64_t Imm = getImm();
5979     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5980            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5981            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5982            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5983            (Imm == DppCtrl::WAVE_SHL1) ||
5984            (Imm == DppCtrl::WAVE_ROL1) ||
5985            (Imm == DppCtrl::WAVE_SHR1) ||
5986            (Imm == DppCtrl::WAVE_ROR1) ||
5987            (Imm == DppCtrl::ROW_MIRROR) ||
5988            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5989            (Imm == DppCtrl::BCAST15) ||
5990            (Imm == DppCtrl::BCAST31);
5991   }
5992   return false;
5993 }
5994 
5995 bool AMDGPUOperand::isS16Imm() const {
5996   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5997 }
5998 
5999 bool AMDGPUOperand::isU16Imm() const {
6000   return isImm() && isUInt<16>(getImm());
6001 }
6002 
6003 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6004   if (!isGFX10())
6005     return MatchOperand_NoMatch;
6006 
6007   SMLoc S = Parser.getTok().getLoc();
6008 
6009   if (getLexer().isNot(AsmToken::Identifier))
6010     return MatchOperand_NoMatch;
6011   if (getLexer().getTok().getString() != "dim")
6012     return MatchOperand_NoMatch;
6013 
6014   Parser.Lex();
6015   if (getLexer().isNot(AsmToken::Colon))
6016     return MatchOperand_ParseFail;
6017 
6018   Parser.Lex();
6019 
6020   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6021   // integer.
6022   std::string Token;
6023   if (getLexer().is(AsmToken::Integer)) {
6024     SMLoc Loc = getLexer().getTok().getEndLoc();
6025     Token = getLexer().getTok().getString();
6026     Parser.Lex();
6027     if (getLexer().getTok().getLoc() != Loc)
6028       return MatchOperand_ParseFail;
6029   }
6030   if (getLexer().isNot(AsmToken::Identifier))
6031     return MatchOperand_ParseFail;
6032   Token += getLexer().getTok().getString();
6033 
6034   StringRef DimId = Token;
6035   if (DimId.startswith("SQ_RSRC_IMG_"))
6036     DimId = DimId.substr(12);
6037 
6038   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6039   if (!DimInfo)
6040     return MatchOperand_ParseFail;
6041 
6042   Parser.Lex();
6043 
6044   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6045                                               AMDGPUOperand::ImmTyDim));
6046   return MatchOperand_Success;
6047 }
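// Dim operand examples (GFX10 only; illustrative):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:2D
// Both the full SQ_RSRC_IMG_* spelling and the short suffix (e.g. 1D, 2D, 3D,
// CUBE, 2D_ARRAY) are accepted, per getMIMGDimInfoByAsmSuffix.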
6048 
6049 OperandMatchResultTy
6050 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6051   using namespace AMDGPU::DPP;
6052 
6053   SMLoc S = Parser.getTok().getLoc();
6054   StringRef Prefix;
6055   int64_t Int;
6056 
6057   if (getLexer().getKind() == AsmToken::Identifier) {
6058     Prefix = Parser.getTok().getString();
6059   } else {
6060     return MatchOperand_NoMatch;
6061   }
6062 
6063   if (Prefix == "row_mirror") {
6064     Int = DppCtrl::ROW_MIRROR;
6065     Parser.Lex();
6066   } else if (Prefix == "row_half_mirror") {
6067     Int = DppCtrl::ROW_HALF_MIRROR;
6068     Parser.Lex();
6069   } else {
6070     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6071     if (Prefix != "quad_perm"
6072         && Prefix != "row_shl"
6073         && Prefix != "row_shr"
6074         && Prefix != "row_ror"
6075         && Prefix != "wave_shl"
6076         && Prefix != "wave_rol"
6077         && Prefix != "wave_shr"
6078         && Prefix != "wave_ror"
6079         && Prefix != "row_bcast") {
6080       return MatchOperand_NoMatch;
6081     }
6082 
6083     Parser.Lex();
6084     if (getLexer().isNot(AsmToken::Colon))
6085       return MatchOperand_ParseFail;
6086 
6087     if (Prefix == "quad_perm") {
6088       // quad_perm:[%d,%d,%d,%d]
6089       Parser.Lex();
6090       if (getLexer().isNot(AsmToken::LBrac))
6091         return MatchOperand_ParseFail;
6092       Parser.Lex();
6093 
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6095         return MatchOperand_ParseFail;
6096 
6097       for (int i = 0; i < 3; ++i) {
6098         if (getLexer().isNot(AsmToken::Comma))
6099           return MatchOperand_ParseFail;
6100         Parser.Lex();
6101 
6102         int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6104           return MatchOperand_ParseFail;
6105         const int shift = i*2 + 2;
6106         Int += (Temp << shift);
6107       }
6108 
6109       if (getLexer().isNot(AsmToken::RBrac))
6110         return MatchOperand_ParseFail;
6111       Parser.Lex();
6112     } else {
6113       // sel:%d
6114       Parser.Lex();
6115       if (getParser().parseAbsoluteExpression(Int))
6116         return MatchOperand_ParseFail;
6117 
6118       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6119         Int |= DppCtrl::ROW_SHL0;
6120       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6121         Int |= DppCtrl::ROW_SHR0;
6122       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6123         Int |= DppCtrl::ROW_ROR0;
6124       } else if (Prefix == "wave_shl" && 1 == Int) {
6125         Int = DppCtrl::WAVE_SHL1;
6126       } else if (Prefix == "wave_rol" && 1 == Int) {
6127         Int = DppCtrl::WAVE_ROL1;
6128       } else if (Prefix == "wave_shr" && 1 == Int) {
6129         Int = DppCtrl::WAVE_SHR1;
6130       } else if (Prefix == "wave_ror" && 1 == Int) {
6131         Int = DppCtrl::WAVE_ROR1;
6132       } else if (Prefix == "row_bcast") {
6133         if (Int == 15) {
6134           Int = DppCtrl::BCAST15;
6135         } else if (Int == 31) {
6136           Int = DppCtrl::BCAST31;
6137         } else {
6138           return MatchOperand_ParseFail;
6139         }
6140       } else {
6141         return MatchOperand_ParseFail;
6142       }
6143     }
6144   }
6145 
6146   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6147   return MatchOperand_Success;
6148 }
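// DPP control examples accepted by parseDPPCtrl above (illustrative):
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1
//   v_mov_b32_dpp v0, v1 row_mirror
//   v_mov_b32_dpp v0, v1 row_bcast:15 bound_ctrl:0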
6149 
6150 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6151   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6152 }
6153 
6154 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6155   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6156 }
6157 
6158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6159   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6160 }
6161 
6162 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6163   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6164 }
6165 
6166 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
6167   OptionalImmIndexMap OptionalIdx;
6168 
6169   unsigned I = 1;
6170   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6171   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6172     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6173   }
6174 
6175   for (unsigned E = Operands.size(); I != E; ++I) {
6176     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6177                                             MCOI::TIED_TO);
6178     if (TiedTo != -1) {
6179       assert((unsigned)TiedTo < Inst.getNumOperands());
6180       // handle tied old or src2 for MAC instructions
6181       Inst.addOperand(Inst.getOperand(TiedTo));
6182     }
6183     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6184     // Add the register arguments
6185     if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
6188       continue;
6189     }
6190     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6191       Op.addRegWithFPInputModsOperands(Inst, 2);
6192     } else if (Op.isDPPCtrl()) {
6193       Op.addImmOperands(Inst, 1);
6194     } else if (Op.isImm()) {
6195       // Handle optional arguments
6196       OptionalIdx[Op.getImmTy()] = I;
6197     } else {
6198       llvm_unreachable("Invalid operand type");
6199     }
6200   }
6201 
6202   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6203   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6204   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6205 }
6206 
6207 //===----------------------------------------------------------------------===//
6208 // sdwa
6209 //===----------------------------------------------------------------------===//
6210 
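// Parse an SDWA selector operand such as dst_sel:WORD_1 or src0_sel:BYTE_2
// (Prefix names the expected key) into the corresponding SdwaSel immediate.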
6211 OperandMatchResultTy
6212 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6213                               AMDGPUOperand::ImmTy Type) {
6214   using namespace llvm::AMDGPU::SDWA;
6215 
6216   SMLoc S = Parser.getTok().getLoc();
6217   StringRef Value;
6218   OperandMatchResultTy res;
6219 
6220   res = parseStringWithPrefix(Prefix, Value);
6221   if (res != MatchOperand_Success) {
6222     return res;
6223   }
6224 
6225   int64_t Int;
6226   Int = StringSwitch<int64_t>(Value)
6227         .Case("BYTE_0", SdwaSel::BYTE_0)
6228         .Case("BYTE_1", SdwaSel::BYTE_1)
6229         .Case("BYTE_2", SdwaSel::BYTE_2)
6230         .Case("BYTE_3", SdwaSel::BYTE_3)
6231         .Case("WORD_0", SdwaSel::WORD_0)
6232         .Case("WORD_1", SdwaSel::WORD_1)
6233         .Case("DWORD", SdwaSel::DWORD)
6234         .Default(0xffffffff);
6235   Parser.Lex(); // eat last token
6236 
6237   if (Int == 0xffffffff) {
6238     return MatchOperand_ParseFail;
6239   }
6240 
6241   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6242   return MatchOperand_Success;
6243 }
6244 
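// Parse the dst_unused:UNUSED_{PAD,SEXT,PRESERVE} SDWA operand into the
// corresponding DstUnused immediate.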
6245 OperandMatchResultTy
6246 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6247   using namespace llvm::AMDGPU::SDWA;
6248 
6249   SMLoc S = Parser.getTok().getLoc();
6250   StringRef Value;
6251   OperandMatchResultTy res;
6252 
6253   res = parseStringWithPrefix("dst_unused", Value);
6254   if (res != MatchOperand_Success) {
6255     return res;
6256   }
6257 
6258   int64_t Int;
6259   Int = StringSwitch<int64_t>(Value)
6260         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6261         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6262         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6263         .Default(0xffffffff);
6264   Parser.Lex(); // eat last token
6265 
6266   if (Int == 0xffffffff) {
6267     return MatchOperand_ParseFail;
6268   }
6269 
6270   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6271   return MatchOperand_Success;
6272 }
6273 
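// Thin wrappers that dispatch to cvtSDWA with the appropriate basic
// instruction type; the VOP2b form always skips the leading vcc operand,
// and the VOPC form skips it on VI.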
6274 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6275   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6276 }
6277 
6278 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6279   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6280 }
6281 
6282 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6283   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6284 }
6285 
6286 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6287   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6288 }
6289 
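// Convert parsed SDWA operands into an MCInst: add the defs and sources
// (with input modifiers), optionally skip the written "vcc" operand, append
// the optional clamp/omod and dst_sel/dst_unused/src{0,1}_sel immediates
// according to the basic instruction type, and tie src2 to dst for
// v_mac_{f16, f32}.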
6290 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6291                               uint64_t BasicInstType, bool skipVcc) {
6292   using namespace llvm::AMDGPU::SDWA;
6293 
6294   OptionalImmIndexMap OptionalIdx;
6295   bool skippedVcc = false;
6296 
6297   unsigned I = 1;
6298   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6299   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6300     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6301   }
6302 
6303   for (unsigned E = Operands.size(); I != E; ++I) {
6304     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6305     if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
6306       // VOP2b SDWA instructions (v_add_u32, v_sub_u32, ...) use a "vcc" token as dst.
6307       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6308       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
6309       // Skip VCC only if we didn't skip it on the previous iteration.
6310       if (BasicInstType == SIInstrFlags::VOP2 &&
6311           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6312         skippedVcc = true;
6313         continue;
6314       } else if (BasicInstType == SIInstrFlags::VOPC &&
6315                  Inst.getNumOperands() == 0) {
6316         skippedVcc = true;
6317         continue;
6318       }
6319     }
6320     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6321       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6322     } else if (Op.isImm()) {
6323       // Handle optional arguments
6324       OptionalIdx[Op.getImmTy()] = I;
6325     } else {
6326       llvm_unreachable("Invalid operand type");
6327     }
6328     skippedVcc = false;
6329   }
6330 
6331   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6332       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6333       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6334     // v_nop_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments.
6335     switch (BasicInstType) {
6336     case SIInstrFlags::VOP1:
6337       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6338       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6339         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6340       }
6341       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6342       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6343       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6344       break;
6345 
6346     case SIInstrFlags::VOP2:
6347       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6348       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6349         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6350       }
6351       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6352       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6353       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6354       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6355       break;
6356 
6357     case SIInstrFlags::VOPC:
6358       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6359         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6360       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6361       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6362       break;
6363 
6364     default:
6365       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6366     }
6367   }
6368 
6369   // Special case for v_mac_{f16, f32}:
6370   // it has a src2 register operand that is tied to the dst operand.
6371   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6372       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6373     auto it = Inst.begin();
6374     std::advance(
6375       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6376     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6377   }
6378 }
6379 
6380 /// Force static initialization.
6381 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6382   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6383   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6384 }
6385 
6386 #define GET_REGISTER_MATCHER
6387 #define GET_MATCHER_IMPLEMENTATION
6388 #define GET_MNEMONIC_SPELL_CHECKER
6389 #include "AMDGPUGenAsmMatcher.inc"
6390 
6391 // This function must be defined after the auto-generated include so that the
6392 // MatchClassKind enum is defined.
6393 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6394                                                      unsigned Kind) {
6395   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6396   // But MatchInstructionImpl() expects a token and fails to validate the
6397   // operand. This method checks if we were given an immediate operand but
6398   // expected the corresponding token.
6399   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6400   switch (Kind) {
6401   case MCK_addr64:
6402     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6403   case MCK_gds:
6404     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6405   case MCK_lds:
6406     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6407   case MCK_glc:
6408     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6409   case MCK_idxen:
6410     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6411   case MCK_offen:
6412     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6413   case MCK_SSrcB32:
6414     // When operands have expression values, they will return true for isToken,
6415     // because it is not possible to distinguish between a token and an
6416     // expression at parse time. MatchInstructionImpl() will always try to
6417     // match an operand as a token when isToken returns true, and when the
6418     // name of the expression is not a valid token, the match will fail,
6419     // so we need to handle it here.
6420     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6421   case MCK_SSrcF32:
6422     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6423   case MCK_SoppBrTarget:
6424     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6425   case MCK_VReg32OrOff:
6426     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6427   case MCK_InterpSlot:
6428     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6429   case MCK_Attr:
6430     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6431   case MCK_AttrChan:
6432     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6433   default:
6434     return Match_InvalidOperand;
6435   }
6436 }
6437 
6438 //===----------------------------------------------------------------------===//
6439 // endpgm
6440 //===----------------------------------------------------------------------===//
6441 
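// Parse the optional immediate operand of s_endpgm; it defaults to 0 and
// must fit in 16 bits.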
6442 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6443   SMLoc S = Parser.getTok().getLoc();
6444   int64_t Imm = 0;
6445 
6446   if (!parseExpr(Imm)) {
6447     // The operand is optional; if not present, default to 0.
6448     Imm = 0;
6449   }
6450 
6451   if (!isUInt<16>(Imm)) {
6452     Error(S, "expected a 16-bit value");
6453     return MatchOperand_ParseFail;
6454   }
6455 
6456   Operands.push_back(
6457       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6458   return MatchOperand_Success;
6459 }
6460 
6461 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6462