1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0u;
106       Operand |= Neg ? SISrcMods::NEG : 0u;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0u;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyGprIdxMode,
177     ImmTyEndpgm,
178     ImmTyHigh
179   };
180 
181   struct TokOp {
182     const char *Data;
183     unsigned Length;
184   };
185 
186   struct ImmOp {
187     int64_t Val;
188     ImmTy Type;
189     bool IsFPImm;
190     Modifiers Mods;
191   };
192 
193   struct RegOp {
194     unsigned RegNo;
195     bool IsForcedVOP3;
196     Modifiers Mods;
197   };
198 
199   union {
200     TokOp Tok;
201     ImmOp Imm;
202     RegOp Reg;
203     const MCExpr *Expr;
204   };
205 
206   bool isToken() const override {
207     if (Kind == Token)
208       return true;
209 
210     if (Kind != Expression || !Expr)
211       return false;
212 
213     // When parsing operands, we can't always tell if something was meant to be
214     // a token, like 'gds', or an expression that references a global variable.
215     // In this case, we assume the string is an expression, and if we need to
216     // interpret is a token, then we treat the symbol name as the token.
217     return isa<MCSymbolRefExpr>(Expr);
218   }
219 
220   bool isImm() const override {
221     return Kind == Immediate;
222   }
223 
224   bool isInlinableImm(MVT type) const;
225   bool isLiteralImm(MVT type) const;
226 
227   bool isRegKind() const {
228     return Kind == Register;
229   }
230 
231   bool isReg() const override {
232     return isRegKind() && !hasModifiers();
233   }
234 
235   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
236     return isRegClass(RCID) || isInlinableImm(type);
237   }
238 
239   bool isRegOrImmWithInt16InputMods() const {
240     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
241   }
242 
243   bool isRegOrImmWithInt32InputMods() const {
244     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
245   }
246 
247   bool isRegOrImmWithInt64InputMods() const {
248     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
249   }
250 
251   bool isRegOrImmWithFP16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
253   }
254 
255   bool isRegOrImmWithFP32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
257   }
258 
259   bool isRegOrImmWithFP64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
261   }
262 
263   bool isVReg() const {
264     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
265            isRegClass(AMDGPU::VReg_64RegClassID) ||
266            isRegClass(AMDGPU::VReg_96RegClassID) ||
267            isRegClass(AMDGPU::VReg_128RegClassID) ||
268            isRegClass(AMDGPU::VReg_256RegClassID) ||
269            isRegClass(AMDGPU::VReg_512RegClassID);
270   }
271 
272   bool isVReg32() const {
273     return isRegClass(AMDGPU::VGPR_32RegClassID);
274   }
275 
276   bool isVReg32OrOff() const {
277     return isOff() || isVReg32();
278   }
279 
280   bool isSDWAOperand(MVT type) const;
281   bool isSDWAFP16Operand() const;
282   bool isSDWAFP32Operand() const;
283   bool isSDWAInt16Operand() const;
284   bool isSDWAInt32Operand() const;
285 
286   bool isImmTy(ImmTy ImmT) const {
287     return isImm() && Imm.Type == ImmT;
288   }
289 
290   bool isImmModifier() const {
291     return isImm() && Imm.Type != ImmTyNone;
292   }
293 
294   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
295   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
296   bool isDMask() const { return isImmTy(ImmTyDMask); }
297   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
298   bool isDA() const { return isImmTy(ImmTyDA); }
299   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
300   bool isLWE() const { return isImmTy(ImmTyLWE); }
301   bool isOff() const { return isImmTy(ImmTyOff); }
302   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
303   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
304   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
305   bool isOffen() const { return isImmTy(ImmTyOffen); }
306   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
307   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
308   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
309   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
310   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
311 
312   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
313   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
314   bool isGDS() const { return isImmTy(ImmTyGDS); }
315   bool isLDS() const { return isImmTy(ImmTyLDS); }
316   bool isGLC() const { return isImmTy(ImmTyGLC); }
317   bool isSLC() const { return isImmTy(ImmTySLC); }
318   bool isTFE() const { return isImmTy(ImmTyTFE); }
319   bool isD16() const { return isImmTy(ImmTyD16); }
320   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
321   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
322   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
323   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
324   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
325   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
326   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
327   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
328   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
329   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
330   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
331   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
332   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
333   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
334   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
335   bool isHigh() const { return isImmTy(ImmTyHigh); }
336 
337   bool isMod() const {
338     return isClampSI() || isOModSI();
339   }
340 
341   bool isRegOrImm() const {
342     return isReg() || isImm();
343   }
344 
345   bool isRegClass(unsigned RCID) const;
346 
347   bool isInlineValue() const;
348 
349   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
350     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
351   }
352 
353   bool isSCSrcB16() const {
354     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
355   }
356 
357   bool isSCSrcV2B16() const {
358     return isSCSrcB16();
359   }
360 
361   bool isSCSrcB32() const {
362     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
363   }
364 
365   bool isSCSrcB64() const {
366     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
367   }
368 
369   bool isSCSrcF16() const {
370     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
371   }
372 
373   bool isSCSrcV2F16() const {
374     return isSCSrcF16();
375   }
376 
377   bool isSCSrcF32() const {
378     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
379   }
380 
381   bool isSCSrcF64() const {
382     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
383   }
384 
385   bool isSSrcB32() const {
386     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
387   }
388 
389   bool isSSrcB16() const {
390     return isSCSrcB16() || isLiteralImm(MVT::i16);
391   }
392 
393   bool isSSrcV2B16() const {
394     llvm_unreachable("cannot happen");
395     return isSSrcB16();
396   }
397 
398   bool isSSrcB64() const {
399     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
400     // See isVSrc64().
401     return isSCSrcB64() || isLiteralImm(MVT::i64);
402   }
403 
404   bool isSSrcF32() const {
405     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
406   }
407 
408   bool isSSrcF64() const {
409     return isSCSrcB64() || isLiteralImm(MVT::f64);
410   }
411 
412   bool isSSrcF16() const {
413     return isSCSrcB16() || isLiteralImm(MVT::f16);
414   }
415 
416   bool isSSrcV2F16() const {
417     llvm_unreachable("cannot happen");
418     return isSSrcF16();
419   }
420 
421   bool isSSrcOrLdsB32() const {
422     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
423            isLiteralImm(MVT::i32) || isExpr();
424   }
425 
426   bool isVCSrcB32() const {
427     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
428   }
429 
430   bool isVCSrcB64() const {
431     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
432   }
433 
434   bool isVCSrcB16() const {
435     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
436   }
437 
438   bool isVCSrcV2B16() const {
439     return isVCSrcB16();
440   }
441 
442   bool isVCSrcF32() const {
443     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
444   }
445 
446   bool isVCSrcF64() const {
447     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
448   }
449 
450   bool isVCSrcF16() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
452   }
453 
454   bool isVCSrcV2F16() const {
455     return isVCSrcF16();
456   }
457 
458   bool isVSrcB32() const {
459     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
460   }
461 
462   bool isVSrcB64() const {
463     return isVCSrcF64() || isLiteralImm(MVT::i64);
464   }
465 
466   bool isVSrcB16() const {
467     return isVCSrcF16() || isLiteralImm(MVT::i16);
468   }
469 
470   bool isVSrcV2B16() const {
471     llvm_unreachable("cannot happen");
472     return isVSrcB16();
473   }
474 
475   bool isVSrcF32() const {
476     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
477   }
478 
479   bool isVSrcF64() const {
480     return isVCSrcF64() || isLiteralImm(MVT::f64);
481   }
482 
483   bool isVSrcF16() const {
484     return isVCSrcF16() || isLiteralImm(MVT::f16);
485   }
486 
487   bool isVSrcV2F16() const {
488     llvm_unreachable("cannot happen");
489     return isVSrcF16();
490   }
491 
492   bool isKImmFP32() const {
493     return isLiteralImm(MVT::f32);
494   }
495 
496   bool isKImmFP16() const {
497     return isLiteralImm(MVT::f16);
498   }
499 
500   bool isMem() const override {
501     return false;
502   }
503 
504   bool isExpr() const {
505     return Kind == Expression;
506   }
507 
508   bool isSoppBrTarget() const {
509     return isExpr() || isImm();
510   }
511 
512   bool isSWaitCnt() const;
513   bool isHwreg() const;
514   bool isSendMsg() const;
515   bool isSwizzle() const;
516   bool isSMRDOffset8() const;
517   bool isSMRDOffset20() const;
518   bool isSMRDLiteralOffset() const;
519   bool isDPPCtrl() const;
520   bool isGPRIdxMode() const;
521   bool isS16Imm() const;
522   bool isU16Imm() const;
523   bool isEndpgm() const;
524 
525   StringRef getExpressionAsToken() const {
526     assert(isExpr());
527     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
528     return S->getSymbol().getName();
529   }
530 
531   StringRef getToken() const {
532     assert(isToken());
533 
534     if (Kind == Expression)
535       return getExpressionAsToken();
536 
537     return StringRef(Tok.Data, Tok.Length);
538   }
539 
540   int64_t getImm() const {
541     assert(isImm());
542     return Imm.Val;
543   }
544 
545   ImmTy getImmTy() const {
546     assert(isImm());
547     return Imm.Type;
548   }
549 
550   unsigned getReg() const override {
551     return Reg.RegNo;
552   }
553 
554   SMLoc getStartLoc() const override {
555     return StartLoc;
556   }
557 
558   SMLoc getEndLoc() const override {
559     return EndLoc;
560   }
561 
562   SMRange getLocRange() const {
563     return SMRange(StartLoc, EndLoc);
564   }
565 
566   Modifiers getModifiers() const {
567     assert(isRegKind() || isImmTy(ImmTyNone));
568     return isRegKind() ? Reg.Mods : Imm.Mods;
569   }
570 
571   void setModifiers(Modifiers Mods) {
572     assert(isRegKind() || isImmTy(ImmTyNone));
573     if (isRegKind())
574       Reg.Mods = Mods;
575     else
576       Imm.Mods = Mods;
577   }
578 
579   bool hasModifiers() const {
580     return getModifiers().hasModifiers();
581   }
582 
583   bool hasFPModifiers() const {
584     return getModifiers().hasFPModifiers();
585   }
586 
587   bool hasIntModifiers() const {
588     return getModifiers().hasIntModifiers();
589   }
590 
591   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
592 
593   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
594 
595   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
596 
597   template <unsigned Bitwidth>
598   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
599 
600   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
601     addKImmFPOperands<16>(Inst, N);
602   }
603 
604   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
605     addKImmFPOperands<32>(Inst, N);
606   }
607 
608   void addRegOperands(MCInst &Inst, unsigned N) const;
609 
610   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
611     if (isRegKind())
612       addRegOperands(Inst, N);
613     else if (isExpr())
614       Inst.addOperand(MCOperand::createExpr(Expr));
615     else
616       addImmOperands(Inst, N);
617   }
618 
619   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
620     Modifiers Mods = getModifiers();
621     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
622     if (isRegKind()) {
623       addRegOperands(Inst, N);
624     } else {
625       addImmOperands(Inst, N, false);
626     }
627   }
628 
629   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
630     assert(!hasIntModifiers());
631     addRegOrImmWithInputModsOperands(Inst, N);
632   }
633 
634   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
635     assert(!hasFPModifiers());
636     addRegOrImmWithInputModsOperands(Inst, N);
637   }
638 
639   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
640     Modifiers Mods = getModifiers();
641     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
642     assert(isRegKind());
643     addRegOperands(Inst, N);
644   }
645 
646   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
647     assert(!hasIntModifiers());
648     addRegWithInputModsOperands(Inst, N);
649   }
650 
651   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
652     assert(!hasFPModifiers());
653     addRegWithInputModsOperands(Inst, N);
654   }
655 
656   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
657     if (isImm())
658       addImmOperands(Inst, N);
659     else {
660       assert(isExpr());
661       Inst.addOperand(MCOperand::createExpr(Expr));
662     }
663   }
664 
665   static void printImmTy(raw_ostream& OS, ImmTy Type) {
666     switch (Type) {
667     case ImmTyNone: OS << "None"; break;
668     case ImmTyGDS: OS << "GDS"; break;
669     case ImmTyLDS: OS << "LDS"; break;
670     case ImmTyOffen: OS << "Offen"; break;
671     case ImmTyIdxen: OS << "Idxen"; break;
672     case ImmTyAddr64: OS << "Addr64"; break;
673     case ImmTyOffset: OS << "Offset"; break;
674     case ImmTyInstOffset: OS << "InstOffset"; break;
675     case ImmTyOffset0: OS << "Offset0"; break;
676     case ImmTyOffset1: OS << "Offset1"; break;
677     case ImmTyGLC: OS << "GLC"; break;
678     case ImmTySLC: OS << "SLC"; break;
679     case ImmTyTFE: OS << "TFE"; break;
680     case ImmTyD16: OS << "D16"; break;
681     case ImmTyFORMAT: OS << "FORMAT"; break;
682     case ImmTyClampSI: OS << "ClampSI"; break;
683     case ImmTyOModSI: OS << "OModSI"; break;
684     case ImmTyDppCtrl: OS << "DppCtrl"; break;
685     case ImmTyDppRowMask: OS << "DppRowMask"; break;
686     case ImmTyDppBankMask: OS << "DppBankMask"; break;
687     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
688     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
689     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
690     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
691     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
692     case ImmTyDMask: OS << "DMask"; break;
693     case ImmTyUNorm: OS << "UNorm"; break;
694     case ImmTyDA: OS << "DA"; break;
695     case ImmTyR128A16: OS << "R128A16"; break;
696     case ImmTyLWE: OS << "LWE"; break;
697     case ImmTyOff: OS << "Off"; break;
698     case ImmTyExpTgt: OS << "ExpTgt"; break;
699     case ImmTyExpCompr: OS << "ExpCompr"; break;
700     case ImmTyExpVM: OS << "ExpVM"; break;
701     case ImmTyHwreg: OS << "Hwreg"; break;
702     case ImmTySendMsg: OS << "SendMsg"; break;
703     case ImmTyInterpSlot: OS << "InterpSlot"; break;
704     case ImmTyInterpAttr: OS << "InterpAttr"; break;
705     case ImmTyAttrChan: OS << "AttrChan"; break;
706     case ImmTyOpSel: OS << "OpSel"; break;
707     case ImmTyOpSelHi: OS << "OpSelHi"; break;
708     case ImmTyNegLo: OS << "NegLo"; break;
709     case ImmTyNegHi: OS << "NegHi"; break;
710     case ImmTySwizzle: OS << "Swizzle"; break;
711     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
712     case ImmTyHigh: OS << "High"; break;
713     case ImmTyEndpgm:
714       OS << "Endpgm";
715       break;
716     }
717   }
718 
719   void print(raw_ostream &OS) const override {
720     switch (Kind) {
721     case Register:
722       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
723       break;
724     case Immediate:
725       OS << '<' << getImm();
726       if (getImmTy() != ImmTyNone) {
727         OS << " type: "; printImmTy(OS, getImmTy());
728       }
729       OS << " mods: " << Imm.Mods << '>';
730       break;
731     case Token:
732       OS << '\'' << getToken() << '\'';
733       break;
734     case Expression:
735       OS << "<expr " << *Expr << '>';
736       break;
737     }
738   }
739 
740   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
741                                       int64_t Val, SMLoc Loc,
742                                       ImmTy Type = ImmTyNone,
743                                       bool IsFPImm = false) {
744     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
745     Op->Imm.Val = Val;
746     Op->Imm.IsFPImm = IsFPImm;
747     Op->Imm.Type = Type;
748     Op->Imm.Mods = Modifiers();
749     Op->StartLoc = Loc;
750     Op->EndLoc = Loc;
751     return Op;
752   }
753 
754   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
755                                         StringRef Str, SMLoc Loc,
756                                         bool HasExplicitEncodingSize = true) {
757     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
758     Res->Tok.Data = Str.data();
759     Res->Tok.Length = Str.size();
760     Res->StartLoc = Loc;
761     Res->EndLoc = Loc;
762     return Res;
763   }
764 
765   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
766                                       unsigned RegNo, SMLoc S,
767                                       SMLoc E,
768                                       bool ForceVOP3) {
769     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
770     Op->Reg.RegNo = RegNo;
771     Op->Reg.Mods = Modifiers();
772     Op->Reg.IsForcedVOP3 = ForceVOP3;
773     Op->StartLoc = S;
774     Op->EndLoc = E;
775     return Op;
776   }
777 
778   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
779                                        const class MCExpr *Expr, SMLoc S) {
780     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
781     Op->Expr = Expr;
782     Op->StartLoc = S;
783     Op->EndLoc = S;
784     return Op;
785   }
786 };
787 
788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
789   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
790   return OS;
791 }
792 
793 //===----------------------------------------------------------------------===//
794 // AsmParser
795 //===----------------------------------------------------------------------===//
796 
797 // Holds info related to the current kernel, e.g. count of SGPRs used.
798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
799 // .amdgpu_hsa_kernel or at EOF.
800 class KernelScopeInfo {
801   int SgprIndexUnusedMin = -1;
802   int VgprIndexUnusedMin = -1;
803   MCContext *Ctx = nullptr;
804 
805   void usesSgprAt(int i) {
806     if (i >= SgprIndexUnusedMin) {
807       SgprIndexUnusedMin = ++i;
808       if (Ctx) {
809         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
810         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
811       }
812     }
813   }
814 
815   void usesVgprAt(int i) {
816     if (i >= VgprIndexUnusedMin) {
817       VgprIndexUnusedMin = ++i;
818       if (Ctx) {
819         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
820         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
821       }
822     }
823   }
824 
825 public:
826   KernelScopeInfo() = default;
827 
828   void initialize(MCContext &Context) {
829     Ctx = &Context;
830     usesSgprAt(SgprIndexUnusedMin = -1);
831     usesVgprAt(VgprIndexUnusedMin = -1);
832   }
833 
834   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
835     switch (RegKind) {
836       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
837       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
838       default: break;
839     }
840   }
841 };
842 
843 class AMDGPUAsmParser : public MCTargetAsmParser {
844   MCAsmParser &Parser;
845 
846   // Number of extra operands parsed after the first optional operand.
847   // This may be necessary to skip hardcoded mandatory operands.
848   static const unsigned MAX_OPR_LOOKAHEAD = 8;
849 
850   unsigned ForcedEncodingSize = 0;
851   bool ForcedDPP = false;
852   bool ForcedSDWA = false;
853   KernelScopeInfo KernelScope;
854 
855   /// @name Auto-generated Match Functions
856   /// {
857 
858 #define GET_ASSEMBLER_HEADER
859 #include "AMDGPUGenAsmMatcher.inc"
860 
861   /// }
862 
863 private:
864   bool ParseAsAbsoluteExpression(uint32_t &Ret);
865   bool OutOfRangeError(SMRange Range);
866   /// Calculate VGPR/SGPR blocks required for given target, reserved
867   /// registers, and user-specified NextFreeXGPR values.
868   ///
869   /// \param Features [in] Target features, used for bug corrections.
870   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
871   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
872   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
873   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
874   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
875   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
876   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
877   /// \param VGPRBlocks [out] Result VGPR block count.
878   /// \param SGPRBlocks [out] Result SGPR block count.
879   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
880                           bool FlatScrUsed, bool XNACKUsed,
881                           unsigned NextFreeVGPR, SMRange VGPRRange,
882                           unsigned NextFreeSGPR, SMRange SGPRRange,
883                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
884   bool ParseDirectiveAMDGCNTarget();
885   bool ParseDirectiveAMDHSAKernel();
886   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
887   bool ParseDirectiveHSACodeObjectVersion();
888   bool ParseDirectiveHSACodeObjectISA();
889   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
890   bool ParseDirectiveAMDKernelCodeT();
891   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
892   bool ParseDirectiveAMDGPUHsaKernel();
893 
894   bool ParseDirectiveISAVersion();
895   bool ParseDirectiveHSAMetadata();
896   bool ParseDirectivePALMetadataBegin();
897   bool ParseDirectivePALMetadata();
898 
899   /// Common code to parse out a block of text (typically YAML) between start and
900   /// end directives.
901   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
902                            const char *AssemblerDirectiveEnd,
903                            std::string &CollectString);
904 
905   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
906                              RegisterKind RegKind, unsigned Reg1,
907                              unsigned RegNum);
908   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
909                            unsigned& RegNum, unsigned& RegWidth,
910                            unsigned *DwordRegIndex);
911   bool isRegister();
912   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
913   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
914   void initializeGprCountSymbol(RegisterKind RegKind);
915   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
916                              unsigned RegWidth);
917   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
918                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
919   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
920                  bool IsGdsHardcoded);
921 
922 public:
923   enum AMDGPUMatchResultTy {
924     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
925   };
926 
927   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
928 
929   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
930                const MCInstrInfo &MII,
931                const MCTargetOptions &Options)
932       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
933     MCAsmParserExtension::Initialize(Parser);
934 
935     if (getFeatureBits().none()) {
936       // Set default features.
937       copySTI().ToggleFeature("southern-islands");
938     }
939 
940     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
941 
942     {
943       // TODO: make those pre-defined variables read-only.
944       // Currently there is none suitable machinery in the core llvm-mc for this.
945       // MCSymbol::isRedefinable is intended for another purpose, and
946       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
947       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
948       MCContext &Ctx = getContext();
949       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
950         MCSymbol *Sym =
951             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
952         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
953         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
954         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
955         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
956         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
957       } else {
958         MCSymbol *Sym =
959             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
963         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
964         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
965       }
966       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
967         initializeGprCountSymbol(IS_VGPR);
968         initializeGprCountSymbol(IS_SGPR);
969       } else
970         KernelScope.initialize(getContext());
971     }
972   }
973 
974   bool hasXNACK() const {
975     return AMDGPU::hasXNACK(getSTI());
976   }
977 
978   bool hasMIMG_R128() const {
979     return AMDGPU::hasMIMG_R128(getSTI());
980   }
981 
982   bool hasPackedD16() const {
983     return AMDGPU::hasPackedD16(getSTI());
984   }
985 
986   bool isSI() const {
987     return AMDGPU::isSI(getSTI());
988   }
989 
990   bool isCI() const {
991     return AMDGPU::isCI(getSTI());
992   }
993 
994   bool isVI() const {
995     return AMDGPU::isVI(getSTI());
996   }
997 
998   bool isGFX9() const {
999     return AMDGPU::isGFX9(getSTI());
1000   }
1001 
1002   bool hasInv2PiInlineImm() const {
1003     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1004   }
1005 
1006   bool hasFlatOffsets() const {
1007     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1008   }
1009 
1010   bool hasSGPR102_SGPR103() const {
1011     return !isVI();
1012   }
1013 
1014   bool hasIntClamp() const {
1015     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1016   }
1017 
1018   AMDGPUTargetStreamer &getTargetStreamer() {
1019     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1020     return static_cast<AMDGPUTargetStreamer &>(TS);
1021   }
1022 
1023   const MCRegisterInfo *getMRI() const {
1024     // We need this const_cast because for some reason getContext() is not const
1025     // in MCAsmParser.
1026     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1027   }
1028 
1029   const MCInstrInfo *getMII() const {
1030     return &MII;
1031   }
1032 
1033   const FeatureBitset &getFeatureBits() const {
1034     return getSTI().getFeatureBits();
1035   }
1036 
1037   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1038   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1039   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1040 
1041   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1042   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1043   bool isForcedDPP() const { return ForcedDPP; }
1044   bool isForcedSDWA() const { return ForcedSDWA; }
1045   ArrayRef<unsigned> getMatchedVariants() const;
1046 
1047   std::unique_ptr<AMDGPUOperand> parseRegister();
1048   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1049   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1050   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1051                                       unsigned Kind) override;
1052   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1053                                OperandVector &Operands, MCStreamer &Out,
1054                                uint64_t &ErrorInfo,
1055                                bool MatchingInlineAsm) override;
1056   bool ParseDirective(AsmToken DirectiveID) override;
1057   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1058   StringRef parseMnemonicSuffix(StringRef Name);
1059   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1060                         SMLoc NameLoc, OperandVector &Operands) override;
1061   //bool ProcessInstruction(MCInst &Inst);
1062 
1063   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1064 
1065   OperandMatchResultTy
1066   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1067                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1068                      bool (*ConvertResult)(int64_t &) = nullptr);
1069 
1070   OperandMatchResultTy parseOperandArrayWithPrefix(
1071     const char *Prefix,
1072     OperandVector &Operands,
1073     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1074     bool (*ConvertResult)(int64_t&) = nullptr);
1075 
1076   OperandMatchResultTy
1077   parseNamedBit(const char *Name, OperandVector &Operands,
1078                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1079   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1080                                              StringRef &Value);
1081 
1082   bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
1083   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1084   OperandMatchResultTy parseReg(OperandVector &Operands);
1085   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1086   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1087   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1088   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1089   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1090   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1091   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1092 
1093   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1094   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1095   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1096   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1097 
1098   bool parseCnt(int64_t &IntVal);
1099   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1100   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1101 
1102 private:
1103   struct OperandInfoTy {
1104     int64_t Id;
1105     bool IsSymbolic = false;
1106 
1107     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1108   };
1109 
1110   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1111   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1112 
1113   void errorExpTgt();
1114   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1115 
1116   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1117   bool validateSOPLiteral(const MCInst &Inst) const;
1118   bool validateConstantBusLimitations(const MCInst &Inst);
1119   bool validateEarlyClobberLimitations(const MCInst &Inst);
1120   bool validateIntClampSupported(const MCInst &Inst);
1121   bool validateMIMGAtomicDMask(const MCInst &Inst);
1122   bool validateMIMGGatherDMask(const MCInst &Inst);
1123   bool validateMIMGDataSize(const MCInst &Inst);
1124   bool validateMIMGD16(const MCInst &Inst);
1125   bool validateLdsDirect(const MCInst &Inst);
1126   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1127   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1128   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1129 
1130   bool isId(const StringRef Id) const;
1131   bool isId(const AsmToken &Token, const StringRef Id) const;
1132   bool isToken(const AsmToken::TokenKind Kind) const;
1133   bool trySkipId(const StringRef Id);
1134   bool trySkipToken(const AsmToken::TokenKind Kind);
1135   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1136   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1137   AsmToken::TokenKind getTokenKind() const;
1138   bool parseExpr(int64_t &Imm);
1139   StringRef getTokenStr() const;
1140   AsmToken peekToken();
1141   AsmToken getToken() const;
1142   SMLoc getLoc() const;
1143   void lex();
1144 
1145 public:
1146   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1147   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1148 
1149   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1150   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1151   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1152   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1153   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1154 
1155   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1156                             const unsigned MinVal,
1157                             const unsigned MaxVal,
1158                             const StringRef ErrMsg);
1159   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1160   bool parseSwizzleOffset(int64_t &Imm);
1161   bool parseSwizzleMacro(int64_t &Imm);
1162   bool parseSwizzleQuadPerm(int64_t &Imm);
1163   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1164   bool parseSwizzleBroadcast(int64_t &Imm);
1165   bool parseSwizzleSwap(int64_t &Imm);
1166   bool parseSwizzleReverse(int64_t &Imm);
1167 
1168   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1169   int64_t parseGPRIdxMacro();
1170 
1171   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1172   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1173   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1174   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1175   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1176 
1177   AMDGPUOperand::Ptr defaultGLC() const;
1178   AMDGPUOperand::Ptr defaultSLC() const;
1179 
1180   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1181   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1182   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1183   AMDGPUOperand::Ptr defaultOffsetU12() const;
1184   AMDGPUOperand::Ptr defaultOffsetS13() const;
1185 
1186   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1187 
1188   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1189                OptionalImmIndexMap &OptionalIdx);
1190   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1191   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1192   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1193 
1194   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1195 
1196   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1197                bool IsAtomic = false);
1198   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1199 
1200   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1201   AMDGPUOperand::Ptr defaultRowMask() const;
1202   AMDGPUOperand::Ptr defaultBankMask() const;
1203   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1204   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1205 
1206   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1207                                     AMDGPUOperand::ImmTy Type);
1208   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1209   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1210   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1211   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1212   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1213   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1214                 uint64_t BasicInstType, bool skipVcc = false);
1215 
1216   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1217   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1218 };
1219 
1220 struct OptionalOperand {
1221   const char *Name;
1222   AMDGPUOperand::ImmTy Type;
1223   bool IsBit;
1224   bool (*ConvertResult)(int64_t&);
1225 };
1226 
1227 } // end anonymous namespace
1228 
1229 // May be called with integer type with equivalent bitwidth.
1230 static const fltSemantics *getFltSemantics(unsigned Size) {
1231   switch (Size) {
1232   case 4:
1233     return &APFloat::IEEEsingle();
1234   case 8:
1235     return &APFloat::IEEEdouble();
1236   case 2:
1237     return &APFloat::IEEEhalf();
1238   default:
1239     llvm_unreachable("unsupported fp type");
1240   }
1241 }
1242 
1243 static const fltSemantics *getFltSemantics(MVT VT) {
1244   return getFltSemantics(VT.getSizeInBits() / 8);
1245 }
1246 
1247 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1248   switch (OperandType) {
1249   case AMDGPU::OPERAND_REG_IMM_INT32:
1250   case AMDGPU::OPERAND_REG_IMM_FP32:
1251   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1252   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1253     return &APFloat::IEEEsingle();
1254   case AMDGPU::OPERAND_REG_IMM_INT64:
1255   case AMDGPU::OPERAND_REG_IMM_FP64:
1256   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1257   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1258     return &APFloat::IEEEdouble();
1259   case AMDGPU::OPERAND_REG_IMM_INT16:
1260   case AMDGPU::OPERAND_REG_IMM_FP16:
1261   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1262   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1263   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1264   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1265     return &APFloat::IEEEhalf();
1266   default:
1267     llvm_unreachable("unsupported fp type");
1268   }
1269 }
1270 
1271 //===----------------------------------------------------------------------===//
1272 // Operand
1273 //===----------------------------------------------------------------------===//
1274 
1275 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1276   bool Lost;
1277 
1278   // Convert literal to single precision
1279   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1280                                                APFloat::rmNearestTiesToEven,
1281                                                &Lost);
1282   // We allow precision lost but not overflow or underflow
1283   if (Status != APFloat::opOK &&
1284       Lost &&
1285       ((Status & APFloat::opOverflow)  != 0 ||
1286        (Status & APFloat::opUnderflow) != 0)) {
1287     return false;
1288   }
1289 
1290   return true;
1291 }
1292 
1293 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1294   return isUIntN(Size, Val) || isIntN(Size, Val);
1295 }
1296 
1297 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1298 
1299   // This is a hack to enable named inline values like
1300   // shared_base with both 32-bit and 64-bit operands.
1301   // Note that these values are defined as
1302   // 32-bit operands only.
1303   if (isInlineValue()) {
1304     return true;
1305   }
1306 
1307   if (!isImmTy(ImmTyNone)) {
1308     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1309     return false;
1310   }
1311   // TODO: We should avoid using host float here. It would be better to
1312   // check the float bit values which is what a few other places do.
1313   // We've had bot failures before due to weird NaN support on mips hosts.
1314 
1315   APInt Literal(64, Imm.Val);
1316 
1317   if (Imm.IsFPImm) { // We got fp literal token
1318     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1319       return AMDGPU::isInlinableLiteral64(Imm.Val,
1320                                           AsmParser->hasInv2PiInlineImm());
1321     }
1322 
1323     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1324     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1325       return false;
1326 
1327     if (type.getScalarSizeInBits() == 16) {
1328       return AMDGPU::isInlinableLiteral16(
1329         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1330         AsmParser->hasInv2PiInlineImm());
1331     }
1332 
1333     // Check if single precision literal is inlinable
1334     return AMDGPU::isInlinableLiteral32(
1335       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1336       AsmParser->hasInv2PiInlineImm());
1337   }
1338 
1339   // We got int literal token.
1340   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1341     return AMDGPU::isInlinableLiteral64(Imm.Val,
1342                                         AsmParser->hasInv2PiInlineImm());
1343   }
1344 
1345   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1346     return false;
1347   }
1348 
1349   if (type.getScalarSizeInBits() == 16) {
1350     return AMDGPU::isInlinableLiteral16(
1351       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1352       AsmParser->hasInv2PiInlineImm());
1353   }
1354 
1355   return AMDGPU::isInlinableLiteral32(
1356     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1357     AsmParser->hasInv2PiInlineImm());
1358 }
1359 
1360 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1361   // Check that this immediate can be added as literal
1362   if (!isImmTy(ImmTyNone)) {
1363     return false;
1364   }
1365 
1366   if (!Imm.IsFPImm) {
1367     // We got int literal token.
1368 
1369     if (type == MVT::f64 && hasFPModifiers()) {
1370       // Cannot apply fp modifiers to int literals preserving the same semantics
1371       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1372       // disable these cases.
1373       return false;
1374     }
1375 
1376     unsigned Size = type.getSizeInBits();
1377     if (Size == 64)
1378       Size = 32;
1379 
1380     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1381     // types.
1382     return isSafeTruncation(Imm.Val, Size);
1383   }
1384 
1385   // We got fp literal token
1386   if (type == MVT::f64) { // Expected 64-bit fp operand
1387     // We would set low 64-bits of literal to zeroes but we accept this literals
1388     return true;
1389   }
1390 
1391   if (type == MVT::i64) { // Expected 64-bit int operand
1392     // We don't allow fp literals in 64-bit integer instructions. It is
1393     // unclear how we should encode them.
1394     return false;
1395   }
1396 
1397   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1398   return canLosslesslyConvertToFPType(FPLiteral, type);
1399 }
1400 
1401 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1402   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1403 }
1404 
1405 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1406   if (AsmParser->isVI())
1407     return isVReg32();
1408   else if (AsmParser->isGFX9())
1409     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1410   else
1411     return false;
1412 }
1413 
1414 bool AMDGPUOperand::isSDWAFP16Operand() const {
1415   return isSDWAOperand(MVT::f16);
1416 }
1417 
1418 bool AMDGPUOperand::isSDWAFP32Operand() const {
1419   return isSDWAOperand(MVT::f32);
1420 }
1421 
1422 bool AMDGPUOperand::isSDWAInt16Operand() const {
1423   return isSDWAOperand(MVT::i16);
1424 }
1425 
1426 bool AMDGPUOperand::isSDWAInt32Operand() const {
1427   return isSDWAOperand(MVT::i32);
1428 }
1429 
1430 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1431 {
1432   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1433   assert(Size == 2 || Size == 4 || Size == 8);
1434 
1435   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1436 
1437   if (Imm.Mods.Abs) {
1438     Val &= ~FpSignMask;
1439   }
1440   if (Imm.Mods.Neg) {
1441     Val ^= FpSignMask;
1442   }
1443 
1444   return Val;
1445 }
1446 
1447 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1448   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1449                              Inst.getNumOperands())) {
1450     addLiteralImmOperand(Inst, Imm.Val,
1451                          ApplyModifiers &
1452                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1453   } else {
1454     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1455     Inst.addOperand(MCOperand::createImm(Imm.Val));
1456   }
1457 }
1458 
1459 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1460   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1461   auto OpNum = Inst.getNumOperands();
1462   // Check that this operand accepts literals
1463   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1464 
1465   if (ApplyModifiers) {
1466     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1467     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1468     Val = applyInputFPModifiers(Val, Size);
1469   }
1470 
1471   APInt Literal(64, Val);
1472   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1473 
1474   if (Imm.IsFPImm) { // We got fp literal token
1475     switch (OpTy) {
1476     case AMDGPU::OPERAND_REG_IMM_INT64:
1477     case AMDGPU::OPERAND_REG_IMM_FP64:
1478     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1479     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1480       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1481                                        AsmParser->hasInv2PiInlineImm())) {
1482         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1483         return;
1484       }
1485 
1486       // Non-inlineable
1487       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1488         // For fp operands we check if low 32 bits are zeros
1489         if (Literal.getLoBits(32) != 0) {
1490           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1491           "Can't encode literal as exact 64-bit floating-point operand. "
1492           "Low 32-bits will be set to zero");
1493         }
1494 
1495         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1496         return;
1497       }
1498 
1499       // We don't allow fp literals in 64-bit integer instructions. It is
1500       // unclear how we should encode them. This case should be checked earlier
1501       // in predicate methods (isLiteralImm())
1502       llvm_unreachable("fp literal in 64-bit integer instruction.");
1503 
1504     case AMDGPU::OPERAND_REG_IMM_INT32:
1505     case AMDGPU::OPERAND_REG_IMM_FP32:
1506     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1507     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1508     case AMDGPU::OPERAND_REG_IMM_INT16:
1509     case AMDGPU::OPERAND_REG_IMM_FP16:
1510     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1511     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1512     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1513     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1514       bool lost;
1515       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1516       // Convert literal to single precision
1517       FPLiteral.convert(*getOpFltSemantics(OpTy),
1518                         APFloat::rmNearestTiesToEven, &lost);
1519       // We allow precision lost but not overflow or underflow. This should be
1520       // checked earlier in isLiteralImm()
1521 
1522       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1523       Inst.addOperand(MCOperand::createImm(ImmVal));
1524       return;
1525     }
1526     default:
1527       llvm_unreachable("invalid operand size");
1528     }
1529 
1530     return;
1531   }
1532 
1533   // We got int literal token.
1534   // Only sign extend inline immediates.
1535   switch (OpTy) {
1536   case AMDGPU::OPERAND_REG_IMM_INT32:
1537   case AMDGPU::OPERAND_REG_IMM_FP32:
1538   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1539   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1540     if (isSafeTruncation(Val, 32) &&
1541         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1542                                      AsmParser->hasInv2PiInlineImm())) {
1543       Inst.addOperand(MCOperand::createImm(Val));
1544       return;
1545     }
1546 
1547     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1548     return;
1549 
1550   case AMDGPU::OPERAND_REG_IMM_INT64:
1551   case AMDGPU::OPERAND_REG_IMM_FP64:
1552   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1553   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1554     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1555       Inst.addOperand(MCOperand::createImm(Val));
1556       return;
1557     }
1558 
1559     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1560     return;
1561 
1562   case AMDGPU::OPERAND_REG_IMM_INT16:
1563   case AMDGPU::OPERAND_REG_IMM_FP16:
1564   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1565   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1566     if (isSafeTruncation(Val, 16) &&
1567         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1568                                      AsmParser->hasInv2PiInlineImm())) {
1569       Inst.addOperand(MCOperand::createImm(Val));
1570       return;
1571     }
1572 
1573     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1574     return;
1575 
1576   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1577   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1578     assert(isSafeTruncation(Val, 16));
1579     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1580                                         AsmParser->hasInv2PiInlineImm()));
1581 
1582     Inst.addOperand(MCOperand::createImm(Val));
1583     return;
1584   }
1585   default:
1586     llvm_unreachable("invalid operand size");
1587   }
1588 }
1589 
1590 template <unsigned Bitwidth>
1591 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1592   APInt Literal(64, Imm.Val);
1593 
1594   if (!Imm.IsFPImm) {
1595     // We got int literal token.
1596     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1597     return;
1598   }
1599 
1600   bool Lost;
1601   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1602   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1603                     APFloat::rmNearestTiesToEven, &Lost);
1604   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1605 }
1606 
1607 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1608   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1609 }
1610 
1611 static bool isInlineValue(unsigned Reg) {
1612   switch (Reg) {
1613   case AMDGPU::SRC_SHARED_BASE:
1614   case AMDGPU::SRC_SHARED_LIMIT:
1615   case AMDGPU::SRC_PRIVATE_BASE:
1616   case AMDGPU::SRC_PRIVATE_LIMIT:
1617   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1618     return true;
1619   default:
1620     return false;
1621   }
1622 }
1623 
1624 bool AMDGPUOperand::isInlineValue() const {
1625   return isRegKind() && ::isInlineValue(getReg());
1626 }
1627 
1628 //===----------------------------------------------------------------------===//
1629 // AsmParser
1630 //===----------------------------------------------------------------------===//
1631 
1632 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1633   if (Is == IS_VGPR) {
1634     switch (RegWidth) {
1635       default: return -1;
1636       case 1: return AMDGPU::VGPR_32RegClassID;
1637       case 2: return AMDGPU::VReg_64RegClassID;
1638       case 3: return AMDGPU::VReg_96RegClassID;
1639       case 4: return AMDGPU::VReg_128RegClassID;
1640       case 8: return AMDGPU::VReg_256RegClassID;
1641       case 16: return AMDGPU::VReg_512RegClassID;
1642     }
1643   } else if (Is == IS_TTMP) {
1644     switch (RegWidth) {
1645       default: return -1;
1646       case 1: return AMDGPU::TTMP_32RegClassID;
1647       case 2: return AMDGPU::TTMP_64RegClassID;
1648       case 4: return AMDGPU::TTMP_128RegClassID;
1649       case 8: return AMDGPU::TTMP_256RegClassID;
1650       case 16: return AMDGPU::TTMP_512RegClassID;
1651     }
1652   } else if (Is == IS_SGPR) {
1653     switch (RegWidth) {
1654       default: return -1;
1655       case 1: return AMDGPU::SGPR_32RegClassID;
1656       case 2: return AMDGPU::SGPR_64RegClassID;
1657       case 4: return AMDGPU::SGPR_128RegClassID;
1658       case 8: return AMDGPU::SGPR_256RegClassID;
1659       case 16: return AMDGPU::SGPR_512RegClassID;
1660     }
1661   }
1662   return -1;
1663 }
1664 
1665 static unsigned getSpecialRegForName(StringRef RegName) {
1666   return StringSwitch<unsigned>(RegName)
1667     .Case("exec", AMDGPU::EXEC)
1668     .Case("vcc", AMDGPU::VCC)
1669     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1670     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1671     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1672     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1673     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1674     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1675     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1676     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1677     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1678     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1679     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1680     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1681     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1682     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1683     .Case("m0", AMDGPU::M0)
1684     .Case("scc", AMDGPU::SCC)
1685     .Case("tba", AMDGPU::TBA)
1686     .Case("tma", AMDGPU::TMA)
1687     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1688     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1689     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1690     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1691     .Case("vcc_lo", AMDGPU::VCC_LO)
1692     .Case("vcc_hi", AMDGPU::VCC_HI)
1693     .Case("exec_lo", AMDGPU::EXEC_LO)
1694     .Case("exec_hi", AMDGPU::EXEC_HI)
1695     .Case("tma_lo", AMDGPU::TMA_LO)
1696     .Case("tma_hi", AMDGPU::TMA_HI)
1697     .Case("tba_lo", AMDGPU::TBA_LO)
1698     .Case("tba_hi", AMDGPU::TBA_HI)
1699     .Default(0);
1700 }
1701 
1702 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1703                                     SMLoc &EndLoc) {
1704   auto R = parseRegister();
1705   if (!R) return true;
1706   assert(R->isReg());
1707   RegNo = R->getReg();
1708   StartLoc = R->getStartLoc();
1709   EndLoc = R->getEndLoc();
1710   return false;
1711 }
1712 
1713 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1714                                             RegisterKind RegKind, unsigned Reg1,
1715                                             unsigned RegNum) {
1716   switch (RegKind) {
1717   case IS_SPECIAL:
1718     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1719       Reg = AMDGPU::EXEC;
1720       RegWidth = 2;
1721       return true;
1722     }
1723     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1724       Reg = AMDGPU::FLAT_SCR;
1725       RegWidth = 2;
1726       return true;
1727     }
1728     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1729       Reg = AMDGPU::XNACK_MASK;
1730       RegWidth = 2;
1731       return true;
1732     }
1733     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1734       Reg = AMDGPU::VCC;
1735       RegWidth = 2;
1736       return true;
1737     }
1738     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1739       Reg = AMDGPU::TBA;
1740       RegWidth = 2;
1741       return true;
1742     }
1743     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1744       Reg = AMDGPU::TMA;
1745       RegWidth = 2;
1746       return true;
1747     }
1748     return false;
1749   case IS_VGPR:
1750   case IS_SGPR:
1751   case IS_TTMP:
1752     if (Reg1 != Reg + RegWidth) {
1753       return false;
1754     }
1755     RegWidth++;
1756     return true;
1757   default:
1758     llvm_unreachable("unexpected register kind");
1759   }
1760 }
1761 
1762 static const StringRef Registers[] = {
1763   { "v" },
1764   { "s" },
1765   { "ttmp" },
1766 };
1767 
1768 bool
1769 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1770                             const AsmToken &NextToken) const {
1771 
1772   // A list of consecutive registers: [s0,s1,s2,s3]
1773   if (Token.is(AsmToken::LBrac))
1774     return true;
1775 
1776   if (!Token.is(AsmToken::Identifier))
1777     return false;
1778 
1779   // A single register like s0 or a range of registers like s[0:1]
1780 
1781   StringRef RegName = Token.getString();
1782 
1783   for (StringRef Reg : Registers) {
1784     if (RegName.startswith(Reg)) {
1785       if (Reg.size() < RegName.size()) {
1786         unsigned RegNum;
1787         // A single register with an index: rXX
1788         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1789           return true;
1790       } else {
1791         // A range of registers: r[XX:YY].
1792         if (NextToken.is(AsmToken::LBrac))
1793           return true;
1794       }
1795     }
1796   }
1797 
1798   return getSpecialRegForName(RegName);
1799 }
1800 
1801 bool
1802 AMDGPUAsmParser::isRegister()
1803 {
1804   return isRegister(getToken(), peekToken());
1805 }
1806 
1807 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1808                                           unsigned &RegNum, unsigned &RegWidth,
1809                                           unsigned *DwordRegIndex) {
1810   if (DwordRegIndex) { *DwordRegIndex = 0; }
1811   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1812   if (getLexer().is(AsmToken::Identifier)) {
1813     StringRef RegName = Parser.getTok().getString();
1814     if ((Reg = getSpecialRegForName(RegName))) {
1815       Parser.Lex();
1816       RegKind = IS_SPECIAL;
1817     } else {
1818       unsigned RegNumIndex = 0;
1819       if (RegName[0] == 'v') {
1820         RegNumIndex = 1;
1821         RegKind = IS_VGPR;
1822       } else if (RegName[0] == 's') {
1823         RegNumIndex = 1;
1824         RegKind = IS_SGPR;
1825       } else if (RegName.startswith("ttmp")) {
1826         RegNumIndex = strlen("ttmp");
1827         RegKind = IS_TTMP;
1828       } else {
1829         return false;
1830       }
1831       if (RegName.size() > RegNumIndex) {
1832         // Single 32-bit register: vXX.
1833         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1834           return false;
1835         Parser.Lex();
1836         RegWidth = 1;
1837       } else {
1838         // Range of registers: v[XX:YY]. ":YY" is optional.
1839         Parser.Lex();
1840         int64_t RegLo, RegHi;
1841         if (getLexer().isNot(AsmToken::LBrac))
1842           return false;
1843         Parser.Lex();
1844 
1845         if (getParser().parseAbsoluteExpression(RegLo))
1846           return false;
1847 
1848         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1849         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1850           return false;
1851         Parser.Lex();
1852 
1853         if (isRBrace) {
1854           RegHi = RegLo;
1855         } else {
1856           if (getParser().parseAbsoluteExpression(RegHi))
1857             return false;
1858 
1859           if (getLexer().isNot(AsmToken::RBrac))
1860             return false;
1861           Parser.Lex();
1862         }
1863         RegNum = (unsigned) RegLo;
1864         RegWidth = (RegHi - RegLo) + 1;
1865       }
1866     }
1867   } else if (getLexer().is(AsmToken::LBrac)) {
1868     // List of consecutive registers: [s0,s1,s2,s3]
1869     Parser.Lex();
1870     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1871       return false;
1872     if (RegWidth != 1)
1873       return false;
1874     RegisterKind RegKind1;
1875     unsigned Reg1, RegNum1, RegWidth1;
1876     do {
1877       if (getLexer().is(AsmToken::Comma)) {
1878         Parser.Lex();
1879       } else if (getLexer().is(AsmToken::RBrac)) {
1880         Parser.Lex();
1881         break;
1882       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1883         if (RegWidth1 != 1) {
1884           return false;
1885         }
1886         if (RegKind1 != RegKind) {
1887           return false;
1888         }
1889         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1890           return false;
1891         }
1892       } else {
1893         return false;
1894       }
1895     } while (true);
1896   } else {
1897     return false;
1898   }
1899   switch (RegKind) {
1900   case IS_SPECIAL:
1901     RegNum = 0;
1902     RegWidth = 1;
1903     break;
1904   case IS_VGPR:
1905   case IS_SGPR:
1906   case IS_TTMP:
1907   {
1908     unsigned Size = 1;
1909     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1910       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1911       Size = std::min(RegWidth, 4u);
1912     }
1913     if (RegNum % Size != 0)
1914       return false;
1915     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1916     RegNum = RegNum / Size;
1917     int RCID = getRegClass(RegKind, RegWidth);
1918     if (RCID == -1)
1919       return false;
1920     const MCRegisterClass RC = TRI->getRegClass(RCID);
1921     if (RegNum >= RC.getNumRegs())
1922       return false;
1923     Reg = RC.getRegister(RegNum);
1924     break;
1925   }
1926 
1927   default:
1928     llvm_unreachable("unexpected register kind");
1929   }
1930 
1931   if (!subtargetHasRegister(*TRI, Reg))
1932     return false;
1933   return true;
1934 }
1935 
1936 Optional<StringRef>
1937 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1938   switch (RegKind) {
1939   case IS_VGPR:
1940     return StringRef(".amdgcn.next_free_vgpr");
1941   case IS_SGPR:
1942     return StringRef(".amdgcn.next_free_sgpr");
1943   default:
1944     return None;
1945   }
1946 }
1947 
1948 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1949   auto SymbolName = getGprCountSymbolName(RegKind);
1950   assert(SymbolName && "initializing invalid register kind");
1951   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1952   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1953 }
1954 
1955 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1956                                             unsigned DwordRegIndex,
1957                                             unsigned RegWidth) {
1958   // Symbols are only defined for GCN targets
1959   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1960     return true;
1961 
1962   auto SymbolName = getGprCountSymbolName(RegKind);
1963   if (!SymbolName)
1964     return true;
1965   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1966 
1967   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1968   int64_t OldCount;
1969 
1970   if (!Sym->isVariable())
1971     return !Error(getParser().getTok().getLoc(),
1972                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1973   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1974     return !Error(
1975         getParser().getTok().getLoc(),
1976         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1977 
1978   if (OldCount <= NewMax)
1979     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1980 
1981   return true;
1982 }
1983 
1984 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1985   const auto &Tok = Parser.getTok();
1986   SMLoc StartLoc = Tok.getLoc();
1987   SMLoc EndLoc = Tok.getEndLoc();
1988   RegisterKind RegKind;
1989   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1990 
1991   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1992     //FIXME: improve error messages (bug 41303).
1993     Error(StartLoc, "not a valid operand.");
1994     return nullptr;
1995   }
1996   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1997     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1998       return nullptr;
1999   } else
2000     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2001   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
2002 }
2003 
2004 bool
2005 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2006   if (HasSP3AbsModifier) {
2007     // This is a workaround for handling expressions
2008     // as arguments of SP3 'abs' modifier, for example:
2009     //     |1.0|
2010     //     |-1|
2011     //     |1+x|
2012     // This syntax is not compatible with syntax of standard
2013     // MC expressions (due to the trailing '|').
2014 
2015     SMLoc EndLoc;
2016     const MCExpr *Expr;
2017     SMLoc StartLoc = getLoc();
2018 
2019     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2020       return true;
2021     }
2022 
2023     if (!Expr->evaluateAsAbsolute(Val))
2024       return Error(StartLoc, "expected absolute expression");
2025 
2026     return false;
2027   }
2028 
2029   return getParser().parseAbsoluteExpression(Val);
2030 }
2031 
2032 OperandMatchResultTy
2033 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2034   // TODO: add syntactic sugar for 1/(2*PI)
2035 
2036   const auto& Tok = getToken();
2037   const auto& NextTok = peekToken();
2038   bool IsReal = Tok.is(AsmToken::Real);
2039   SMLoc S = Tok.getLoc();
2040   bool Negate = false;
2041 
2042   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2043     lex();
2044     IsReal = true;
2045     Negate = true;
2046   }
2047 
2048   if (IsReal) {
2049     // Floating-point expressions are not supported.
2050     // Can only allow floating-point literals with an
2051     // optional sign.
2052 
2053     StringRef Num = getTokenStr();
2054     lex();
2055 
2056     APFloat RealVal(APFloat::IEEEdouble());
2057     auto roundMode = APFloat::rmNearestTiesToEven;
2058     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2059       return MatchOperand_ParseFail;
2060     }
2061     if (Negate)
2062       RealVal.changeSign();
2063 
2064     Operands.push_back(
2065       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2066                                AMDGPUOperand::ImmTyNone, true));
2067 
2068     return MatchOperand_Success;
2069 
2070     // FIXME: Should enable arbitrary expressions here
2071   } else if (Tok.is(AsmToken::Integer) ||
2072              (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){
2073 
2074     int64_t IntVal;
2075     if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
2076       return MatchOperand_ParseFail;
2077 
2078     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2079     return MatchOperand_Success;
2080   }
2081 
2082   return MatchOperand_NoMatch;
2083 }
2084 
2085 OperandMatchResultTy
2086 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2087   if (!isRegister())
2088     return MatchOperand_NoMatch;
2089 
2090   if (auto R = parseRegister()) {
2091     assert(R->isReg());
2092     R->Reg.IsForcedVOP3 = isForcedVOP3();
2093     Operands.push_back(std::move(R));
2094     return MatchOperand_Success;
2095   }
2096   return MatchOperand_ParseFail;
2097 }
2098 
2099 OperandMatchResultTy
2100 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
2101   auto res = parseReg(Operands);
2102   return (res == MatchOperand_NoMatch)?
2103          parseImm(Operands, AbsMod) :
2104          res;
2105 }
2106 
2107 OperandMatchResultTy
2108 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2109                                               bool AllowImm) {
2110   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
2111 
2112   if (getLexer().getKind()== AsmToken::Minus) {
2113     const AsmToken NextToken = getLexer().peekTok();
2114 
2115     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2116     if (NextToken.is(AsmToken::Minus)) {
2117       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
2118       return MatchOperand_ParseFail;
2119     }
2120 
2121     // '-' followed by an integer literal N should be interpreted as integer
2122     // negation rather than a floating-point NEG modifier applied to N.
2123     // Beside being contr-intuitive, such use of floating-point NEG modifier
2124     // results in different meaning of integer literals used with VOP1/2/C
2125     // and VOP3, for example:
2126     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2127     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2128     // Negative fp literals should be handled likewise for unifomtity
2129     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2130       Parser.Lex();
2131       Negate = true;
2132     }
2133   }
2134 
2135   if (getLexer().getKind() == AsmToken::Identifier &&
2136       Parser.getTok().getString() == "neg") {
2137     if (Negate) {
2138       Error(Parser.getTok().getLoc(), "expected register or immediate");
2139       return MatchOperand_ParseFail;
2140     }
2141     Parser.Lex();
2142     Negate2 = true;
2143     if (getLexer().isNot(AsmToken::LParen)) {
2144       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2145       return MatchOperand_ParseFail;
2146     }
2147     Parser.Lex();
2148   }
2149 
2150   if (getLexer().getKind() == AsmToken::Identifier &&
2151       Parser.getTok().getString() == "abs") {
2152     Parser.Lex();
2153     Abs2 = true;
2154     if (getLexer().isNot(AsmToken::LParen)) {
2155       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2156       return MatchOperand_ParseFail;
2157     }
2158     Parser.Lex();
2159   }
2160 
2161   if (getLexer().getKind() == AsmToken::Pipe) {
2162     if (Abs2) {
2163       Error(Parser.getTok().getLoc(), "expected register or immediate");
2164       return MatchOperand_ParseFail;
2165     }
2166     Parser.Lex();
2167     Abs = true;
2168   }
2169 
2170   OperandMatchResultTy Res;
2171   if (AllowImm) {
2172     Res = parseRegOrImm(Operands, Abs);
2173   } else {
2174     Res = parseReg(Operands);
2175   }
2176   if (Res != MatchOperand_Success) {
2177     return Res;
2178   }
2179 
2180   AMDGPUOperand::Modifiers Mods;
2181   if (Abs) {
2182     if (getLexer().getKind() != AsmToken::Pipe) {
2183       Error(Parser.getTok().getLoc(), "expected vertical bar");
2184       return MatchOperand_ParseFail;
2185     }
2186     Parser.Lex();
2187     Mods.Abs = true;
2188   }
2189   if (Abs2) {
2190     if (getLexer().isNot(AsmToken::RParen)) {
2191       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2192       return MatchOperand_ParseFail;
2193     }
2194     Parser.Lex();
2195     Mods.Abs = true;
2196   }
2197 
2198   if (Negate) {
2199     Mods.Neg = true;
2200   } else if (Negate2) {
2201     if (getLexer().isNot(AsmToken::RParen)) {
2202       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2203       return MatchOperand_ParseFail;
2204     }
2205     Parser.Lex();
2206     Mods.Neg = true;
2207   }
2208 
2209   if (Mods.hasFPModifiers()) {
2210     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2211     Op.setModifiers(Mods);
2212   }
2213   return MatchOperand_Success;
2214 }
2215 
2216 OperandMatchResultTy
2217 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2218                                                bool AllowImm) {
2219   bool Sext = false;
2220 
2221   if (getLexer().getKind() == AsmToken::Identifier &&
2222       Parser.getTok().getString() == "sext") {
2223     Parser.Lex();
2224     Sext = true;
2225     if (getLexer().isNot(AsmToken::LParen)) {
2226       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2227       return MatchOperand_ParseFail;
2228     }
2229     Parser.Lex();
2230   }
2231 
2232   OperandMatchResultTy Res;
2233   if (AllowImm) {
2234     Res = parseRegOrImm(Operands);
2235   } else {
2236     Res = parseReg(Operands);
2237   }
2238   if (Res != MatchOperand_Success) {
2239     return Res;
2240   }
2241 
2242   AMDGPUOperand::Modifiers Mods;
2243   if (Sext) {
2244     if (getLexer().isNot(AsmToken::RParen)) {
2245       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2246       return MatchOperand_ParseFail;
2247     }
2248     Parser.Lex();
2249     Mods.Sext = true;
2250   }
2251 
2252   if (Mods.hasIntModifiers()) {
2253     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2254     Op.setModifiers(Mods);
2255   }
2256 
2257   return MatchOperand_Success;
2258 }
2259 
2260 OperandMatchResultTy
2261 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2262   return parseRegOrImmWithFPInputMods(Operands, false);
2263 }
2264 
2265 OperandMatchResultTy
2266 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2267   return parseRegOrImmWithIntInputMods(Operands, false);
2268 }
2269 
2270 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2271   auto Loc = getLoc();
2272   if (trySkipId("off")) {
2273     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2274                                                 AMDGPUOperand::ImmTyOff, false));
2275     return MatchOperand_Success;
2276   }
2277 
2278   if (!isRegister())
2279     return MatchOperand_NoMatch;
2280 
2281   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2282   if (Reg) {
2283     Operands.push_back(std::move(Reg));
2284     return MatchOperand_Success;
2285   }
2286 
2287   return MatchOperand_ParseFail;
2288 
2289 }
2290 
2291 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2292   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2293 
2294   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2295       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2296       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2297       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2298     return Match_InvalidOperand;
2299 
2300   if ((TSFlags & SIInstrFlags::VOP3) &&
2301       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2302       getForcedEncodingSize() != 64)
2303     return Match_PreferE32;
2304 
2305   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2306       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2307     // v_mac_f32/16 allow only dst_sel == DWORD;
2308     auto OpNum =
2309         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2310     const auto &Op = Inst.getOperand(OpNum);
2311     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2312       return Match_InvalidOperand;
2313     }
2314   }
2315 
2316   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2317     // FIXME: Produces error without correct column reported.
2318     auto OpNum =
2319         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2320     const auto &Op = Inst.getOperand(OpNum);
2321     if (Op.getImm() != 0)
2322       return Match_InvalidOperand;
2323   }
2324 
2325   return Match_Success;
2326 }
2327 
2328 // What asm variants we should check
2329 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2330   if (getForcedEncodingSize() == 32) {
2331     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2332     return makeArrayRef(Variants);
2333   }
2334 
2335   if (isForcedVOP3()) {
2336     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2337     return makeArrayRef(Variants);
2338   }
2339 
2340   if (isForcedSDWA()) {
2341     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2342                                         AMDGPUAsmVariants::SDWA9};
2343     return makeArrayRef(Variants);
2344   }
2345 
2346   if (isForcedDPP()) {
2347     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2348     return makeArrayRef(Variants);
2349   }
2350 
2351   static const unsigned Variants[] = {
2352     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2353     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2354   };
2355 
2356   return makeArrayRef(Variants);
2357 }
2358 
2359 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2360   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2361   const unsigned Num = Desc.getNumImplicitUses();
2362   for (unsigned i = 0; i < Num; ++i) {
2363     unsigned Reg = Desc.ImplicitUses[i];
2364     switch (Reg) {
2365     case AMDGPU::FLAT_SCR:
2366     case AMDGPU::VCC:
2367     case AMDGPU::M0:
2368       return Reg;
2369     default:
2370       break;
2371     }
2372   }
2373   return AMDGPU::NoRegister;
2374 }
2375 
2376 // NB: This code is correct only when used to check constant
2377 // bus limitations because GFX7 support no f16 inline constants.
2378 // Note that there are no cases when a GFX7 opcode violates
2379 // constant bus limitations due to the use of an f16 constant.
2380 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2381                                        unsigned OpIdx) const {
2382   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2383 
2384   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2385     return false;
2386   }
2387 
2388   const MCOperand &MO = Inst.getOperand(OpIdx);
2389 
2390   int64_t Val = MO.getImm();
2391   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2392 
2393   switch (OpSize) { // expected operand size
2394   case 8:
2395     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2396   case 4:
2397     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2398   case 2: {
2399     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2400     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2401         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2402       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2403     } else {
2404       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2405     }
2406   }
2407   default:
2408     llvm_unreachable("invalid operand size");
2409   }
2410 }
2411 
2412 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2413   const MCOperand &MO = Inst.getOperand(OpIdx);
2414   if (MO.isImm()) {
2415     return !isInlineConstant(Inst, OpIdx);
2416   }
2417   return !MO.isReg() ||
2418          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2419 }
2420 
2421 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2422   const unsigned Opcode = Inst.getOpcode();
2423   const MCInstrDesc &Desc = MII.get(Opcode);
2424   unsigned ConstantBusUseCount = 0;
2425 
2426   if (Desc.TSFlags &
2427       (SIInstrFlags::VOPC |
2428        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2429        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2430        SIInstrFlags::SDWA)) {
2431     // Check special imm operands (used by madmk, etc)
2432     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2433       ++ConstantBusUseCount;
2434     }
2435 
2436     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2437     if (SGPRUsed != AMDGPU::NoRegister) {
2438       ++ConstantBusUseCount;
2439     }
2440 
2441     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2442     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2443     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2444 
2445     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2446 
2447     for (int OpIdx : OpIndices) {
2448       if (OpIdx == -1) break;
2449 
2450       const MCOperand &MO = Inst.getOperand(OpIdx);
2451       if (usesConstantBus(Inst, OpIdx)) {
2452         if (MO.isReg()) {
2453           const unsigned Reg = mc2PseudoReg(MO.getReg());
2454           // Pairs of registers with a partial intersections like these
2455           //   s0, s[0:1]
2456           //   flat_scratch_lo, flat_scratch
2457           //   flat_scratch_lo, flat_scratch_hi
2458           // are theoretically valid but they are disabled anyway.
2459           // Note that this code mimics SIInstrInfo::verifyInstruction
2460           if (Reg != SGPRUsed) {
2461             ++ConstantBusUseCount;
2462           }
2463           SGPRUsed = Reg;
2464         } else { // Expression or a literal
2465           ++ConstantBusUseCount;
2466         }
2467       }
2468     }
2469   }
2470 
2471   return ConstantBusUseCount <= 1;
2472 }
2473 
2474 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2475   const unsigned Opcode = Inst.getOpcode();
2476   const MCInstrDesc &Desc = MII.get(Opcode);
2477 
2478   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2479   if (DstIdx == -1 ||
2480       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2481     return true;
2482   }
2483 
2484   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2485 
2486   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2487   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2488   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2489 
2490   assert(DstIdx != -1);
2491   const MCOperand &Dst = Inst.getOperand(DstIdx);
2492   assert(Dst.isReg());
2493   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2494 
2495   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2496 
2497   for (int SrcIdx : SrcIndices) {
2498     if (SrcIdx == -1) break;
2499     const MCOperand &Src = Inst.getOperand(SrcIdx);
2500     if (Src.isReg()) {
2501       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2502       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2503         return false;
2504       }
2505     }
2506   }
2507 
2508   return true;
2509 }
2510 
2511 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2512 
2513   const unsigned Opc = Inst.getOpcode();
2514   const MCInstrDesc &Desc = MII.get(Opc);
2515 
2516   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2517     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2518     assert(ClampIdx != -1);
2519     return Inst.getOperand(ClampIdx).getImm() == 0;
2520   }
2521 
2522   return true;
2523 }
2524 
2525 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2526 
2527   const unsigned Opc = Inst.getOpcode();
2528   const MCInstrDesc &Desc = MII.get(Opc);
2529 
2530   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2531     return true;
2532 
2533   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2534   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2535   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2536 
2537   assert(VDataIdx != -1);
2538   assert(DMaskIdx != -1);
2539   assert(TFEIdx != -1);
2540 
2541   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2542   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2543   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2544   if (DMask == 0)
2545     DMask = 1;
2546 
2547   unsigned DataSize =
2548     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2549   if (hasPackedD16()) {
2550     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2551     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2552       DataSize = (DataSize + 1) / 2;
2553   }
2554 
2555   return (VDataSize / 4) == DataSize + TFESize;
2556 }
2557 
2558 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2559 
2560   const unsigned Opc = Inst.getOpcode();
2561   const MCInstrDesc &Desc = MII.get(Opc);
2562 
2563   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2564     return true;
2565   if (!Desc.mayLoad() || !Desc.mayStore())
2566     return true; // Not atomic
2567 
2568   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2569   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2570 
2571   // This is an incomplete check because image_atomic_cmpswap
2572   // may only use 0x3 and 0xf while other atomic operations
2573   // may use 0x1 and 0x3. However these limitations are
2574   // verified when we check that dmask matches dst size.
2575   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2576 }
2577 
2578 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2579 
2580   const unsigned Opc = Inst.getOpcode();
2581   const MCInstrDesc &Desc = MII.get(Opc);
2582 
2583   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2584     return true;
2585 
2586   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2587   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2588 
2589   // GATHER4 instructions use dmask in a different fashion compared to
2590   // other MIMG instructions. The only useful DMASK values are
2591   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2592   // (red,red,red,red) etc.) The ISA document doesn't mention
2593   // this.
2594   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2595 }
2596 
2597 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2598 
2599   const unsigned Opc = Inst.getOpcode();
2600   const MCInstrDesc &Desc = MII.get(Opc);
2601 
2602   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2603     return true;
2604 
2605   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2606   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2607     if (isCI() || isSI())
2608       return false;
2609   }
2610 
2611   return true;
2612 }
2613 
2614 static bool IsRevOpcode(const unsigned Opcode)
2615 {
2616   switch (Opcode) {
2617   case AMDGPU::V_SUBREV_F32_e32:
2618   case AMDGPU::V_SUBREV_F32_e64:
2619   case AMDGPU::V_SUBREV_F32_e32_si:
2620   case AMDGPU::V_SUBREV_F32_e32_vi:
2621   case AMDGPU::V_SUBREV_F32_e64_si:
2622   case AMDGPU::V_SUBREV_F32_e64_vi:
2623   case AMDGPU::V_SUBREV_I32_e32:
2624   case AMDGPU::V_SUBREV_I32_e64:
2625   case AMDGPU::V_SUBREV_I32_e32_si:
2626   case AMDGPU::V_SUBREV_I32_e64_si:
2627   case AMDGPU::V_SUBBREV_U32_e32:
2628   case AMDGPU::V_SUBBREV_U32_e64:
2629   case AMDGPU::V_SUBBREV_U32_e32_si:
2630   case AMDGPU::V_SUBBREV_U32_e32_vi:
2631   case AMDGPU::V_SUBBREV_U32_e64_si:
2632   case AMDGPU::V_SUBBREV_U32_e64_vi:
2633   case AMDGPU::V_SUBREV_U32_e32:
2634   case AMDGPU::V_SUBREV_U32_e64:
2635   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2636   case AMDGPU::V_SUBREV_U32_e32_vi:
2637   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2638   case AMDGPU::V_SUBREV_U32_e64_vi:
2639   case AMDGPU::V_SUBREV_F16_e32:
2640   case AMDGPU::V_SUBREV_F16_e64:
2641   case AMDGPU::V_SUBREV_F16_e32_vi:
2642   case AMDGPU::V_SUBREV_F16_e64_vi:
2643   case AMDGPU::V_SUBREV_U16_e32:
2644   case AMDGPU::V_SUBREV_U16_e64:
2645   case AMDGPU::V_SUBREV_U16_e32_vi:
2646   case AMDGPU::V_SUBREV_U16_e64_vi:
2647   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2648   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2649   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2650   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2651   case AMDGPU::V_LSHLREV_B32_e32_si:
2652   case AMDGPU::V_LSHLREV_B32_e64_si:
2653   case AMDGPU::V_LSHLREV_B16_e32_vi:
2654   case AMDGPU::V_LSHLREV_B16_e64_vi:
2655   case AMDGPU::V_LSHLREV_B32_e32_vi:
2656   case AMDGPU::V_LSHLREV_B32_e64_vi:
2657   case AMDGPU::V_LSHLREV_B64_vi:
2658   case AMDGPU::V_LSHRREV_B32_e32_si:
2659   case AMDGPU::V_LSHRREV_B32_e64_si:
2660   case AMDGPU::V_LSHRREV_B16_e32_vi:
2661   case AMDGPU::V_LSHRREV_B16_e64_vi:
2662   case AMDGPU::V_LSHRREV_B32_e32_vi:
2663   case AMDGPU::V_LSHRREV_B32_e64_vi:
2664   case AMDGPU::V_LSHRREV_B64_vi:
2665   case AMDGPU::V_ASHRREV_I32_e64_si:
2666   case AMDGPU::V_ASHRREV_I32_e32_si:
2667   case AMDGPU::V_ASHRREV_I16_e32_vi:
2668   case AMDGPU::V_ASHRREV_I16_e64_vi:
2669   case AMDGPU::V_ASHRREV_I32_e32_vi:
2670   case AMDGPU::V_ASHRREV_I32_e64_vi:
2671   case AMDGPU::V_ASHRREV_I64_vi:
2672   case AMDGPU::V_PK_LSHLREV_B16_vi:
2673   case AMDGPU::V_PK_LSHRREV_B16_vi:
2674   case AMDGPU::V_PK_ASHRREV_I16_vi:
2675     return true;
2676   default:
2677     return false;
2678   }
2679 }
2680 
2681 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2682 
2683   using namespace SIInstrFlags;
2684   const unsigned Opcode = Inst.getOpcode();
2685   const MCInstrDesc &Desc = MII.get(Opcode);
2686 
2687   // lds_direct register is defined so that it can be used
2688   // with 9-bit operands only. Ignore encodings which do not accept these.
2689   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2690     return true;
2691 
2692   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2693   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2694   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2695 
2696   const int SrcIndices[] = { Src1Idx, Src2Idx };
2697 
2698   // lds_direct cannot be specified as either src1 or src2.
2699   for (int SrcIdx : SrcIndices) {
2700     if (SrcIdx == -1) break;
2701     const MCOperand &Src = Inst.getOperand(SrcIdx);
2702     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2703       return false;
2704     }
2705   }
2706 
2707   if (Src0Idx == -1)
2708     return true;
2709 
2710   const MCOperand &Src = Inst.getOperand(Src0Idx);
2711   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2712     return true;
2713 
2714   // lds_direct is specified as src0. Check additional limitations.
2715   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2716 }
2717 
2718 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2719   unsigned Opcode = Inst.getOpcode();
2720   const MCInstrDesc &Desc = MII.get(Opcode);
2721   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2722     return true;
2723 
2724   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2725   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2726 
2727   const int OpIndices[] = { Src0Idx, Src1Idx };
2728 
2729   unsigned NumLiterals = 0;
2730   uint32_t LiteralValue;
2731 
2732   for (int OpIdx : OpIndices) {
2733     if (OpIdx == -1) break;
2734 
2735     const MCOperand &MO = Inst.getOperand(OpIdx);
2736     if (MO.isImm() &&
2737         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2738         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2739         !isInlineConstant(Inst, OpIdx)) {
2740       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2741       if (NumLiterals == 0 || LiteralValue != Value) {
2742         LiteralValue = Value;
2743         ++NumLiterals;
2744       }
2745     }
2746   }
2747 
2748   return NumLiterals <= 1;
2749 }
2750 
2751 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2752                                           const SMLoc &IDLoc) {
2753   if (!validateLdsDirect(Inst)) {
2754     Error(IDLoc,
2755       "invalid use of lds_direct");
2756     return false;
2757   }
2758   if (!validateSOPLiteral(Inst)) {
2759     Error(IDLoc,
2760       "only one literal operand is allowed");
2761     return false;
2762   }
2763   if (!validateConstantBusLimitations(Inst)) {
2764     Error(IDLoc,
2765       "invalid operand (violates constant bus restrictions)");
2766     return false;
2767   }
2768   if (!validateEarlyClobberLimitations(Inst)) {
2769     Error(IDLoc,
2770       "destination must be different than all sources");
2771     return false;
2772   }
2773   if (!validateIntClampSupported(Inst)) {
2774     Error(IDLoc,
2775       "integer clamping is not supported on this GPU");
2776     return false;
2777   }
2778   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2779   if (!validateMIMGD16(Inst)) {
2780     Error(IDLoc,
2781       "d16 modifier is not supported on this GPU");
2782     return false;
2783   }
2784   if (!validateMIMGDataSize(Inst)) {
2785     Error(IDLoc,
2786       "image data size does not match dmask and tfe");
2787     return false;
2788   }
2789   if (!validateMIMGAtomicDMask(Inst)) {
2790     Error(IDLoc,
2791       "invalid atomic image dmask");
2792     return false;
2793   }
2794   if (!validateMIMGGatherDMask(Inst)) {
2795     Error(IDLoc,
2796       "invalid image_gather dmask: only one bit must be set");
2797     return false;
2798   }
2799 
2800   return true;
2801 }
2802 
2803 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2804                                             const FeatureBitset &FBS,
2805                                             unsigned VariantID = 0);
2806 
2807 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2808                                               OperandVector &Operands,
2809                                               MCStreamer &Out,
2810                                               uint64_t &ErrorInfo,
2811                                               bool MatchingInlineAsm) {
2812   MCInst Inst;
2813   unsigned Result = Match_Success;
2814   for (auto Variant : getMatchedVariants()) {
2815     uint64_t EI;
2816     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2817                                   Variant);
2818     // We order match statuses from least to most specific. We use most specific
2819     // status as resulting
2820     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2821     if ((R == Match_Success) ||
2822         (R == Match_PreferE32) ||
2823         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2824         (R == Match_InvalidOperand && Result != Match_MissingFeature
2825                                    && Result != Match_PreferE32) ||
2826         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2827                                    && Result != Match_MissingFeature
2828                                    && Result != Match_PreferE32)) {
2829       Result = R;
2830       ErrorInfo = EI;
2831     }
2832     if (R == Match_Success)
2833       break;
2834   }
2835 
2836   switch (Result) {
2837   default: break;
2838   case Match_Success:
2839     if (!validateInstruction(Inst, IDLoc)) {
2840       return true;
2841     }
2842     Inst.setLoc(IDLoc);
2843     Out.EmitInstruction(Inst, getSTI());
2844     return false;
2845 
2846   case Match_MissingFeature:
2847     return Error(IDLoc, "instruction not supported on this GPU");
2848 
2849   case Match_MnemonicFail: {
2850     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2851     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2852         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2853     return Error(IDLoc, "invalid instruction" + Suggestion,
2854                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2855   }
2856 
2857   case Match_InvalidOperand: {
2858     SMLoc ErrorLoc = IDLoc;
2859     if (ErrorInfo != ~0ULL) {
2860       if (ErrorInfo >= Operands.size()) {
2861         return Error(IDLoc, "too few operands for instruction");
2862       }
2863       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2864       if (ErrorLoc == SMLoc())
2865         ErrorLoc = IDLoc;
2866     }
2867     return Error(ErrorLoc, "invalid operand for instruction");
2868   }
2869 
2870   case Match_PreferE32:
2871     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2872                         "should be encoded as e32");
2873   }
2874   llvm_unreachable("Implement any new match types added!");
2875 }
2876 
2877 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2878   int64_t Tmp = -1;
2879   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2880     return true;
2881   }
2882   if (getParser().parseAbsoluteExpression(Tmp)) {
2883     return true;
2884   }
2885   Ret = static_cast<uint32_t>(Tmp);
2886   return false;
2887 }
2888 
2889 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2890                                                uint32_t &Minor) {
2891   if (ParseAsAbsoluteExpression(Major))
2892     return TokError("invalid major version");
2893 
2894   if (getLexer().isNot(AsmToken::Comma))
2895     return TokError("minor version number required, comma expected");
2896   Lex();
2897 
2898   if (ParseAsAbsoluteExpression(Minor))
2899     return TokError("invalid minor version");
2900 
2901   return false;
2902 }
2903 
2904 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2905   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2906     return TokError("directive only supported for amdgcn architecture");
2907 
2908   std::string Target;
2909 
2910   SMLoc TargetStart = getTok().getLoc();
2911   if (getParser().parseEscapedString(Target))
2912     return true;
2913   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2914 
2915   std::string ExpectedTarget;
2916   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2917   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2918 
2919   if (Target != ExpectedTargetOS.str())
2920     return getParser().Error(TargetRange.Start, "target must match options",
2921                              TargetRange);
2922 
2923   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2924   return false;
2925 }
2926 
2927 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2928   return getParser().Error(Range.Start, "value out of range", Range);
2929 }
2930 
2931 bool AMDGPUAsmParser::calculateGPRBlocks(
2932     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2933     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2934     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2935     unsigned &SGPRBlocks) {
2936   // TODO(scott.linder): These calculations are duplicated from
2937   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2938   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2939 
2940   unsigned NumVGPRs = NextFreeVGPR;
2941   unsigned NumSGPRs = NextFreeSGPR;
2942   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2943 
2944   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2945       NumSGPRs > MaxAddressableNumSGPRs)
2946     return OutOfRangeError(SGPRRange);
2947 
2948   NumSGPRs +=
2949       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2950 
2951   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2952       NumSGPRs > MaxAddressableNumSGPRs)
2953     return OutOfRangeError(SGPRRange);
2954 
2955   if (Features.test(FeatureSGPRInitBug))
2956     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2957 
2958   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2959   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2960 
2961   return false;
2962 }
2963 
2964 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2965   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2966     return TokError("directive only supported for amdgcn architecture");
2967 
2968   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2969     return TokError("directive only supported for amdhsa OS");
2970 
2971   StringRef KernelName;
2972   if (getParser().parseIdentifier(KernelName))
2973     return true;
2974 
2975   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2976 
2977   StringSet<> Seen;
2978 
2979   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2980 
2981   SMRange VGPRRange;
2982   uint64_t NextFreeVGPR = 0;
2983   SMRange SGPRRange;
2984   uint64_t NextFreeSGPR = 0;
2985   unsigned UserSGPRCount = 0;
2986   bool ReserveVCC = true;
2987   bool ReserveFlatScr = true;
2988   bool ReserveXNACK = hasXNACK();
2989 
2990   while (true) {
2991     while (getLexer().is(AsmToken::EndOfStatement))
2992       Lex();
2993 
2994     if (getLexer().isNot(AsmToken::Identifier))
2995       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2996 
2997     StringRef ID = getTok().getIdentifier();
2998     SMRange IDRange = getTok().getLocRange();
2999     Lex();
3000 
3001     if (ID == ".end_amdhsa_kernel")
3002       break;
3003 
3004     if (Seen.find(ID) != Seen.end())
3005       return TokError(".amdhsa_ directives cannot be repeated");
3006     Seen.insert(ID);
3007 
3008     SMLoc ValStart = getTok().getLoc();
3009     int64_t IVal;
3010     if (getParser().parseAbsoluteExpression(IVal))
3011       return true;
3012     SMLoc ValEnd = getTok().getLoc();
3013     SMRange ValRange = SMRange(ValStart, ValEnd);
3014 
3015     if (IVal < 0)
3016       return OutOfRangeError(ValRange);
3017 
3018     uint64_t Val = IVal;
3019 
3020 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3021   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3022     return OutOfRangeError(RANGE);                                             \
3023   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3024 
3025     if (ID == ".amdhsa_group_segment_fixed_size") {
3026       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3027         return OutOfRangeError(ValRange);
3028       KD.group_segment_fixed_size = Val;
3029     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3030       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3031         return OutOfRangeError(ValRange);
3032       KD.private_segment_fixed_size = Val;
3033     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3034       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3035                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3036                        Val, ValRange);
3037       UserSGPRCount += 4;
3038     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3039       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3040                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3041                        ValRange);
3042       UserSGPRCount += 2;
3043     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3044       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3045                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3046                        ValRange);
3047       UserSGPRCount += 2;
3048     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3049       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3050                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3051                        Val, ValRange);
3052       UserSGPRCount += 2;
3053     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3054       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3055                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3056                        ValRange);
3057       UserSGPRCount += 2;
3058     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3059       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3060                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3061                        ValRange);
3062       UserSGPRCount += 2;
3063     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3064       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3065                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3066                        Val, ValRange);
3067       UserSGPRCount += 1;
3068     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3069       PARSE_BITS_ENTRY(
3070           KD.compute_pgm_rsrc2,
3071           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3072           ValRange);
3073     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3074       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3075                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3076                        ValRange);
3077     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3078       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3079                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3080                        ValRange);
3081     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3082       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3083                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3084                        ValRange);
3085     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3086       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3087                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3088                        ValRange);
3089     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3090       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3091                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3092                        ValRange);
3093     } else if (ID == ".amdhsa_next_free_vgpr") {
3094       VGPRRange = ValRange;
3095       NextFreeVGPR = Val;
3096     } else if (ID == ".amdhsa_next_free_sgpr") {
3097       SGPRRange = ValRange;
3098       NextFreeSGPR = Val;
3099     } else if (ID == ".amdhsa_reserve_vcc") {
3100       if (!isUInt<1>(Val))
3101         return OutOfRangeError(ValRange);
3102       ReserveVCC = Val;
3103     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3104       if (IVersion.Major < 7)
3105         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3106                                  IDRange);
3107       if (!isUInt<1>(Val))
3108         return OutOfRangeError(ValRange);
3109       ReserveFlatScr = Val;
3110     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3111       if (IVersion.Major < 8)
3112         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3113                                  IDRange);
3114       if (!isUInt<1>(Val))
3115         return OutOfRangeError(ValRange);
3116       ReserveXNACK = Val;
3117     } else if (ID == ".amdhsa_float_round_mode_32") {
3118       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3119                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3120     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3121       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3122                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3123     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3124       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3125                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3126     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3127       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3128                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3129                        ValRange);
3130     } else if (ID == ".amdhsa_dx10_clamp") {
3131       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3132                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3133     } else if (ID == ".amdhsa_ieee_mode") {
3134       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3135                        Val, ValRange);
3136     } else if (ID == ".amdhsa_fp16_overflow") {
3137       if (IVersion.Major < 9)
3138         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3139                                  IDRange);
3140       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3141                        ValRange);
3142     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3143       PARSE_BITS_ENTRY(
3144           KD.compute_pgm_rsrc2,
3145           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3146           ValRange);
3147     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3148       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3149                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3150                        Val, ValRange);
3151     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3152       PARSE_BITS_ENTRY(
3153           KD.compute_pgm_rsrc2,
3154           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3155           ValRange);
3156     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3157       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3158                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3159                        Val, ValRange);
3160     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3161       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3162                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3163                        Val, ValRange);
3164     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3165       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3166                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3167                        Val, ValRange);
3168     } else if (ID == ".amdhsa_exception_int_div_zero") {
3169       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3170                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3171                        Val, ValRange);
3172     } else {
3173       return getParser().Error(IDRange.Start,
3174                                "unknown .amdhsa_kernel directive", IDRange);
3175     }
3176 
3177 #undef PARSE_BITS_ENTRY
3178   }
3179 
3180   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3181     return TokError(".amdhsa_next_free_vgpr directive is required");
3182 
3183   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3184     return TokError(".amdhsa_next_free_sgpr directive is required");
3185 
3186   unsigned VGPRBlocks;
3187   unsigned SGPRBlocks;
3188   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3189                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3190                          SGPRRange, VGPRBlocks, SGPRBlocks))
3191     return true;
3192 
3193   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3194           VGPRBlocks))
3195     return OutOfRangeError(VGPRRange);
3196   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3197                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3198 
3199   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3200           SGPRBlocks))
3201     return OutOfRangeError(SGPRRange);
3202   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3203                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3204                   SGPRBlocks);
3205 
3206   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3207     return TokError("too many user SGPRs enabled");
3208   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3209                   UserSGPRCount);
3210 
3211   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3212       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3213       ReserveFlatScr, ReserveXNACK);
3214   return false;
3215 }
3216 
3217 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3218   uint32_t Major;
3219   uint32_t Minor;
3220 
3221   if (ParseDirectiveMajorMinor(Major, Minor))
3222     return true;
3223 
3224   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3225   return false;
3226 }
3227 
3228 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3229   uint32_t Major;
3230   uint32_t Minor;
3231   uint32_t Stepping;
3232   StringRef VendorName;
3233   StringRef ArchName;
3234 
3235   // If this directive has no arguments, then use the ISA version for the
3236   // targeted GPU.
3237   if (getLexer().is(AsmToken::EndOfStatement)) {
3238     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3239     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3240                                                       ISA.Stepping,
3241                                                       "AMD", "AMDGPU");
3242     return false;
3243   }
3244 
3245   if (ParseDirectiveMajorMinor(Major, Minor))
3246     return true;
3247 
3248   if (getLexer().isNot(AsmToken::Comma))
3249     return TokError("stepping version number required, comma expected");
3250   Lex();
3251 
3252   if (ParseAsAbsoluteExpression(Stepping))
3253     return TokError("invalid stepping version");
3254 
3255   if (getLexer().isNot(AsmToken::Comma))
3256     return TokError("vendor name required, comma expected");
3257   Lex();
3258 
3259   if (getLexer().isNot(AsmToken::String))
3260     return TokError("invalid vendor name");
3261 
3262   VendorName = getLexer().getTok().getStringContents();
3263   Lex();
3264 
3265   if (getLexer().isNot(AsmToken::Comma))
3266     return TokError("arch name required, comma expected");
3267   Lex();
3268 
3269   if (getLexer().isNot(AsmToken::String))
3270     return TokError("invalid arch name");
3271 
3272   ArchName = getLexer().getTok().getStringContents();
3273   Lex();
3274 
3275   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3276                                                     VendorName, ArchName);
3277   return false;
3278 }
3279 
3280 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3281                                                amd_kernel_code_t &Header) {
3282   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3283   // assembly for backwards compatibility.
3284   if (ID == "max_scratch_backing_memory_byte_size") {
3285     Parser.eatToEndOfStatement();
3286     return false;
3287   }
3288 
3289   SmallString<40> ErrStr;
3290   raw_svector_ostream Err(ErrStr);
3291   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3292     return TokError(Err.str());
3293   }
3294   Lex();
3295   return false;
3296 }
3297 
3298 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3299   amd_kernel_code_t Header;
3300   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3301 
3302   while (true) {
3303     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3304     // will set the current token to EndOfStatement.
3305     while(getLexer().is(AsmToken::EndOfStatement))
3306       Lex();
3307 
3308     if (getLexer().isNot(AsmToken::Identifier))
3309       return TokError("expected value identifier or .end_amd_kernel_code_t");
3310 
3311     StringRef ID = getLexer().getTok().getIdentifier();
3312     Lex();
3313 
3314     if (ID == ".end_amd_kernel_code_t")
3315       break;
3316 
3317     if (ParseAMDKernelCodeTValue(ID, Header))
3318       return true;
3319   }
3320 
3321   getTargetStreamer().EmitAMDKernelCodeT(Header);
3322 
3323   return false;
3324 }
3325 
3326 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3327   if (getLexer().isNot(AsmToken::Identifier))
3328     return TokError("expected symbol name");
3329 
3330   StringRef KernelName = Parser.getTok().getString();
3331 
3332   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3333                                            ELF::STT_AMDGPU_HSA_KERNEL);
3334   Lex();
3335   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3336     KernelScope.initialize(getContext());
3337   return false;
3338 }
3339 
3340 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3341   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3342     return Error(getParser().getTok().getLoc(),
3343                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3344                  "architectures");
3345   }
3346 
3347   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3348 
3349   std::string ISAVersionStringFromSTI;
3350   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3351   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3352 
3353   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3354     return Error(getParser().getTok().getLoc(),
3355                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3356                  "arguments specified through the command line");
3357   }
3358 
3359   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3360   Lex();
3361 
3362   return false;
3363 }
3364 
3365 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3366   const char *AssemblerDirectiveBegin;
3367   const char *AssemblerDirectiveEnd;
3368   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3369       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3370           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3371                             HSAMD::V3::AssemblerDirectiveEnd)
3372           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3373                             HSAMD::AssemblerDirectiveEnd);
3374 
3375   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3376     return Error(getParser().getTok().getLoc(),
3377                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3378                  "not available on non-amdhsa OSes")).str());
3379   }
3380 
3381   std::string HSAMetadataString;
3382   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3383                           HSAMetadataString))
3384     return true;
3385 
3386   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3387     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3388       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3389   } else {
3390     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3391       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3392   }
3393 
3394   return false;
3395 }
3396 
3397 /// Common code to parse out a block of text (typically YAML) between start and
3398 /// end directives.
3399 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3400                                           const char *AssemblerDirectiveEnd,
3401                                           std::string &CollectString) {
3402 
3403   raw_string_ostream CollectStream(CollectString);
3404 
3405   getLexer().setSkipSpace(false);
3406 
3407   bool FoundEnd = false;
3408   while (!getLexer().is(AsmToken::Eof)) {
3409     while (getLexer().is(AsmToken::Space)) {
3410       CollectStream << getLexer().getTok().getString();
3411       Lex();
3412     }
3413 
3414     if (getLexer().is(AsmToken::Identifier)) {
3415       StringRef ID = getLexer().getTok().getIdentifier();
3416       if (ID == AssemblerDirectiveEnd) {
3417         Lex();
3418         FoundEnd = true;
3419         break;
3420       }
3421     }
3422 
3423     CollectStream << Parser.parseStringToEndOfStatement()
3424                   << getContext().getAsmInfo()->getSeparatorString();
3425 
3426     Parser.eatToEndOfStatement();
3427   }
3428 
3429   getLexer().setSkipSpace(true);
3430 
3431   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3432     return TokError(Twine("expected directive ") +
3433                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3434   }
3435 
3436   CollectStream.flush();
3437   return false;
3438 }
3439 
3440 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3441 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3442   std::string String;
3443   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3444                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3445     return true;
3446 
3447   auto PALMetadata = getTargetStreamer().getPALMetadata();
3448   if (!PALMetadata->setFromString(String))
3449     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3450   return false;
3451 }
3452 
3453 /// Parse the assembler directive for old linear-format PAL metadata.
3454 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3455   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3456     return Error(getParser().getTok().getLoc(),
3457                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3458                  "not available on non-amdpal OSes")).str());
3459   }
3460 
3461   auto PALMetadata = getTargetStreamer().getPALMetadata();
3462   PALMetadata->setLegacy();
3463   for (;;) {
3464     uint32_t Key, Value;
3465     if (ParseAsAbsoluteExpression(Key)) {
3466       return TokError(Twine("invalid value in ") +
3467                       Twine(PALMD::AssemblerDirective));
3468     }
3469     if (getLexer().isNot(AsmToken::Comma)) {
3470       return TokError(Twine("expected an even number of values in ") +
3471                       Twine(PALMD::AssemblerDirective));
3472     }
3473     Lex();
3474     if (ParseAsAbsoluteExpression(Value)) {
3475       return TokError(Twine("invalid value in ") +
3476                       Twine(PALMD::AssemblerDirective));
3477     }
3478     PALMetadata->setRegister(Key, Value);
3479     if (getLexer().isNot(AsmToken::Comma))
3480       break;
3481     Lex();
3482   }
3483   return false;
3484 }
3485 
3486 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3487   StringRef IDVal = DirectiveID.getString();
3488 
3489   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3490     if (IDVal == ".amdgcn_target")
3491       return ParseDirectiveAMDGCNTarget();
3492 
3493     if (IDVal == ".amdhsa_kernel")
3494       return ParseDirectiveAMDHSAKernel();
3495 
3496     // TODO: Restructure/combine with PAL metadata directive.
3497     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3498       return ParseDirectiveHSAMetadata();
3499   } else {
3500     if (IDVal == ".hsa_code_object_version")
3501       return ParseDirectiveHSACodeObjectVersion();
3502 
3503     if (IDVal == ".hsa_code_object_isa")
3504       return ParseDirectiveHSACodeObjectISA();
3505 
3506     if (IDVal == ".amd_kernel_code_t")
3507       return ParseDirectiveAMDKernelCodeT();
3508 
3509     if (IDVal == ".amdgpu_hsa_kernel")
3510       return ParseDirectiveAMDGPUHsaKernel();
3511 
3512     if (IDVal == ".amd_amdgpu_isa")
3513       return ParseDirectiveISAVersion();
3514 
3515     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3516       return ParseDirectiveHSAMetadata();
3517   }
3518 
3519   if (IDVal == PALMD::AssemblerDirectiveBegin)
3520     return ParseDirectivePALMetadataBegin();
3521 
3522   if (IDVal == PALMD::AssemblerDirective)
3523     return ParseDirectivePALMetadata();
3524 
3525   return true;
3526 }
3527 
3528 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3529                                            unsigned RegNo) const {
3530 
3531   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3532        R.isValid(); ++R) {
3533     if (*R == RegNo)
3534       return isGFX9();
3535   }
3536 
3537   switch (RegNo) {
3538   case AMDGPU::TBA:
3539   case AMDGPU::TBA_LO:
3540   case AMDGPU::TBA_HI:
3541   case AMDGPU::TMA:
3542   case AMDGPU::TMA_LO:
3543   case AMDGPU::TMA_HI:
3544     return !isGFX9();
3545   case AMDGPU::XNACK_MASK:
3546   case AMDGPU::XNACK_MASK_LO:
3547   case AMDGPU::XNACK_MASK_HI:
3548     return !isCI() && !isSI() && hasXNACK();
3549   default:
3550     break;
3551   }
3552 
3553   if (isInlineValue(RegNo))
3554     return !isCI() && !isSI() && !isVI();
3555 
3556   if (isCI())
3557     return true;
3558 
3559   if (isSI()) {
3560     // No flat_scr
3561     switch (RegNo) {
3562     case AMDGPU::FLAT_SCR:
3563     case AMDGPU::FLAT_SCR_LO:
3564     case AMDGPU::FLAT_SCR_HI:
3565       return false;
3566     default:
3567       return true;
3568     }
3569   }
3570 
3571   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3572   // SI/CI have.
3573   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3574        R.isValid(); ++R) {
3575     if (*R == RegNo)
3576       return false;
3577   }
3578 
3579   return true;
3580 }
3581 
3582 OperandMatchResultTy
3583 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3584   // Try to parse with a custom parser
3585   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3586 
3587   // If we successfully parsed the operand or if there as an error parsing,
3588   // we are done.
3589   //
3590   // If we are parsing after we reach EndOfStatement then this means we
3591   // are appending default values to the Operands list.  This is only done
3592   // by custom parser, so we shouldn't continue on to the generic parsing.
3593   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3594       getLexer().is(AsmToken::EndOfStatement))
3595     return ResTy;
3596 
3597   ResTy = parseRegOrImm(Operands);
3598 
3599   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
3600     return ResTy;
3601 
3602   const auto &Tok = Parser.getTok();
3603   SMLoc S = Tok.getLoc();
3604 
3605   const MCExpr *Expr = nullptr;
3606   if (!Parser.parseExpression(Expr)) {
3607     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3608     return MatchOperand_Success;
3609   }
3610 
3611   // Possibly this is an instruction flag like 'gds'.
3612   if (Tok.getKind() == AsmToken::Identifier) {
3613     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3614     Parser.Lex();
3615     return MatchOperand_Success;
3616   }
3617 
3618   return MatchOperand_NoMatch;
3619 }
3620 
3621 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3622   // Clear any forced encodings from the previous instruction.
3623   setForcedEncodingSize(0);
3624   setForcedDPP(false);
3625   setForcedSDWA(false);
3626 
3627   if (Name.endswith("_e64")) {
3628     setForcedEncodingSize(64);
3629     return Name.substr(0, Name.size() - 4);
3630   } else if (Name.endswith("_e32")) {
3631     setForcedEncodingSize(32);
3632     return Name.substr(0, Name.size() - 4);
3633   } else if (Name.endswith("_dpp")) {
3634     setForcedDPP(true);
3635     return Name.substr(0, Name.size() - 4);
3636   } else if (Name.endswith("_sdwa")) {
3637     setForcedSDWA(true);
3638     return Name.substr(0, Name.size() - 5);
3639   }
3640   return Name;
3641 }
3642 
3643 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3644                                        StringRef Name,
3645                                        SMLoc NameLoc, OperandVector &Operands) {
3646   // Add the instruction mnemonic
3647   Name = parseMnemonicSuffix(Name);
3648   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3649 
3650   while (!getLexer().is(AsmToken::EndOfStatement)) {
3651     OperandMatchResultTy Res = parseOperand(Operands, Name);
3652 
3653     // Eat the comma or space if there is one.
3654     if (getLexer().is(AsmToken::Comma))
3655       Parser.Lex();
3656 
3657     switch (Res) {
3658       case MatchOperand_Success: break;
3659       case MatchOperand_ParseFail:
3660         Error(getLexer().getLoc(), "failed parsing operand.");
3661         while (!getLexer().is(AsmToken::EndOfStatement)) {
3662           Parser.Lex();
3663         }
3664         return true;
3665       case MatchOperand_NoMatch:
3666         Error(getLexer().getLoc(), "not a valid operand.");
3667         while (!getLexer().is(AsmToken::EndOfStatement)) {
3668           Parser.Lex();
3669         }
3670         return true;
3671     }
3672   }
3673 
3674   return false;
3675 }
3676 
3677 //===----------------------------------------------------------------------===//
3678 // Utility functions
3679 //===----------------------------------------------------------------------===//
3680 
3681 OperandMatchResultTy
3682 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3683   switch(getLexer().getKind()) {
3684     default: return MatchOperand_NoMatch;
3685     case AsmToken::Identifier: {
3686       StringRef Name = Parser.getTok().getString();
3687       if (!Name.equals(Prefix)) {
3688         return MatchOperand_NoMatch;
3689       }
3690 
3691       Parser.Lex();
3692       if (getLexer().isNot(AsmToken::Colon))
3693         return MatchOperand_ParseFail;
3694 
3695       Parser.Lex();
3696 
3697       bool IsMinus = false;
3698       if (getLexer().getKind() == AsmToken::Minus) {
3699         Parser.Lex();
3700         IsMinus = true;
3701       }
3702 
3703       if (getLexer().isNot(AsmToken::Integer))
3704         return MatchOperand_ParseFail;
3705 
3706       if (getParser().parseAbsoluteExpression(Int))
3707         return MatchOperand_ParseFail;
3708 
3709       if (IsMinus)
3710         Int = -Int;
3711       break;
3712     }
3713   }
3714   return MatchOperand_Success;
3715 }
3716 
3717 OperandMatchResultTy
3718 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3719                                     AMDGPUOperand::ImmTy ImmTy,
3720                                     bool (*ConvertResult)(int64_t&)) {
3721   SMLoc S = Parser.getTok().getLoc();
3722   int64_t Value = 0;
3723 
3724   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3725   if (Res != MatchOperand_Success)
3726     return Res;
3727 
3728   if (ConvertResult && !ConvertResult(Value)) {
3729     return MatchOperand_ParseFail;
3730   }
3731 
3732   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3733   return MatchOperand_Success;
3734 }
3735 
3736 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3737   const char *Prefix,
3738   OperandVector &Operands,
3739   AMDGPUOperand::ImmTy ImmTy,
3740   bool (*ConvertResult)(int64_t&)) {
3741   StringRef Name = Parser.getTok().getString();
3742   if (!Name.equals(Prefix))
3743     return MatchOperand_NoMatch;
3744 
3745   Parser.Lex();
3746   if (getLexer().isNot(AsmToken::Colon))
3747     return MatchOperand_ParseFail;
3748 
3749   Parser.Lex();
3750   if (getLexer().isNot(AsmToken::LBrac))
3751     return MatchOperand_ParseFail;
3752   Parser.Lex();
3753 
3754   unsigned Val = 0;
3755   SMLoc S = Parser.getTok().getLoc();
3756 
3757   // FIXME: How to verify the number of elements matches the number of src
3758   // operands?
3759   for (int I = 0; I < 4; ++I) {
3760     if (I != 0) {
3761       if (getLexer().is(AsmToken::RBrac))
3762         break;
3763 
3764       if (getLexer().isNot(AsmToken::Comma))
3765         return MatchOperand_ParseFail;
3766       Parser.Lex();
3767     }
3768 
3769     if (getLexer().isNot(AsmToken::Integer))
3770       return MatchOperand_ParseFail;
3771 
3772     int64_t Op;
3773     if (getParser().parseAbsoluteExpression(Op))
3774       return MatchOperand_ParseFail;
3775 
3776     if (Op != 0 && Op != 1)
3777       return MatchOperand_ParseFail;
3778     Val |= (Op << I);
3779   }
3780 
3781   Parser.Lex();
3782   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3783   return MatchOperand_Success;
3784 }
3785 
3786 OperandMatchResultTy
3787 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3788                                AMDGPUOperand::ImmTy ImmTy) {
3789   int64_t Bit = 0;
3790   SMLoc S = Parser.getTok().getLoc();
3791 
3792   // We are at the end of the statement, and this is a default argument, so
3793   // use a default value.
3794   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3795     switch(getLexer().getKind()) {
3796       case AsmToken::Identifier: {
3797         StringRef Tok = Parser.getTok().getString();
3798         if (Tok == Name) {
3799           if (Tok == "r128" && isGFX9())
3800             Error(S, "r128 modifier is not supported on this GPU");
3801           if (Tok == "a16" && !isGFX9())
3802             Error(S, "a16 modifier is not supported on this GPU");
3803           Bit = 1;
3804           Parser.Lex();
3805         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3806           Bit = 0;
3807           Parser.Lex();
3808         } else {
3809           return MatchOperand_NoMatch;
3810         }
3811         break;
3812       }
3813       default:
3814         return MatchOperand_NoMatch;
3815     }
3816   }
3817 
3818   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3819   return MatchOperand_Success;
3820 }
3821 
3822 static void addOptionalImmOperand(
3823   MCInst& Inst, const OperandVector& Operands,
3824   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3825   AMDGPUOperand::ImmTy ImmT,
3826   int64_t Default = 0) {
3827   auto i = OptionalIdx.find(ImmT);
3828   if (i != OptionalIdx.end()) {
3829     unsigned Idx = i->second;
3830     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3831   } else {
3832     Inst.addOperand(MCOperand::createImm(Default));
3833   }
3834 }
3835 
3836 OperandMatchResultTy
3837 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3838   if (getLexer().isNot(AsmToken::Identifier)) {
3839     return MatchOperand_NoMatch;
3840   }
3841   StringRef Tok = Parser.getTok().getString();
3842   if (Tok != Prefix) {
3843     return MatchOperand_NoMatch;
3844   }
3845 
3846   Parser.Lex();
3847   if (getLexer().isNot(AsmToken::Colon)) {
3848     return MatchOperand_ParseFail;
3849   }
3850 
3851   Parser.Lex();
3852   if (getLexer().isNot(AsmToken::Identifier)) {
3853     return MatchOperand_ParseFail;
3854   }
3855 
3856   Value = Parser.getTok().getString();
3857   return MatchOperand_Success;
3858 }
3859 
3860 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3861 // values to live in a joint format operand in the MCInst encoding.
3862 OperandMatchResultTy
3863 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3864   SMLoc S = Parser.getTok().getLoc();
3865   int64_t Dfmt = 0, Nfmt = 0;
3866   // dfmt and nfmt can appear in either order, and each is optional.
3867   bool GotDfmt = false, GotNfmt = false;
3868   while (!GotDfmt || !GotNfmt) {
3869     if (!GotDfmt) {
3870       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3871       if (Res != MatchOperand_NoMatch) {
3872         if (Res != MatchOperand_Success)
3873           return Res;
3874         if (Dfmt >= 16) {
3875           Error(Parser.getTok().getLoc(), "out of range dfmt");
3876           return MatchOperand_ParseFail;
3877         }
3878         GotDfmt = true;
3879         Parser.Lex();
3880         continue;
3881       }
3882     }
3883     if (!GotNfmt) {
3884       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3885       if (Res != MatchOperand_NoMatch) {
3886         if (Res != MatchOperand_Success)
3887           return Res;
3888         if (Nfmt >= 8) {
3889           Error(Parser.getTok().getLoc(), "out of range nfmt");
3890           return MatchOperand_ParseFail;
3891         }
3892         GotNfmt = true;
3893         Parser.Lex();
3894         continue;
3895       }
3896     }
3897     break;
3898   }
3899   if (!GotDfmt && !GotNfmt)
3900     return MatchOperand_NoMatch;
3901   auto Format = Dfmt | Nfmt << 4;
3902   Operands.push_back(
3903       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3904   return MatchOperand_Success;
3905 }
3906 
3907 //===----------------------------------------------------------------------===//
3908 // ds
3909 //===----------------------------------------------------------------------===//
3910 
3911 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3912                                     const OperandVector &Operands) {
3913   OptionalImmIndexMap OptionalIdx;
3914 
3915   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3916     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3917 
3918     // Add the register arguments
3919     if (Op.isReg()) {
3920       Op.addRegOperands(Inst, 1);
3921       continue;
3922     }
3923 
3924     // Handle optional arguments
3925     OptionalIdx[Op.getImmTy()] = i;
3926   }
3927 
3928   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3929   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3930   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3931 
3932   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3933 }
3934 
3935 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3936                                 bool IsGdsHardcoded) {
3937   OptionalImmIndexMap OptionalIdx;
3938 
3939   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3940     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3941 
3942     // Add the register arguments
3943     if (Op.isReg()) {
3944       Op.addRegOperands(Inst, 1);
3945       continue;
3946     }
3947 
3948     if (Op.isToken() && Op.getToken() == "gds") {
3949       IsGdsHardcoded = true;
3950       continue;
3951     }
3952 
3953     // Handle optional arguments
3954     OptionalIdx[Op.getImmTy()] = i;
3955   }
3956 
3957   AMDGPUOperand::ImmTy OffsetType =
3958     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3959      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3960                                                       AMDGPUOperand::ImmTyOffset;
3961 
3962   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3963 
3964   if (!IsGdsHardcoded) {
3965     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3966   }
3967   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3968 }
3969 
3970 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3971   OptionalImmIndexMap OptionalIdx;
3972 
3973   unsigned OperandIdx[4];
3974   unsigned EnMask = 0;
3975   int SrcIdx = 0;
3976 
3977   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3978     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3979 
3980     // Add the register arguments
3981     if (Op.isReg()) {
3982       assert(SrcIdx < 4);
3983       OperandIdx[SrcIdx] = Inst.size();
3984       Op.addRegOperands(Inst, 1);
3985       ++SrcIdx;
3986       continue;
3987     }
3988 
3989     if (Op.isOff()) {
3990       assert(SrcIdx < 4);
3991       OperandIdx[SrcIdx] = Inst.size();
3992       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3993       ++SrcIdx;
3994       continue;
3995     }
3996 
3997     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3998       Op.addImmOperands(Inst, 1);
3999       continue;
4000     }
4001 
4002     if (Op.isToken() && Op.getToken() == "done")
4003       continue;
4004 
4005     // Handle optional arguments
4006     OptionalIdx[Op.getImmTy()] = i;
4007   }
4008 
4009   assert(SrcIdx == 4);
4010 
4011   bool Compr = false;
4012   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4013     Compr = true;
4014     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4015     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4016     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4017   }
4018 
4019   for (auto i = 0; i < SrcIdx; ++i) {
4020     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4021       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4022     }
4023   }
4024 
4025   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4026   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4027 
4028   Inst.addOperand(MCOperand::createImm(EnMask));
4029 }
4030 
4031 //===----------------------------------------------------------------------===//
4032 // s_waitcnt
4033 //===----------------------------------------------------------------------===//
4034 
4035 static bool
4036 encodeCnt(
4037   const AMDGPU::IsaVersion ISA,
4038   int64_t &IntVal,
4039   int64_t CntVal,
4040   bool Saturate,
4041   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4042   unsigned (*decode)(const IsaVersion &Version, unsigned))
4043 {
4044   bool Failed = false;
4045 
4046   IntVal = encode(ISA, IntVal, CntVal);
4047   if (CntVal != decode(ISA, IntVal)) {
4048     if (Saturate) {
4049       IntVal = encode(ISA, IntVal, -1);
4050     } else {
4051       Failed = true;
4052     }
4053   }
4054   return Failed;
4055 }
4056 
4057 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4058   StringRef CntName = Parser.getTok().getString();
4059   int64_t CntVal;
4060 
4061   Parser.Lex();
4062   if (getLexer().isNot(AsmToken::LParen))
4063     return true;
4064 
4065   Parser.Lex();
4066   if (getLexer().isNot(AsmToken::Integer))
4067     return true;
4068 
4069   SMLoc ValLoc = Parser.getTok().getLoc();
4070   if (getParser().parseAbsoluteExpression(CntVal))
4071     return true;
4072 
4073   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4074 
4075   bool Failed = true;
4076   bool Sat = CntName.endswith("_sat");
4077 
4078   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4079     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4080   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4081     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4082   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4083     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4084   }
4085 
4086   if (Failed) {
4087     Error(ValLoc, "too large value for " + CntName);
4088     return true;
4089   }
4090 
4091   if (getLexer().isNot(AsmToken::RParen)) {
4092     return true;
4093   }
4094 
4095   Parser.Lex();
4096   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4097     const AsmToken NextToken = getLexer().peekTok();
4098     if (NextToken.is(AsmToken::Identifier)) {
4099       Parser.Lex();
4100     }
4101   }
4102 
4103   return false;
4104 }
4105 
4106 OperandMatchResultTy
4107 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4108   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4109   int64_t Waitcnt = getWaitcntBitMask(ISA);
4110   SMLoc S = Parser.getTok().getLoc();
4111 
4112   switch(getLexer().getKind()) {
4113     default: return MatchOperand_ParseFail;
4114     case AsmToken::Integer:
4115       // The operand can be an integer value.
4116       if (getParser().parseAbsoluteExpression(Waitcnt))
4117         return MatchOperand_ParseFail;
4118       break;
4119 
4120     case AsmToken::Identifier:
4121       do {
4122         if (parseCnt(Waitcnt))
4123           return MatchOperand_ParseFail;
4124       } while(getLexer().isNot(AsmToken::EndOfStatement));
4125       break;
4126   }
4127   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4128   return MatchOperand_Success;
4129 }
4130 
4131 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4132                                           int64_t &Width) {
4133   using namespace llvm::AMDGPU::Hwreg;
4134 
4135   if (Parser.getTok().getString() != "hwreg")
4136     return true;
4137   Parser.Lex();
4138 
4139   if (getLexer().isNot(AsmToken::LParen))
4140     return true;
4141   Parser.Lex();
4142 
4143   if (getLexer().is(AsmToken::Identifier)) {
4144     HwReg.IsSymbolic = true;
4145     HwReg.Id = ID_UNKNOWN_;
4146     const StringRef tok = Parser.getTok().getString();
4147     int Last = ID_SYMBOLIC_LAST_;
4148     if (isSI() || isCI() || isVI())
4149       Last = ID_SYMBOLIC_FIRST_GFX9_;
4150     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4151       if (tok == IdSymbolic[i]) {
4152         HwReg.Id = i;
4153         break;
4154       }
4155     }
4156     Parser.Lex();
4157   } else {
4158     HwReg.IsSymbolic = false;
4159     if (getLexer().isNot(AsmToken::Integer))
4160       return true;
4161     if (getParser().parseAbsoluteExpression(HwReg.Id))
4162       return true;
4163   }
4164 
4165   if (getLexer().is(AsmToken::RParen)) {
4166     Parser.Lex();
4167     return false;
4168   }
4169 
4170   // optional params
4171   if (getLexer().isNot(AsmToken::Comma))
4172     return true;
4173   Parser.Lex();
4174 
4175   if (getLexer().isNot(AsmToken::Integer))
4176     return true;
4177   if (getParser().parseAbsoluteExpression(Offset))
4178     return true;
4179 
4180   if (getLexer().isNot(AsmToken::Comma))
4181     return true;
4182   Parser.Lex();
4183 
4184   if (getLexer().isNot(AsmToken::Integer))
4185     return true;
4186   if (getParser().parseAbsoluteExpression(Width))
4187     return true;
4188 
4189   if (getLexer().isNot(AsmToken::RParen))
4190     return true;
4191   Parser.Lex();
4192 
4193   return false;
4194 }
4195 
4196 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4197   using namespace llvm::AMDGPU::Hwreg;
4198 
4199   int64_t Imm16Val = 0;
4200   SMLoc S = Parser.getTok().getLoc();
4201 
4202   switch(getLexer().getKind()) {
4203     default: return MatchOperand_NoMatch;
4204     case AsmToken::Integer:
4205       // The operand can be an integer value.
4206       if (getParser().parseAbsoluteExpression(Imm16Val))
4207         return MatchOperand_NoMatch;
4208       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4209         Error(S, "invalid immediate: only 16-bit values are legal");
4210         // Do not return error code, but create an imm operand anyway and proceed
4211         // to the next operand, if any. That avoids unneccessary error messages.
4212       }
4213       break;
4214 
4215     case AsmToken::Identifier: {
4216         OperandInfoTy HwReg(ID_UNKNOWN_);
4217         int64_t Offset = OFFSET_DEFAULT_;
4218         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4219         if (parseHwregConstruct(HwReg, Offset, Width))
4220           return MatchOperand_ParseFail;
4221         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4222           if (HwReg.IsSymbolic)
4223             Error(S, "invalid symbolic name of hardware register");
4224           else
4225             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4226         }
4227         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4228           Error(S, "invalid bit offset: only 5-bit values are legal");
4229         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4230           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4231         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4232       }
4233       break;
4234   }
4235   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4236   return MatchOperand_Success;
4237 }
4238 
4239 bool AMDGPUOperand::isSWaitCnt() const {
4240   return isImm();
4241 }
4242 
4243 bool AMDGPUOperand::isHwreg() const {
4244   return isImmTy(ImmTyHwreg);
4245 }
4246 
4247 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4248   using namespace llvm::AMDGPU::SendMsg;
4249 
4250   if (Parser.getTok().getString() != "sendmsg")
4251     return true;
4252   Parser.Lex();
4253 
4254   if (getLexer().isNot(AsmToken::LParen))
4255     return true;
4256   Parser.Lex();
4257 
4258   if (getLexer().is(AsmToken::Identifier)) {
4259     Msg.IsSymbolic = true;
4260     Msg.Id = ID_UNKNOWN_;
4261     const std::string tok = Parser.getTok().getString();
4262     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4263       switch(i) {
4264         default: continue; // Omit gaps.
4265         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
4266       }
4267       if (tok == IdSymbolic[i]) {
4268         Msg.Id = i;
4269         break;
4270       }
4271     }
4272     Parser.Lex();
4273   } else {
4274     Msg.IsSymbolic = false;
4275     if (getLexer().isNot(AsmToken::Integer))
4276       return true;
4277     if (getParser().parseAbsoluteExpression(Msg.Id))
4278       return true;
4279     if (getLexer().is(AsmToken::Integer))
4280       if (getParser().parseAbsoluteExpression(Msg.Id))
4281         Msg.Id = ID_UNKNOWN_;
4282   }
4283   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4284     return false;
4285 
4286   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4287     if (getLexer().isNot(AsmToken::RParen))
4288       return true;
4289     Parser.Lex();
4290     return false;
4291   }
4292 
4293   if (getLexer().isNot(AsmToken::Comma))
4294     return true;
4295   Parser.Lex();
4296 
4297   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4298   Operation.Id = ID_UNKNOWN_;
4299   if (getLexer().is(AsmToken::Identifier)) {
4300     Operation.IsSymbolic = true;
4301     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4302     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4303     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4304     const StringRef Tok = Parser.getTok().getString();
4305     for (int i = F; i < L; ++i) {
4306       if (Tok == S[i]) {
4307         Operation.Id = i;
4308         break;
4309       }
4310     }
4311     Parser.Lex();
4312   } else {
4313     Operation.IsSymbolic = false;
4314     if (getLexer().isNot(AsmToken::Integer))
4315       return true;
4316     if (getParser().parseAbsoluteExpression(Operation.Id))
4317       return true;
4318   }
4319 
4320   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4321     // Stream id is optional.
4322     if (getLexer().is(AsmToken::RParen)) {
4323       Parser.Lex();
4324       return false;
4325     }
4326 
4327     if (getLexer().isNot(AsmToken::Comma))
4328       return true;
4329     Parser.Lex();
4330 
4331     if (getLexer().isNot(AsmToken::Integer))
4332       return true;
4333     if (getParser().parseAbsoluteExpression(StreamId))
4334       return true;
4335   }
4336 
4337   if (getLexer().isNot(AsmToken::RParen))
4338     return true;
4339   Parser.Lex();
4340   return false;
4341 }
4342 
4343 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4344   if (getLexer().getKind() != AsmToken::Identifier)
4345     return MatchOperand_NoMatch;
4346 
4347   StringRef Str = Parser.getTok().getString();
4348   int Slot = StringSwitch<int>(Str)
4349     .Case("p10", 0)
4350     .Case("p20", 1)
4351     .Case("p0", 2)
4352     .Default(-1);
4353 
4354   SMLoc S = Parser.getTok().getLoc();
4355   if (Slot == -1)
4356     return MatchOperand_ParseFail;
4357 
4358   Parser.Lex();
4359   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4360                                               AMDGPUOperand::ImmTyInterpSlot));
4361   return MatchOperand_Success;
4362 }
4363 
4364 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4365   if (getLexer().getKind() != AsmToken::Identifier)
4366     return MatchOperand_NoMatch;
4367 
4368   StringRef Str = Parser.getTok().getString();
4369   if (!Str.startswith("attr"))
4370     return MatchOperand_NoMatch;
4371 
4372   StringRef Chan = Str.take_back(2);
4373   int AttrChan = StringSwitch<int>(Chan)
4374     .Case(".x", 0)
4375     .Case(".y", 1)
4376     .Case(".z", 2)
4377     .Case(".w", 3)
4378     .Default(-1);
4379   if (AttrChan == -1)
4380     return MatchOperand_ParseFail;
4381 
4382   Str = Str.drop_back(2).drop_front(4);
4383 
4384   uint8_t Attr;
4385   if (Str.getAsInteger(10, Attr))
4386     return MatchOperand_ParseFail;
4387 
4388   SMLoc S = Parser.getTok().getLoc();
4389   Parser.Lex();
4390   if (Attr > 63) {
4391     Error(S, "out of bounds attr");
4392     return MatchOperand_Success;
4393   }
4394 
4395   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4396 
4397   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4398                                               AMDGPUOperand::ImmTyInterpAttr));
4399   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4400                                               AMDGPUOperand::ImmTyAttrChan));
4401   return MatchOperand_Success;
4402 }
4403 
4404 void AMDGPUAsmParser::errorExpTgt() {
4405   Error(Parser.getTok().getLoc(), "invalid exp target");
4406 }
4407 
4408 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4409                                                       uint8_t &Val) {
4410   if (Str == "null") {
4411     Val = 9;
4412     return MatchOperand_Success;
4413   }
4414 
4415   if (Str.startswith("mrt")) {
4416     Str = Str.drop_front(3);
4417     if (Str == "z") { // == mrtz
4418       Val = 8;
4419       return MatchOperand_Success;
4420     }
4421 
4422     if (Str.getAsInteger(10, Val))
4423       return MatchOperand_ParseFail;
4424 
4425     if (Val > 7)
4426       errorExpTgt();
4427 
4428     return MatchOperand_Success;
4429   }
4430 
4431   if (Str.startswith("pos")) {
4432     Str = Str.drop_front(3);
4433     if (Str.getAsInteger(10, Val))
4434       return MatchOperand_ParseFail;
4435 
4436     if (Val > 3)
4437       errorExpTgt();
4438 
4439     Val += 12;
4440     return MatchOperand_Success;
4441   }
4442 
4443   if (Str.startswith("param")) {
4444     Str = Str.drop_front(5);
4445     if (Str.getAsInteger(10, Val))
4446       return MatchOperand_ParseFail;
4447 
4448     if (Val >= 32)
4449       errorExpTgt();
4450 
4451     Val += 32;
4452     return MatchOperand_Success;
4453   }
4454 
4455   if (Str.startswith("invalid_target_")) {
4456     Str = Str.drop_front(15);
4457     if (Str.getAsInteger(10, Val))
4458       return MatchOperand_ParseFail;
4459 
4460     errorExpTgt();
4461     return MatchOperand_Success;
4462   }
4463 
4464   return MatchOperand_NoMatch;
4465 }
4466 
4467 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4468   uint8_t Val;
4469   StringRef Str = Parser.getTok().getString();
4470 
4471   auto Res = parseExpTgtImpl(Str, Val);
4472   if (Res != MatchOperand_Success)
4473     return Res;
4474 
4475   SMLoc S = Parser.getTok().getLoc();
4476   Parser.Lex();
4477 
4478   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4479                                               AMDGPUOperand::ImmTyExpTgt));
4480   return MatchOperand_Success;
4481 }
4482 
4483 OperandMatchResultTy
4484 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4485   using namespace llvm::AMDGPU::SendMsg;
4486 
4487   int64_t Imm16Val = 0;
4488   SMLoc S = Parser.getTok().getLoc();
4489 
4490   switch(getLexer().getKind()) {
4491   default:
4492     return MatchOperand_NoMatch;
4493   case AsmToken::Integer:
4494     // The operand can be an integer value.
4495     if (getParser().parseAbsoluteExpression(Imm16Val))
4496       return MatchOperand_NoMatch;
4497     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4498       Error(S, "invalid immediate: only 16-bit values are legal");
4499       // Do not return error code, but create an imm operand anyway and proceed
4500       // to the next operand, if any. That avoids unneccessary error messages.
4501     }
4502     break;
4503   case AsmToken::Identifier: {
4504       OperandInfoTy Msg(ID_UNKNOWN_);
4505       OperandInfoTy Operation(OP_UNKNOWN_);
4506       int64_t StreamId = STREAM_ID_DEFAULT_;
4507       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4508         return MatchOperand_ParseFail;
4509       do {
4510         // Validate and encode message ID.
4511         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4512                 || Msg.Id == ID_SYSMSG)) {
4513           if (Msg.IsSymbolic)
4514             Error(S, "invalid/unsupported symbolic name of message");
4515           else
4516             Error(S, "invalid/unsupported code of message");
4517           break;
4518         }
4519         Imm16Val = (Msg.Id << ID_SHIFT_);
4520         // Validate and encode operation ID.
4521         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4522           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4523             if (Operation.IsSymbolic)
4524               Error(S, "invalid symbolic name of GS_OP");
4525             else
4526               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4527             break;
4528           }
4529           if (Operation.Id == OP_GS_NOP
4530               && Msg.Id != ID_GS_DONE) {
4531             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4532             break;
4533           }
4534           Imm16Val |= (Operation.Id << OP_SHIFT_);
4535         }
4536         if (Msg.Id == ID_SYSMSG) {
4537           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4538             if (Operation.IsSymbolic)
4539               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4540             else
4541               Error(S, "invalid/unsupported code of SYSMSG_OP");
4542             break;
4543           }
4544           Imm16Val |= (Operation.Id << OP_SHIFT_);
4545         }
4546         // Validate and encode stream ID.
4547         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4548           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4549             Error(S, "invalid stream id: only 2-bit values are legal");
4550             break;
4551           }
4552           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4553         }
4554       } while (false);
4555     }
4556     break;
4557   }
4558   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4559   return MatchOperand_Success;
4560 }
4561 
4562 bool AMDGPUOperand::isSendMsg() const {
4563   return isImmTy(ImmTySendMsg);
4564 }
4565 
4566 //===----------------------------------------------------------------------===//
4567 // parser helpers
4568 //===----------------------------------------------------------------------===//
4569 
4570 bool
4571 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4572   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4573 }
4574 
4575 bool
4576 AMDGPUAsmParser::isId(const StringRef Id) const {
4577   return isId(getToken(), Id);
4578 }
4579 
4580 bool
4581 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4582   return getTokenKind() == Kind;
4583 }
4584 
4585 bool
4586 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4587   if (isId(Id)) {
4588     lex();
4589     return true;
4590   }
4591   return false;
4592 }
4593 
4594 bool
4595 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4596   if (isToken(Kind)) {
4597     lex();
4598     return true;
4599   }
4600   return false;
4601 }
4602 
4603 bool
4604 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4605                            const StringRef ErrMsg) {
4606   if (!trySkipToken(Kind)) {
4607     Error(getLoc(), ErrMsg);
4608     return false;
4609   }
4610   return true;
4611 }
4612 
4613 bool
4614 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4615   return !getParser().parseAbsoluteExpression(Imm);
4616 }
4617 
4618 bool
4619 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4620   if (isToken(AsmToken::String)) {
4621     Val = getToken().getStringContents();
4622     lex();
4623     return true;
4624   } else {
4625     Error(getLoc(), ErrMsg);
4626     return false;
4627   }
4628 }
4629 
4630 AsmToken
4631 AMDGPUAsmParser::getToken() const {
4632   return Parser.getTok();
4633 }
4634 
4635 AsmToken
4636 AMDGPUAsmParser::peekToken() {
4637   return getLexer().peekTok();
4638 }
4639 
4640 AsmToken::TokenKind
4641 AMDGPUAsmParser::getTokenKind() const {
4642   return getLexer().getKind();
4643 }
4644 
4645 SMLoc
4646 AMDGPUAsmParser::getLoc() const {
4647   return getToken().getLoc();
4648 }
4649 
4650 StringRef
4651 AMDGPUAsmParser::getTokenStr() const {
4652   return getToken().getString();
4653 }
4654 
4655 void
4656 AMDGPUAsmParser::lex() {
4657   Parser.Lex();
4658 }
4659 
4660 //===----------------------------------------------------------------------===//
4661 // swizzle
4662 //===----------------------------------------------------------------------===//
4663 
4664 LLVM_READNONE
4665 static unsigned
4666 encodeBitmaskPerm(const unsigned AndMask,
4667                   const unsigned OrMask,
4668                   const unsigned XorMask) {
4669   using namespace llvm::AMDGPU::Swizzle;
4670 
4671   return BITMASK_PERM_ENC |
4672          (AndMask << BITMASK_AND_SHIFT) |
4673          (OrMask  << BITMASK_OR_SHIFT)  |
4674          (XorMask << BITMASK_XOR_SHIFT);
4675 }
4676 
4677 bool
4678 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4679                                       const unsigned MinVal,
4680                                       const unsigned MaxVal,
4681                                       const StringRef ErrMsg) {
4682   for (unsigned i = 0; i < OpNum; ++i) {
4683     if (!skipToken(AsmToken::Comma, "expected a comma")){
4684       return false;
4685     }
4686     SMLoc ExprLoc = Parser.getTok().getLoc();
4687     if (!parseExpr(Op[i])) {
4688       return false;
4689     }
4690     if (Op[i] < MinVal || Op[i] > MaxVal) {
4691       Error(ExprLoc, ErrMsg);
4692       return false;
4693     }
4694   }
4695 
4696   return true;
4697 }
4698 
4699 bool
4700 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4701   using namespace llvm::AMDGPU::Swizzle;
4702 
4703   int64_t Lane[LANE_NUM];
4704   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4705                            "expected a 2-bit lane id")) {
4706     Imm = QUAD_PERM_ENC;
4707     for (unsigned I = 0; I < LANE_NUM; ++I) {
4708       Imm |= Lane[I] << (LANE_SHIFT * I);
4709     }
4710     return true;
4711   }
4712   return false;
4713 }
4714 
4715 bool
4716 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4717   using namespace llvm::AMDGPU::Swizzle;
4718 
4719   SMLoc S = Parser.getTok().getLoc();
4720   int64_t GroupSize;
4721   int64_t LaneIdx;
4722 
4723   if (!parseSwizzleOperands(1, &GroupSize,
4724                             2, 32,
4725                             "group size must be in the interval [2,32]")) {
4726     return false;
4727   }
4728   if (!isPowerOf2_64(GroupSize)) {
4729     Error(S, "group size must be a power of two");
4730     return false;
4731   }
4732   if (parseSwizzleOperands(1, &LaneIdx,
4733                            0, GroupSize - 1,
4734                            "lane id must be in the interval [0,group size - 1]")) {
4735     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4736     return true;
4737   }
4738   return false;
4739 }
4740 
4741 bool
4742 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4743   using namespace llvm::AMDGPU::Swizzle;
4744 
4745   SMLoc S = Parser.getTok().getLoc();
4746   int64_t GroupSize;
4747 
4748   if (!parseSwizzleOperands(1, &GroupSize,
4749       2, 32, "group size must be in the interval [2,32]")) {
4750     return false;
4751   }
4752   if (!isPowerOf2_64(GroupSize)) {
4753     Error(S, "group size must be a power of two");
4754     return false;
4755   }
4756 
4757   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4758   return true;
4759 }
4760 
4761 bool
4762 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4763   using namespace llvm::AMDGPU::Swizzle;
4764 
4765   SMLoc S = Parser.getTok().getLoc();
4766   int64_t GroupSize;
4767 
4768   if (!parseSwizzleOperands(1, &GroupSize,
4769       1, 16, "group size must be in the interval [1,16]")) {
4770     return false;
4771   }
4772   if (!isPowerOf2_64(GroupSize)) {
4773     Error(S, "group size must be a power of two");
4774     return false;
4775   }
4776 
4777   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4778   return true;
4779 }
4780 
4781 bool
4782 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4783   using namespace llvm::AMDGPU::Swizzle;
4784 
4785   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4786     return false;
4787   }
4788 
4789   StringRef Ctl;
4790   SMLoc StrLoc = Parser.getTok().getLoc();
4791   if (!parseString(Ctl)) {
4792     return false;
4793   }
4794   if (Ctl.size() != BITMASK_WIDTH) {
4795     Error(StrLoc, "expected a 5-character mask");
4796     return false;
4797   }
4798 
4799   unsigned AndMask = 0;
4800   unsigned OrMask = 0;
4801   unsigned XorMask = 0;
4802 
4803   for (size_t i = 0; i < Ctl.size(); ++i) {
4804     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4805     switch(Ctl[i]) {
4806     default:
4807       Error(StrLoc, "invalid mask");
4808       return false;
4809     case '0':
4810       break;
4811     case '1':
4812       OrMask |= Mask;
4813       break;
4814     case 'p':
4815       AndMask |= Mask;
4816       break;
4817     case 'i':
4818       AndMask |= Mask;
4819       XorMask |= Mask;
4820       break;
4821     }
4822   }
4823 
4824   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4825   return true;
4826 }
4827 
4828 bool
4829 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4830 
4831   SMLoc OffsetLoc = Parser.getTok().getLoc();
4832 
4833   if (!parseExpr(Imm)) {
4834     return false;
4835   }
4836   if (!isUInt<16>(Imm)) {
4837     Error(OffsetLoc, "expected a 16-bit offset");
4838     return false;
4839   }
4840   return true;
4841 }
4842 
4843 bool
4844 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4845   using namespace llvm::AMDGPU::Swizzle;
4846 
4847   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
4848 
4849     SMLoc ModeLoc = Parser.getTok().getLoc();
4850     bool Ok = false;
4851 
4852     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4853       Ok = parseSwizzleQuadPerm(Imm);
4854     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4855       Ok = parseSwizzleBitmaskPerm(Imm);
4856     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4857       Ok = parseSwizzleBroadcast(Imm);
4858     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4859       Ok = parseSwizzleSwap(Imm);
4860     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4861       Ok = parseSwizzleReverse(Imm);
4862     } else {
4863       Error(ModeLoc, "expected a swizzle mode");
4864     }
4865 
4866     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
4867   }
4868 
4869   return false;
4870 }
4871 
4872 OperandMatchResultTy
4873 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4874   SMLoc S = Parser.getTok().getLoc();
4875   int64_t Imm = 0;
4876 
4877   if (trySkipId("offset")) {
4878 
4879     bool Ok = false;
4880     if (skipToken(AsmToken::Colon, "expected a colon")) {
4881       if (trySkipId("swizzle")) {
4882         Ok = parseSwizzleMacro(Imm);
4883       } else {
4884         Ok = parseSwizzleOffset(Imm);
4885       }
4886     }
4887 
4888     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4889 
4890     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4891   } else {
4892     // Swizzle "offset" operand is optional.
4893     // If it is omitted, try parsing other optional operands.
4894     return parseOptionalOpr(Operands);
4895   }
4896 }
4897 
4898 bool
4899 AMDGPUOperand::isSwizzle() const {
4900   return isImmTy(ImmTySwizzle);
4901 }
4902 
4903 //===----------------------------------------------------------------------===//
4904 // VGPR Index Mode
4905 //===----------------------------------------------------------------------===//
4906 
4907 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
4908 
4909   using namespace llvm::AMDGPU::VGPRIndexMode;
4910 
4911   if (trySkipToken(AsmToken::RParen)) {
4912     return OFF;
4913   }
4914 
4915   int64_t Imm = 0;
4916 
4917   while (true) {
4918     unsigned Mode = 0;
4919     SMLoc S = Parser.getTok().getLoc();
4920 
4921     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
4922       if (trySkipId(IdSymbolic[ModeId])) {
4923         Mode = 1 << ModeId;
4924         break;
4925       }
4926     }
4927 
4928     if (Mode == 0) {
4929       Error(S, (Imm == 0)?
4930                "expected a VGPR index mode or a closing parenthesis" :
4931                "expected a VGPR index mode");
4932       break;
4933     }
4934 
4935     if (Imm & Mode) {
4936       Error(S, "duplicate VGPR index mode");
4937       break;
4938     }
4939     Imm |= Mode;
4940 
4941     if (trySkipToken(AsmToken::RParen))
4942       break;
4943     if (!skipToken(AsmToken::Comma,
4944                    "expected a comma or a closing parenthesis"))
4945       break;
4946   }
4947 
4948   return Imm;
4949 }
4950 
4951 OperandMatchResultTy
4952 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4953 
4954   int64_t Imm = 0;
4955   SMLoc S = Parser.getTok().getLoc();
4956 
4957   if (getLexer().getKind() == AsmToken::Identifier &&
4958       Parser.getTok().getString() == "gpr_idx" &&
4959       getLexer().peekTok().is(AsmToken::LParen)) {
4960 
4961     Parser.Lex();
4962     Parser.Lex();
4963 
4964     // If parse failed, trigger an error but do not return error code
4965     // to avoid excessive error messages.
4966     Imm = parseGPRIdxMacro();
4967 
4968   } else {
4969     if (getParser().parseAbsoluteExpression(Imm))
4970       return MatchOperand_NoMatch;
4971     if (Imm < 0 || !isUInt<4>(Imm)) {
4972       Error(S, "invalid immediate: only 4-bit values are legal");
4973     }
4974   }
4975 
4976   Operands.push_back(
4977       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
4978   return MatchOperand_Success;
4979 }
4980 
4981 bool AMDGPUOperand::isGPRIdxMode() const {
4982   return isImmTy(ImmTyGprIdxMode);
4983 }
4984 
4985 //===----------------------------------------------------------------------===//
4986 // sopp branch targets
4987 //===----------------------------------------------------------------------===//
4988 
4989 OperandMatchResultTy
4990 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4991   SMLoc S = Parser.getTok().getLoc();
4992 
4993   switch (getLexer().getKind()) {
4994     default: return MatchOperand_ParseFail;
4995     case AsmToken::Integer: {
4996       int64_t Imm;
4997       if (getParser().parseAbsoluteExpression(Imm))
4998         return MatchOperand_ParseFail;
4999       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5000       return MatchOperand_Success;
5001     }
5002 
5003     case AsmToken::Identifier:
5004       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5005           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5006                                   Parser.getTok().getString()), getContext()), S));
5007       Parser.Lex();
5008       return MatchOperand_Success;
5009   }
5010 }
5011 
5012 //===----------------------------------------------------------------------===//
5013 // mubuf
5014 //===----------------------------------------------------------------------===//
5015 
5016 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5017   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5018 }
5019 
5020 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5021   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5022 }
5023 
5024 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5025                                const OperandVector &Operands,
5026                                bool IsAtomic,
5027                                bool IsAtomicReturn,
5028                                bool IsLds) {
5029   bool IsLdsOpcode = IsLds;
5030   bool HasLdsModifier = false;
5031   OptionalImmIndexMap OptionalIdx;
5032   assert(IsAtomicReturn ? IsAtomic : true);
5033   unsigned FirstOperandIdx = 1;
5034 
5035   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5036     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5037 
5038     // Add the register arguments
5039     if (Op.isReg()) {
5040       Op.addRegOperands(Inst, 1);
5041       // Insert a tied src for atomic return dst.
5042       // This cannot be postponed as subsequent calls to
5043       // addImmOperands rely on correct number of MC operands.
5044       if (IsAtomicReturn && i == FirstOperandIdx)
5045         Op.addRegOperands(Inst, 1);
5046       continue;
5047     }
5048 
5049     // Handle the case where soffset is an immediate
5050     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5051       Op.addImmOperands(Inst, 1);
5052       continue;
5053     }
5054 
5055     HasLdsModifier = Op.isLDS();
5056 
5057     // Handle tokens like 'offen' which are sometimes hard-coded into the
5058     // asm string.  There are no MCInst operands for these.
5059     if (Op.isToken()) {
5060       continue;
5061     }
5062     assert(Op.isImm());
5063 
5064     // Handle optional arguments
5065     OptionalIdx[Op.getImmTy()] = i;
5066   }
5067 
5068   // This is a workaround for an llvm quirk which may result in an
5069   // incorrect instruction selection. Lds and non-lds versions of
5070   // MUBUF instructions are identical except that lds versions
5071   // have mandatory 'lds' modifier. However this modifier follows
5072   // optional modifiers and llvm asm matcher regards this 'lds'
5073   // modifier as an optional one. As a result, an lds version
5074   // of opcode may be selected even if it has no 'lds' modifier.
5075   if (IsLdsOpcode && !HasLdsModifier) {
5076     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5077     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5078       Inst.setOpcode(NoLdsOpcode);
5079       IsLdsOpcode = false;
5080     }
5081   }
5082 
5083   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5084   if (!IsAtomic) { // glc is hard-coded.
5085     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5086   }
5087   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5088 
5089   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5090     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5091   }
5092 }
5093 
5094 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5095   OptionalImmIndexMap OptionalIdx;
5096 
5097   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5098     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5099 
5100     // Add the register arguments
5101     if (Op.isReg()) {
5102       Op.addRegOperands(Inst, 1);
5103       continue;
5104     }
5105 
5106     // Handle the case where soffset is an immediate
5107     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5108       Op.addImmOperands(Inst, 1);
5109       continue;
5110     }
5111 
5112     // Handle tokens like 'offen' which are sometimes hard-coded into the
5113     // asm string.  There are no MCInst operands for these.
5114     if (Op.isToken()) {
5115       continue;
5116     }
5117     assert(Op.isImm());
5118 
5119     // Handle optional arguments
5120     OptionalIdx[Op.getImmTy()] = i;
5121   }
5122 
5123   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5124                         AMDGPUOperand::ImmTyOffset);
5125   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5126   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5127   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5128   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5129 }
5130 
5131 //===----------------------------------------------------------------------===//
5132 // mimg
5133 //===----------------------------------------------------------------------===//
5134 
5135 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5136                               bool IsAtomic) {
5137   unsigned I = 1;
5138   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5139   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5140     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5141   }
5142 
5143   if (IsAtomic) {
5144     // Add src, same as dst
5145     assert(Desc.getNumDefs() == 1);
5146     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5147   }
5148 
5149   OptionalImmIndexMap OptionalIdx;
5150 
5151   for (unsigned E = Operands.size(); I != E; ++I) {
5152     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5153 
5154     // Add the register arguments
5155     if (Op.isReg()) {
5156       Op.addRegOperands(Inst, 1);
5157     } else if (Op.isImmModifier()) {
5158       OptionalIdx[Op.getImmTy()] = I;
5159     } else {
5160       llvm_unreachable("unexpected operand type");
5161     }
5162   }
5163 
5164   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5165   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5166   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5167   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5168   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5169   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5170   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5171   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5172   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5173 }
5174 
5175 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5176   cvtMIMG(Inst, Operands, true);
5177 }
5178 
5179 //===----------------------------------------------------------------------===//
5180 // smrd
5181 //===----------------------------------------------------------------------===//
5182 
5183 bool AMDGPUOperand::isSMRDOffset8() const {
5184   return isImm() && isUInt<8>(getImm());
5185 }
5186 
5187 bool AMDGPUOperand::isSMRDOffset20() const {
5188   return isImm() && isUInt<20>(getImm());
5189 }
5190 
5191 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5192   // 32-bit literals are only supported on CI and we only want to use them
5193   // when the offset is > 8-bits.
5194   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5195 }
5196 
5197 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5198   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5199 }
5200 
5201 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5202   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5203 }
5204 
5205 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5206   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5207 }
5208 
5209 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5210   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5211 }
5212 
5213 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5214   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5215 }
5216 
5217 //===----------------------------------------------------------------------===//
5218 // vop3
5219 //===----------------------------------------------------------------------===//
5220 
5221 static bool ConvertOmodMul(int64_t &Mul) {
5222   if (Mul != 1 && Mul != 2 && Mul != 4)
5223     return false;
5224 
5225   Mul >>= 1;
5226   return true;
5227 }
5228 
5229 static bool ConvertOmodDiv(int64_t &Div) {
5230   if (Div == 1) {
5231     Div = 0;
5232     return true;
5233   }
5234 
5235   if (Div == 2) {
5236     Div = 3;
5237     return true;
5238   }
5239 
5240   return false;
5241 }
5242 
5243 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5244   if (BoundCtrl == 0) {
5245     BoundCtrl = 1;
5246     return true;
5247   }
5248 
5249   if (BoundCtrl == -1) {
5250     BoundCtrl = 0;
5251     return true;
5252   }
5253 
5254   return false;
5255 }
5256 
5257 // Note: the order in this table matches the order of operands in AsmString.
5258 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5259   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5260   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5261   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5262   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5263   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5264   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5265   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5266   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5267   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5268   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5269   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5270   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5271   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5272   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5273   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5274   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5275   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5276   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5277   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5278   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5279   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5280   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5281   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5282   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5283   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5284   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5285   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5286   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5287   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5288   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5289   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5290   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5291   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5292   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5293   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5294   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5295   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5296 };
5297 
5298 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5299   unsigned size = Operands.size();
5300   assert(size > 0);
5301 
5302   OperandMatchResultTy res = parseOptionalOpr(Operands);
5303 
5304   // This is a hack to enable hardcoded mandatory operands which follow
5305   // optional operands.
5306   //
5307   // Current design assumes that all operands after the first optional operand
5308   // are also optional. However implementation of some instructions violates
5309   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5310   //
5311   // To alleviate this problem, we have to (implicitly) parse extra operands
5312   // to make sure autogenerated parser of custom operands never hit hardcoded
5313   // mandatory operands.
5314 
5315   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5316 
5317     // We have parsed the first optional operand.
5318     // Parse as many operands as necessary to skip all mandatory operands.
5319 
5320     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5321       if (res != MatchOperand_Success ||
5322           getLexer().is(AsmToken::EndOfStatement)) break;
5323       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5324       res = parseOptionalOpr(Operands);
5325     }
5326   }
5327 
5328   return res;
5329 }
5330 
5331 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5332   OperandMatchResultTy res;
5333   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5334     // try to parse any optional operand here
5335     if (Op.IsBit) {
5336       res = parseNamedBit(Op.Name, Operands, Op.Type);
5337     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5338       res = parseOModOperand(Operands);
5339     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5340                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5341                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5342       res = parseSDWASel(Operands, Op.Name, Op.Type);
5343     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5344       res = parseSDWADstUnused(Operands);
5345     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5346                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5347                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5348                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5349       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5350                                         Op.ConvertResult);
5351     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5352       res = parseDfmtNfmt(Operands);
5353     } else {
5354       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5355     }
5356     if (res != MatchOperand_NoMatch) {
5357       return res;
5358     }
5359   }
5360   return MatchOperand_NoMatch;
5361 }
5362 
5363 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5364   StringRef Name = Parser.getTok().getString();
5365   if (Name == "mul") {
5366     return parseIntWithPrefix("mul", Operands,
5367                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5368   }
5369 
5370   if (Name == "div") {
5371     return parseIntWithPrefix("div", Operands,
5372                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5373   }
5374 
5375   return MatchOperand_NoMatch;
5376 }
5377 
5378 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5379   cvtVOP3P(Inst, Operands);
5380 
5381   int Opc = Inst.getOpcode();
5382 
5383   int SrcNum;
5384   const int Ops[] = { AMDGPU::OpName::src0,
5385                       AMDGPU::OpName::src1,
5386                       AMDGPU::OpName::src2 };
5387   for (SrcNum = 0;
5388        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5389        ++SrcNum);
5390   assert(SrcNum > 0);
5391 
5392   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5393   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5394 
5395   if ((OpSel & (1 << SrcNum)) != 0) {
5396     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5397     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5398     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5399   }
5400 }
5401 
5402 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5403       // 1. This operand is input modifiers
5404   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5405       // 2. This is not last operand
5406       && Desc.NumOperands > (OpNum + 1)
5407       // 3. Next operand is register class
5408       && Desc.OpInfo[OpNum + 1].RegClass != -1
5409       // 4. Next register is not tied to any other operand
5410       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5411 }
5412 
5413 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5414 {
5415   OptionalImmIndexMap OptionalIdx;
5416   unsigned Opc = Inst.getOpcode();
5417 
5418   unsigned I = 1;
5419   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5420   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5421     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5422   }
5423 
5424   for (unsigned E = Operands.size(); I != E; ++I) {
5425     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5426     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5427       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5428     } else if (Op.isInterpSlot() ||
5429                Op.isInterpAttr() ||
5430                Op.isAttrChan()) {
5431       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5432     } else if (Op.isImmModifier()) {
5433       OptionalIdx[Op.getImmTy()] = I;
5434     } else {
5435       llvm_unreachable("unhandled operand type");
5436     }
5437   }
5438 
5439   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5440     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5441   }
5442 
5443   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5444     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5445   }
5446 
5447   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5448     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5449   }
5450 }
5451 
5452 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5453                               OptionalImmIndexMap &OptionalIdx) {
5454   unsigned Opc = Inst.getOpcode();
5455 
5456   unsigned I = 1;
5457   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5458   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5459     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5460   }
5461 
5462   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5463     // This instruction has src modifiers
5464     for (unsigned E = Operands.size(); I != E; ++I) {
5465       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5466       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5467         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5468       } else if (Op.isImmModifier()) {
5469         OptionalIdx[Op.getImmTy()] = I;
5470       } else if (Op.isRegOrImm()) {
5471         Op.addRegOrImmOperands(Inst, 1);
5472       } else {
5473         llvm_unreachable("unhandled operand type");
5474       }
5475     }
5476   } else {
5477     // No src modifiers
5478     for (unsigned E = Operands.size(); I != E; ++I) {
5479       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5480       if (Op.isMod()) {
5481         OptionalIdx[Op.getImmTy()] = I;
5482       } else {
5483         Op.addRegOrImmOperands(Inst, 1);
5484       }
5485     }
5486   }
5487 
5488   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5489     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5490   }
5491 
5492   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5493     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5494   }
5495 
5496   // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5497   // it has src2 register operand that is tied to dst operand
5498   // we don't allow modifiers for this operand in assembler so src2_modifiers
5499   // should be 0.
5500   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5501       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5502       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5503       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5504     auto it = Inst.begin();
5505     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5506     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5507     ++it;
5508     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5509   }
5510 }
5511 
5512 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5513   OptionalImmIndexMap OptionalIdx;
5514   cvtVOP3(Inst, Operands, OptionalIdx);
5515 }
5516 
5517 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5518                                const OperandVector &Operands) {
5519   OptionalImmIndexMap OptIdx;
5520   const int Opc = Inst.getOpcode();
5521   const MCInstrDesc &Desc = MII.get(Opc);
5522 
5523   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5524 
5525   cvtVOP3(Inst, Operands, OptIdx);
5526 
5527   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5528     assert(!IsPacked);
5529     Inst.addOperand(Inst.getOperand(0));
5530   }
5531 
5532   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5533   // instruction, and then figure out where to actually put the modifiers
5534 
5535   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5536 
5537   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5538   if (OpSelHiIdx != -1) {
5539     int DefaultVal = IsPacked ? -1 : 0;
5540     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5541                           DefaultVal);
5542   }
5543 
5544   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5545   if (NegLoIdx != -1) {
5546     assert(IsPacked);
5547     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5548     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5549   }
5550 
5551   const int Ops[] = { AMDGPU::OpName::src0,
5552                       AMDGPU::OpName::src1,
5553                       AMDGPU::OpName::src2 };
5554   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5555                          AMDGPU::OpName::src1_modifiers,
5556                          AMDGPU::OpName::src2_modifiers };
5557 
5558   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5559 
5560   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5561   unsigned OpSelHi = 0;
5562   unsigned NegLo = 0;
5563   unsigned NegHi = 0;
5564 
5565   if (OpSelHiIdx != -1) {
5566     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5567   }
5568 
5569   if (NegLoIdx != -1) {
5570     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5571     NegLo = Inst.getOperand(NegLoIdx).getImm();
5572     NegHi = Inst.getOperand(NegHiIdx).getImm();
5573   }
5574 
5575   for (int J = 0; J < 3; ++J) {
5576     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5577     if (OpIdx == -1)
5578       break;
5579 
5580     uint32_t ModVal = 0;
5581 
5582     if ((OpSel & (1 << J)) != 0)
5583       ModVal |= SISrcMods::OP_SEL_0;
5584 
5585     if ((OpSelHi & (1 << J)) != 0)
5586       ModVal |= SISrcMods::OP_SEL_1;
5587 
5588     if ((NegLo & (1 << J)) != 0)
5589       ModVal |= SISrcMods::NEG;
5590 
5591     if ((NegHi & (1 << J)) != 0)
5592       ModVal |= SISrcMods::NEG_HI;
5593 
5594     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5595 
5596     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5597   }
5598 }
5599 
5600 //===----------------------------------------------------------------------===//
5601 // dpp
5602 //===----------------------------------------------------------------------===//
5603 
5604 bool AMDGPUOperand::isDPPCtrl() const {
5605   using namespace AMDGPU::DPP;
5606 
5607   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5608   if (result) {
5609     int64_t Imm = getImm();
5610     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5611            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5612            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5613            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5614            (Imm == DppCtrl::WAVE_SHL1) ||
5615            (Imm == DppCtrl::WAVE_ROL1) ||
5616            (Imm == DppCtrl::WAVE_SHR1) ||
5617            (Imm == DppCtrl::WAVE_ROR1) ||
5618            (Imm == DppCtrl::ROW_MIRROR) ||
5619            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5620            (Imm == DppCtrl::BCAST15) ||
5621            (Imm == DppCtrl::BCAST31);
5622   }
5623   return false;
5624 }
5625 
5626 bool AMDGPUOperand::isS16Imm() const {
5627   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5628 }
5629 
5630 bool AMDGPUOperand::isU16Imm() const {
5631   return isImm() && isUInt<16>(getImm());
5632 }
5633 
5634 OperandMatchResultTy
5635 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5636   using namespace AMDGPU::DPP;
5637 
5638   SMLoc S = Parser.getTok().getLoc();
5639   StringRef Prefix;
5640   int64_t Int;
5641 
5642   if (getLexer().getKind() == AsmToken::Identifier) {
5643     Prefix = Parser.getTok().getString();
5644   } else {
5645     return MatchOperand_NoMatch;
5646   }
5647 
5648   if (Prefix == "row_mirror") {
5649     Int = DppCtrl::ROW_MIRROR;
5650     Parser.Lex();
5651   } else if (Prefix == "row_half_mirror") {
5652     Int = DppCtrl::ROW_HALF_MIRROR;
5653     Parser.Lex();
5654   } else {
5655     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5656     if (Prefix != "quad_perm"
5657         && Prefix != "row_shl"
5658         && Prefix != "row_shr"
5659         && Prefix != "row_ror"
5660         && Prefix != "wave_shl"
5661         && Prefix != "wave_rol"
5662         && Prefix != "wave_shr"
5663         && Prefix != "wave_ror"
5664         && Prefix != "row_bcast") {
5665       return MatchOperand_NoMatch;
5666     }
5667 
5668     Parser.Lex();
5669     if (getLexer().isNot(AsmToken::Colon))
5670       return MatchOperand_ParseFail;
5671 
5672     if (Prefix == "quad_perm") {
5673       // quad_perm:[%d,%d,%d,%d]
5674       Parser.Lex();
5675       if (getLexer().isNot(AsmToken::LBrac))
5676         return MatchOperand_ParseFail;
5677       Parser.Lex();
5678 
5679       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5680         return MatchOperand_ParseFail;
5681 
5682       for (int i = 0; i < 3; ++i) {
5683         if (getLexer().isNot(AsmToken::Comma))
5684           return MatchOperand_ParseFail;
5685         Parser.Lex();
5686 
5687         int64_t Temp;
5688         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5689           return MatchOperand_ParseFail;
5690         const int shift = i*2 + 2;
5691         Int += (Temp << shift);
5692       }
5693 
5694       if (getLexer().isNot(AsmToken::RBrac))
5695         return MatchOperand_ParseFail;
5696       Parser.Lex();
5697     } else {
5698       // sel:%d
5699       Parser.Lex();
5700       if (getParser().parseAbsoluteExpression(Int))
5701         return MatchOperand_ParseFail;
5702 
5703       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5704         Int |= DppCtrl::ROW_SHL0;
5705       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5706         Int |= DppCtrl::ROW_SHR0;
5707       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5708         Int |= DppCtrl::ROW_ROR0;
5709       } else if (Prefix == "wave_shl" && 1 == Int) {
5710         Int = DppCtrl::WAVE_SHL1;
5711       } else if (Prefix == "wave_rol" && 1 == Int) {
5712         Int = DppCtrl::WAVE_ROL1;
5713       } else if (Prefix == "wave_shr" && 1 == Int) {
5714         Int = DppCtrl::WAVE_SHR1;
5715       } else if (Prefix == "wave_ror" && 1 == Int) {
5716         Int = DppCtrl::WAVE_ROR1;
5717       } else if (Prefix == "row_bcast") {
5718         if (Int == 15) {
5719           Int = DppCtrl::BCAST15;
5720         } else if (Int == 31) {
5721           Int = DppCtrl::BCAST31;
5722         } else {
5723           return MatchOperand_ParseFail;
5724         }
5725       } else {
5726         return MatchOperand_ParseFail;
5727       }
5728     }
5729   }
5730 
5731   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5732   return MatchOperand_Success;
5733 }
5734 
5735 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5736   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5737 }
5738 
5739 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
5740   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
5741 }
5742 
5743 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5744   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5745 }
5746 
5747 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5748   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5749 }
5750 
5751 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5752   OptionalImmIndexMap OptionalIdx;
5753 
5754   unsigned I = 1;
5755   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5756   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5757     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5758   }
5759 
5760   for (unsigned E = Operands.size(); I != E; ++I) {
5761     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5762                                             MCOI::TIED_TO);
5763     if (TiedTo != -1) {
5764       assert((unsigned)TiedTo < Inst.getNumOperands());
5765       // handle tied old or src2 for MAC instructions
5766       Inst.addOperand(Inst.getOperand(TiedTo));
5767     }
5768     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5769     // Add the register arguments
5770     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5771       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
5772       // Skip it.
5773       continue;
5774     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5775       Op.addRegWithFPInputModsOperands(Inst, 2);
5776     } else if (Op.isDPPCtrl()) {
5777       Op.addImmOperands(Inst, 1);
5778     } else if (Op.isImm()) {
5779       // Handle optional arguments
5780       OptionalIdx[Op.getImmTy()] = I;
5781     } else {
5782       llvm_unreachable("Invalid operand type");
5783     }
5784   }
5785 
5786   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5788   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5789 }
5790 
5791 //===----------------------------------------------------------------------===//
5792 // sdwa
5793 //===----------------------------------------------------------------------===//
5794 
5795 OperandMatchResultTy
5796 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5797                               AMDGPUOperand::ImmTy Type) {
5798   using namespace llvm::AMDGPU::SDWA;
5799 
5800   SMLoc S = Parser.getTok().getLoc();
5801   StringRef Value;
5802   OperandMatchResultTy res;
5803 
5804   res = parseStringWithPrefix(Prefix, Value);
5805   if (res != MatchOperand_Success) {
5806     return res;
5807   }
5808 
5809   int64_t Int;
5810   Int = StringSwitch<int64_t>(Value)
5811         .Case("BYTE_0", SdwaSel::BYTE_0)
5812         .Case("BYTE_1", SdwaSel::BYTE_1)
5813         .Case("BYTE_2", SdwaSel::BYTE_2)
5814         .Case("BYTE_3", SdwaSel::BYTE_3)
5815         .Case("WORD_0", SdwaSel::WORD_0)
5816         .Case("WORD_1", SdwaSel::WORD_1)
5817         .Case("DWORD", SdwaSel::DWORD)
5818         .Default(0xffffffff);
5819   Parser.Lex(); // eat last token
5820 
5821   if (Int == 0xffffffff) {
5822     return MatchOperand_ParseFail;
5823   }
5824 
5825   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5826   return MatchOperand_Success;
5827 }
5828 
5829 OperandMatchResultTy
5830 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5831   using namespace llvm::AMDGPU::SDWA;
5832 
5833   SMLoc S = Parser.getTok().getLoc();
5834   StringRef Value;
5835   OperandMatchResultTy res;
5836 
5837   res = parseStringWithPrefix("dst_unused", Value);
5838   if (res != MatchOperand_Success) {
5839     return res;
5840   }
5841 
5842   int64_t Int;
5843   Int = StringSwitch<int64_t>(Value)
5844         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5845         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5846         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5847         .Default(0xffffffff);
5848   Parser.Lex(); // eat last token
5849 
5850   if (Int == 0xffffffff) {
5851     return MatchOperand_ParseFail;
5852   }
5853 
5854   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5855   return MatchOperand_Success;
5856 }
5857 
5858 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5859   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5860 }
5861 
5862 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5863   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5864 }
5865 
5866 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5867   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5868 }
5869 
5870 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5871   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5872 }
5873 
5874 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5875                               uint64_t BasicInstType, bool skipVcc) {
5876   using namespace llvm::AMDGPU::SDWA;
5877 
5878   OptionalImmIndexMap OptionalIdx;
5879   bool skippedVcc = false;
5880 
5881   unsigned I = 1;
5882   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5883   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5884     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5885   }
5886 
5887   for (unsigned E = Operands.size(); I != E; ++I) {
5888     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5889     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5890       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
5891       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5892       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5893       // Skip VCC only if we didn't skip it on previous iteration.
5894       if (BasicInstType == SIInstrFlags::VOP2 &&
5895           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5896         skippedVcc = true;
5897         continue;
5898       } else if (BasicInstType == SIInstrFlags::VOPC &&
5899                  Inst.getNumOperands() == 0) {
5900         skippedVcc = true;
5901         continue;
5902       }
5903     }
5904     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5905       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5906     } else if (Op.isImm()) {
5907       // Handle optional arguments
5908       OptionalIdx[Op.getImmTy()] = I;
5909     } else {
5910       llvm_unreachable("Invalid operand type");
5911     }
5912     skippedVcc = false;
5913   }
5914 
5915   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5916       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5917     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
5918     switch (BasicInstType) {
5919     case SIInstrFlags::VOP1:
5920       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5921       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5922         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5923       }
5924       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5925       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5926       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5927       break;
5928 
5929     case SIInstrFlags::VOP2:
5930       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5931       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5932         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5933       }
5934       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5935       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5936       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5937       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5938       break;
5939 
5940     case SIInstrFlags::VOPC:
5941       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5942       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5943       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5944       break;
5945 
5946     default:
5947       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5948     }
5949   }
5950 
5951   // special case v_mac_{f16, f32}:
5952   // it has src2 register operand that is tied to dst operand
5953   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5954       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5955     auto it = Inst.begin();
5956     std::advance(
5957       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5958     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5959   }
5960 }
5961 
5962 /// Force static initialization.
5963 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5964   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5965   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5966 }
5967 
5968 #define GET_REGISTER_MATCHER
5969 #define GET_MATCHER_IMPLEMENTATION
5970 #define GET_MNEMONIC_SPELL_CHECKER
5971 #include "AMDGPUGenAsmMatcher.inc"
5972 
5973 // This fuction should be defined after auto-generated include so that we have
5974 // MatchClassKind enum defined
5975 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5976                                                      unsigned Kind) {
5977   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5978   // But MatchInstructionImpl() expects to meet token and fails to validate
5979   // operand. This method checks if we are given immediate operand but expect to
5980   // get corresponding token.
5981   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5982   switch (Kind) {
5983   case MCK_addr64:
5984     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5985   case MCK_gds:
5986     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5987   case MCK_lds:
5988     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5989   case MCK_glc:
5990     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5991   case MCK_idxen:
5992     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5993   case MCK_offen:
5994     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5995   case MCK_SSrcB32:
5996     // When operands have expression values, they will return true for isToken,
5997     // because it is not possible to distinguish between a token and an
5998     // expression at parse time. MatchInstructionImpl() will always try to
5999     // match an operand as a token, when isToken returns true, and when the
6000     // name of the expression is not a valid token, the match will fail,
6001     // so we need to handle it here.
6002     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6003   case MCK_SSrcF32:
6004     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6005   case MCK_SoppBrTarget:
6006     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6007   case MCK_VReg32OrOff:
6008     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6009   case MCK_InterpSlot:
6010     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6011   case MCK_Attr:
6012     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6013   case MCK_AttrChan:
6014     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6015   default:
6016     return Match_InvalidOperand;
6017   }
6018 }
6019 
6020 //===----------------------------------------------------------------------===//
6021 // endpgm
6022 //===----------------------------------------------------------------------===//
6023 
6024 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6025   SMLoc S = Parser.getTok().getLoc();
6026   int64_t Imm = 0;
6027 
6028   if (!parseExpr(Imm)) {
6029     // The operand is optional, if not present default to 0
6030     Imm = 0;
6031   }
6032 
6033   if (!isUInt<16>(Imm)) {
6034     Error(S, "expected a 16-bit value");
6035     return MatchOperand_ParseFail;
6036   }
6037 
6038   Operands.push_back(
6039       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6040   return MatchOperand_Success;
6041 }
6042 
6043 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6044