1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
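  // Source-operand modifiers: abs/neg for floating-point sources, sext for
  // integer sources. They are not separate MCInst operands; instead they are
  // folded into the corresponding src*_modifiers immediate using the
  // SISrcMods bit values, e.g. (illustrative) "-|v0|" yields
  // SISrcMods::NEG | SISrcMods::ABS.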
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0u;
106       Operand |= Neg ? SISrcMods::NEG : 0u;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0u;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
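  // Tags for immediate operands. ImmTyNone marks a plain literal; the other
  // values identify named/optional operands (e.g. "offset:16" or "glc") so
  // that they can be matched to the correct MCInst operand slot and printed
  // by printImmTy() below.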
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyGprIdxMode,
177     ImmTyEndpgm,
178     ImmTyHigh
179   };
180 
181   struct TokOp {
182     const char *Data;
183     unsigned Length;
184   };
185 
186   struct ImmOp {
187     int64_t Val;
188     ImmTy Type;
189     bool IsFPImm;
190     Modifiers Mods;
191   };
192 
193   struct RegOp {
194     unsigned RegNo;
195     bool IsForcedVOP3;
196     Modifiers Mods;
197   };
198 
199   union {
200     TokOp Tok;
201     ImmOp Imm;
202     RegOp Reg;
203     const MCExpr *Expr;
204   };
205 
206   bool isToken() const override {
207     if (Kind == Token)
208       return true;
209 
210     if (Kind != Expression || !Expr)
211       return false;
212 
213     // When parsing operands, we can't always tell if something was meant to be
214     // a token, like 'gds', or an expression that references a global variable.
215     // In this case, we assume the string is an expression, and if we need to
216     // interpret it as a token, then we treat the symbol name as the token.
217     return isa<MCSymbolRefExpr>(Expr);
218   }
219 
220   bool isImm() const override {
221     return Kind == Immediate;
222   }
223 
224   bool isInlinableImm(MVT type) const;
225   bool isLiteralImm(MVT type) const;
226 
227   bool isRegKind() const {
228     return Kind == Register;
229   }
230 
231   bool isReg() const override {
232     return isRegKind() && !hasModifiers();
233   }
234 
235   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
236     return isRegClass(RCID) || isInlinableImm(type);
237   }
238 
239   bool isRegOrImmWithInt16InputMods() const {
240     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
241   }
242 
243   bool isRegOrImmWithInt32InputMods() const {
244     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
245   }
246 
247   bool isRegOrImmWithInt64InputMods() const {
248     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
249   }
250 
251   bool isRegOrImmWithFP16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
253   }
254 
255   bool isRegOrImmWithFP32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
257   }
258 
259   bool isRegOrImmWithFP64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
261   }
262 
263   bool isVReg() const {
264     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
265            isRegClass(AMDGPU::VReg_64RegClassID) ||
266            isRegClass(AMDGPU::VReg_96RegClassID) ||
267            isRegClass(AMDGPU::VReg_128RegClassID) ||
268            isRegClass(AMDGPU::VReg_256RegClassID) ||
269            isRegClass(AMDGPU::VReg_512RegClassID);
270   }
271 
272   bool isVReg32() const {
273     return isRegClass(AMDGPU::VGPR_32RegClassID);
274   }
275 
276   bool isVReg32OrOff() const {
277     return isOff() || isVReg32();
278   }
279 
280   bool isSDWAOperand(MVT type) const;
281   bool isSDWAFP16Operand() const;
282   bool isSDWAFP32Operand() const;
283   bool isSDWAInt16Operand() const;
284   bool isSDWAInt32Operand() const;
285 
286   bool isImmTy(ImmTy ImmT) const {
287     return isImm() && Imm.Type == ImmT;
288   }
289 
290   bool isImmModifier() const {
291     return isImm() && Imm.Type != ImmTyNone;
292   }
293 
294   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
295   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
296   bool isDMask() const { return isImmTy(ImmTyDMask); }
297   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
298   bool isDA() const { return isImmTy(ImmTyDA); }
299   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
300   bool isLWE() const { return isImmTy(ImmTyLWE); }
301   bool isOff() const { return isImmTy(ImmTyOff); }
302   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
303   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
304   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
305   bool isOffen() const { return isImmTy(ImmTyOffen); }
306   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
307   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
308   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
309   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
310   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
311 
312   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
313   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
314   bool isGDS() const { return isImmTy(ImmTyGDS); }
315   bool isLDS() const { return isImmTy(ImmTyLDS); }
316   bool isGLC() const { return isImmTy(ImmTyGLC); }
317   bool isSLC() const { return isImmTy(ImmTySLC); }
318   bool isTFE() const { return isImmTy(ImmTyTFE); }
319   bool isD16() const { return isImmTy(ImmTyD16); }
320   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
321   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
322   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
323   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
324   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
325   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
326   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
327   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
328   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
329   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
330   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
331   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
332   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
333   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
334   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
335   bool isHigh() const { return isImmTy(ImmTyHigh); }
336 
337   bool isMod() const {
338     return isClampSI() || isOModSI();
339   }
340 
341   bool isRegOrImm() const {
342     return isReg() || isImm();
343   }
344 
345   bool isRegClass(unsigned RCID) const;
346 
347   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
348     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
349   }
350 
351   bool isSCSrcB16() const {
352     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
353   }
354 
355   bool isSCSrcV2B16() const {
356     return isSCSrcB16();
357   }
358 
359   bool isSCSrcB32() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
361   }
362 
363   bool isSCSrcB64() const {
364     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
365   }
366 
367   bool isSCSrcF16() const {
368     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
369   }
370 
371   bool isSCSrcV2F16() const {
372     return isSCSrcF16();
373   }
374 
375   bool isSCSrcF32() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
377   }
378 
379   bool isSCSrcF64() const {
380     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
381   }
382 
383   bool isSSrcB32() const {
384     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
385   }
386 
387   bool isSSrcB16() const {
388     return isSCSrcB16() || isLiteralImm(MVT::i16);
389   }
390 
391   bool isSSrcV2B16() const {
392     llvm_unreachable("cannot happen");
393     return isSSrcB16();
394   }
395 
396   bool isSSrcB64() const {
397     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
398     // See isVSrcB64().
399     return isSCSrcB64() || isLiteralImm(MVT::i64);
400   }
401 
402   bool isSSrcF32() const {
403     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
404   }
405 
406   bool isSSrcF64() const {
407     return isSCSrcB64() || isLiteralImm(MVT::f64);
408   }
409 
410   bool isSSrcF16() const {
411     return isSCSrcB16() || isLiteralImm(MVT::f16);
412   }
413 
414   bool isSSrcV2F16() const {
415     llvm_unreachable("cannot happen");
416     return isSSrcF16();
417   }
418 
419   bool isSSrcOrLdsB32() const {
420     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
421            isLiteralImm(MVT::i32) || isExpr();
422   }
423 
424   bool isVCSrcB32() const {
425     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
426   }
427 
428   bool isVCSrcB64() const {
429     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
430   }
431 
432   bool isVCSrcB16() const {
433     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
434   }
435 
436   bool isVCSrcV2B16() const {
437     return isVCSrcB16();
438   }
439 
440   bool isVCSrcF32() const {
441     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
442   }
443 
444   bool isVCSrcF64() const {
445     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
446   }
447 
448   bool isVCSrcF16() const {
449     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
450   }
451 
452   bool isVCSrcV2F16() const {
453     return isVCSrcF16();
454   }
455 
456   bool isVSrcB32() const {
457     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
458   }
459 
460   bool isVSrcB64() const {
461     return isVCSrcF64() || isLiteralImm(MVT::i64);
462   }
463 
464   bool isVSrcB16() const {
465     return isVCSrcF16() || isLiteralImm(MVT::i16);
466   }
467 
468   bool isVSrcV2B16() const {
469     llvm_unreachable("cannot happen");
470     return isVSrcB16();
471   }
472 
473   bool isVSrcF32() const {
474     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
475   }
476 
477   bool isVSrcF64() const {
478     return isVCSrcF64() || isLiteralImm(MVT::f64);
479   }
480 
481   bool isVSrcF16() const {
482     return isVCSrcF16() || isLiteralImm(MVT::f16);
483   }
484 
485   bool isVSrcV2F16() const {
486     llvm_unreachable("cannot happen");
487     return isVSrcF16();
488   }
489 
490   bool isKImmFP32() const {
491     return isLiteralImm(MVT::f32);
492   }
493 
494   bool isKImmFP16() const {
495     return isLiteralImm(MVT::f16);
496   }
497 
498   bool isMem() const override {
499     return false;
500   }
501 
502   bool isExpr() const {
503     return Kind == Expression;
504   }
505 
506   bool isSoppBrTarget() const {
507     return isExpr() || isImm();
508   }
509 
510   bool isSWaitCnt() const;
511   bool isHwreg() const;
512   bool isSendMsg() const;
513   bool isSwizzle() const;
514   bool isSMRDOffset8() const;
515   bool isSMRDOffset20() const;
516   bool isSMRDLiteralOffset() const;
517   bool isDPPCtrl() const;
518   bool isGPRIdxMode() const;
519   bool isS16Imm() const;
520   bool isU16Imm() const;
521   bool isEndpgm() const;
522 
523   StringRef getExpressionAsToken() const {
524     assert(isExpr());
525     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
526     return S->getSymbol().getName();
527   }
528 
529   StringRef getToken() const {
530     assert(isToken());
531 
532     if (Kind == Expression)
533       return getExpressionAsToken();
534 
535     return StringRef(Tok.Data, Tok.Length);
536   }
537 
538   int64_t getImm() const {
539     assert(isImm());
540     return Imm.Val;
541   }
542 
543   ImmTy getImmTy() const {
544     assert(isImm());
545     return Imm.Type;
546   }
547 
548   unsigned getReg() const override {
549     return Reg.RegNo;
550   }
551 
552   SMLoc getStartLoc() const override {
553     return StartLoc;
554   }
555 
556   SMLoc getEndLoc() const override {
557     return EndLoc;
558   }
559 
560   SMRange getLocRange() const {
561     return SMRange(StartLoc, EndLoc);
562   }
563 
564   Modifiers getModifiers() const {
565     assert(isRegKind() || isImmTy(ImmTyNone));
566     return isRegKind() ? Reg.Mods : Imm.Mods;
567   }
568 
569   void setModifiers(Modifiers Mods) {
570     assert(isRegKind() || isImmTy(ImmTyNone));
571     if (isRegKind())
572       Reg.Mods = Mods;
573     else
574       Imm.Mods = Mods;
575   }
576 
577   bool hasModifiers() const {
578     return getModifiers().hasModifiers();
579   }
580 
581   bool hasFPModifiers() const {
582     return getModifiers().hasFPModifiers();
583   }
584 
585   bool hasIntModifiers() const {
586     return getModifiers().hasIntModifiers();
587   }
588 
589   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
590 
591   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
592 
593   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
594 
595   template <unsigned Bitwidth>
596   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
597 
598   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
599     addKImmFPOperands<16>(Inst, N);
600   }
601 
602   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
603     addKImmFPOperands<32>(Inst, N);
604   }
605 
606   void addRegOperands(MCInst &Inst, unsigned N) const;
607 
608   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
609     if (isRegKind())
610       addRegOperands(Inst, N);
611     else if (isExpr())
612       Inst.addOperand(MCOperand::createExpr(Expr));
613     else
614       addImmOperands(Inst, N);
615   }
616 
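  // For operands that accept source modifiers, the src*_modifiers immediate
  // must be added to the MCInst immediately before the source register or
  // immediate it applies to, which is why the modifier operand is emitted
  // first in the helpers below.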
617   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
618     Modifiers Mods = getModifiers();
619     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
620     if (isRegKind()) {
621       addRegOperands(Inst, N);
622     } else {
623       addImmOperands(Inst, N, false);
624     }
625   }
626 
627   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
628     assert(!hasIntModifiers());
629     addRegOrImmWithInputModsOperands(Inst, N);
630   }
631 
632   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
633     assert(!hasFPModifiers());
634     addRegOrImmWithInputModsOperands(Inst, N);
635   }
636 
637   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
638     Modifiers Mods = getModifiers();
639     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
640     assert(isRegKind());
641     addRegOperands(Inst, N);
642   }
643 
644   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
645     assert(!hasIntModifiers());
646     addRegWithInputModsOperands(Inst, N);
647   }
648 
649   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
650     assert(!hasFPModifiers());
651     addRegWithInputModsOperands(Inst, N);
652   }
653 
654   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
655     if (isImm())
656       addImmOperands(Inst, N);
657     else {
658       assert(isExpr());
659       Inst.addOperand(MCOperand::createExpr(Expr));
660     }
661   }
662 
663   static void printImmTy(raw_ostream& OS, ImmTy Type) {
664     switch (Type) {
665     case ImmTyNone: OS << "None"; break;
666     case ImmTyGDS: OS << "GDS"; break;
667     case ImmTyLDS: OS << "LDS"; break;
668     case ImmTyOffen: OS << "Offen"; break;
669     case ImmTyIdxen: OS << "Idxen"; break;
670     case ImmTyAddr64: OS << "Addr64"; break;
671     case ImmTyOffset: OS << "Offset"; break;
672     case ImmTyInstOffset: OS << "InstOffset"; break;
673     case ImmTyOffset0: OS << "Offset0"; break;
674     case ImmTyOffset1: OS << "Offset1"; break;
675     case ImmTyGLC: OS << "GLC"; break;
676     case ImmTySLC: OS << "SLC"; break;
677     case ImmTyTFE: OS << "TFE"; break;
678     case ImmTyD16: OS << "D16"; break;
679     case ImmTyFORMAT: OS << "FORMAT"; break;
680     case ImmTyClampSI: OS << "ClampSI"; break;
681     case ImmTyOModSI: OS << "OModSI"; break;
682     case ImmTyDppCtrl: OS << "DppCtrl"; break;
683     case ImmTyDppRowMask: OS << "DppRowMask"; break;
684     case ImmTyDppBankMask: OS << "DppBankMask"; break;
685     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
686     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
687     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
688     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
689     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
690     case ImmTyDMask: OS << "DMask"; break;
691     case ImmTyUNorm: OS << "UNorm"; break;
692     case ImmTyDA: OS << "DA"; break;
693     case ImmTyR128A16: OS << "R128A16"; break;
694     case ImmTyLWE: OS << "LWE"; break;
695     case ImmTyOff: OS << "Off"; break;
696     case ImmTyExpTgt: OS << "ExpTgt"; break;
697     case ImmTyExpCompr: OS << "ExpCompr"; break;
698     case ImmTyExpVM: OS << "ExpVM"; break;
699     case ImmTyHwreg: OS << "Hwreg"; break;
700     case ImmTySendMsg: OS << "SendMsg"; break;
701     case ImmTyInterpSlot: OS << "InterpSlot"; break;
702     case ImmTyInterpAttr: OS << "InterpAttr"; break;
703     case ImmTyAttrChan: OS << "AttrChan"; break;
704     case ImmTyOpSel: OS << "OpSel"; break;
705     case ImmTyOpSelHi: OS << "OpSelHi"; break;
706     case ImmTyNegLo: OS << "NegLo"; break;
707     case ImmTyNegHi: OS << "NegHi"; break;
708     case ImmTySwizzle: OS << "Swizzle"; break;
709     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
710     case ImmTyHigh: OS << "High"; break;
711     case ImmTyEndpgm:
712       OS << "Endpgm";
713       break;
714     }
715   }
716 
717   void print(raw_ostream &OS) const override {
718     switch (Kind) {
719     case Register:
720       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
721       break;
722     case Immediate:
723       OS << '<' << getImm();
724       if (getImmTy() != ImmTyNone) {
725         OS << " type: "; printImmTy(OS, getImmTy());
726       }
727       OS << " mods: " << Imm.Mods << '>';
728       break;
729     case Token:
730       OS << '\'' << getToken() << '\'';
731       break;
732     case Expression:
733       OS << "<expr " << *Expr << '>';
734       break;
735     }
736   }
737 
738   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
739                                       int64_t Val, SMLoc Loc,
740                                       ImmTy Type = ImmTyNone,
741                                       bool IsFPImm = false) {
742     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
743     Op->Imm.Val = Val;
744     Op->Imm.IsFPImm = IsFPImm;
745     Op->Imm.Type = Type;
746     Op->Imm.Mods = Modifiers();
747     Op->StartLoc = Loc;
748     Op->EndLoc = Loc;
749     return Op;
750   }
751 
752   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
753                                         StringRef Str, SMLoc Loc,
754                                         bool HasExplicitEncodingSize = true) {
755     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
756     Res->Tok.Data = Str.data();
757     Res->Tok.Length = Str.size();
758     Res->StartLoc = Loc;
759     Res->EndLoc = Loc;
760     return Res;
761   }
762 
763   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
764                                       unsigned RegNo, SMLoc S,
765                                       SMLoc E,
766                                       bool ForceVOP3) {
767     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
768     Op->Reg.RegNo = RegNo;
769     Op->Reg.Mods = Modifiers();
770     Op->Reg.IsForcedVOP3 = ForceVOP3;
771     Op->StartLoc = S;
772     Op->EndLoc = E;
773     return Op;
774   }
775 
776   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
777                                        const class MCExpr *Expr, SMLoc S) {
778     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
779     Op->Expr = Expr;
780     Op->StartLoc = S;
781     Op->EndLoc = S;
782     return Op;
783   }
784 };
785 
786 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
787   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
788   return OS;
789 }
790 
791 //===----------------------------------------------------------------------===//
792 // AsmParser
793 //===----------------------------------------------------------------------===//
794 
795 // Holds info related to the current kernel, e.g. count of SGPRs used.
796 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
797 // next .amdgpu_hsa_kernel directive or at EOF.
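// The scope tracks the highest SGPR/VGPR index referenced and publishes the
// counts through the MC symbols .kernel.sgpr_count and .kernel.vgpr_count,
// which assembly code can reference in expressions, e.g. (illustrative):
//   .amd_kernel_code_t
//     wavefront_sgpr_count = .kernel.sgpr_count
//   .end_amd_kernel_code_t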
798 class KernelScopeInfo {
799   int SgprIndexUnusedMin = -1;
800   int VgprIndexUnusedMin = -1;
801   MCContext *Ctx = nullptr;
802 
803   void usesSgprAt(int i) {
804     if (i >= SgprIndexUnusedMin) {
805       SgprIndexUnusedMin = ++i;
806       if (Ctx) {
807         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
808         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
809       }
810     }
811   }
812 
813   void usesVgprAt(int i) {
814     if (i >= VgprIndexUnusedMin) {
815       VgprIndexUnusedMin = ++i;
816       if (Ctx) {
817         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
818         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
819       }
820     }
821   }
822 
823 public:
824   KernelScopeInfo() = default;
825 
826   void initialize(MCContext &Context) {
827     Ctx = &Context;
828     usesSgprAt(SgprIndexUnusedMin = -1);
829     usesVgprAt(VgprIndexUnusedMin = -1);
830   }
831 
832   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
833     switch (RegKind) {
834       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
835       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
836       default: break;
837     }
838   }
839 };
840 
841 class AMDGPUAsmParser : public MCTargetAsmParser {
842   MCAsmParser &Parser;
843 
844   // Number of extra operands parsed after the first optional operand.
845   // This may be necessary to skip hardcoded mandatory operands.
846   static const unsigned MAX_OPR_LOOKAHEAD = 8;
847 
848   unsigned ForcedEncodingSize = 0;
849   bool ForcedDPP = false;
850   bool ForcedSDWA = false;
851   KernelScopeInfo KernelScope;
852 
853   /// @name Auto-generated Match Functions
854   /// {
855 
856 #define GET_ASSEMBLER_HEADER
857 #include "AMDGPUGenAsmMatcher.inc"
858 
859   /// }
860 
861 private:
862   bool ParseAsAbsoluteExpression(uint32_t &Ret);
863   bool OutOfRangeError(SMRange Range);
864   /// Calculate VGPR/SGPR blocks required for given target, reserved
865   /// registers, and user-specified NextFreeXGPR values.
866   ///
867   /// \param Features [in] Target features, used for bug corrections.
868   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
869   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
870   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
871   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
872   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
873   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
874   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
875   /// \param VGPRBlocks [out] Result VGPR block count.
876   /// \param SGPRBlocks [out] Result SGPR block count.
877   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
878                           bool FlatScrUsed, bool XNACKUsed,
879                           unsigned NextFreeVGPR, SMRange VGPRRange,
880                           unsigned NextFreeSGPR, SMRange SGPRRange,
881                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
882   bool ParseDirectiveAMDGCNTarget();
883   bool ParseDirectiveAMDHSAKernel();
884   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
885   bool ParseDirectiveHSACodeObjectVersion();
886   bool ParseDirectiveHSACodeObjectISA();
887   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
888   bool ParseDirectiveAMDKernelCodeT();
889   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
890   bool ParseDirectiveAMDGPUHsaKernel();
891 
892   bool ParseDirectiveISAVersion();
893   bool ParseDirectiveHSAMetadata();
894   bool ParseDirectivePALMetadata();
895 
896   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
897                              RegisterKind RegKind, unsigned Reg1,
898                              unsigned RegNum);
899   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
900                            unsigned& RegNum, unsigned& RegWidth,
901                            unsigned *DwordRegIndex);
902   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
903   void initializeGprCountSymbol(RegisterKind RegKind);
904   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
905                              unsigned RegWidth);
906   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
907                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
908   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
909                  bool IsGdsHardcoded);
910 
911 public:
912   enum AMDGPUMatchResultTy {
913     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
914   };
915 
916   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
917 
918   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
919                const MCInstrInfo &MII,
920                const MCTargetOptions &Options)
921       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
922     MCAsmParserExtension::Initialize(Parser);
923 
924     if (getFeatureBits().none()) {
925       // Set default features.
926       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
927     }
928 
929     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
930 
931     {
932       // TODO: make those pre-defined variables read-only.
933       // Currently there is no suitable machinery in the core llvm-mc for this.
934       // MCSymbol::isRedefinable is intended for another purpose, and
935       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
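      // These symbols can be referenced from assembly expressions, e.g.
      // (illustrative): ".if .option.machine_version_major >= 8".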
936       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
937       MCContext &Ctx = getContext();
938       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
939         MCSymbol *Sym =
940             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
941         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
942         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
943         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
944         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
945         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
946       } else {
947         MCSymbol *Sym =
948             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
949         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
950         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
951         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
952         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
953         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
954       }
955       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
956         initializeGprCountSymbol(IS_VGPR);
957         initializeGprCountSymbol(IS_SGPR);
958       } else
959         KernelScope.initialize(getContext());
960     }
961   }
962 
963   bool hasXNACK() const {
964     return AMDGPU::hasXNACK(getSTI());
965   }
966 
967   bool hasMIMG_R128() const {
968     return AMDGPU::hasMIMG_R128(getSTI());
969   }
970 
971   bool hasPackedD16() const {
972     return AMDGPU::hasPackedD16(getSTI());
973   }
974 
975   bool isSI() const {
976     return AMDGPU::isSI(getSTI());
977   }
978 
979   bool isCI() const {
980     return AMDGPU::isCI(getSTI());
981   }
982 
983   bool isVI() const {
984     return AMDGPU::isVI(getSTI());
985   }
986 
987   bool isGFX9() const {
988     return AMDGPU::isGFX9(getSTI());
989   }
990 
991   bool hasInv2PiInlineImm() const {
992     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
993   }
994 
995   bool hasFlatOffsets() const {
996     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
997   }
998 
999   bool hasSGPR102_SGPR103() const {
1000     return !isVI();
1001   }
1002 
1003   bool hasIntClamp() const {
1004     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1005   }
1006 
1007   AMDGPUTargetStreamer &getTargetStreamer() {
1008     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1009     return static_cast<AMDGPUTargetStreamer &>(TS);
1010   }
1011 
1012   const MCRegisterInfo *getMRI() const {
1013     // We need this const_cast because for some reason getContext() is not const
1014     // in MCAsmParser.
1015     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1016   }
1017 
1018   const MCInstrInfo *getMII() const {
1019     return &MII;
1020   }
1021 
1022   const FeatureBitset &getFeatureBits() const {
1023     return getSTI().getFeatureBits();
1024   }
1025 
1026   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1027   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1028   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1029 
1030   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1031   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1032   bool isForcedDPP() const { return ForcedDPP; }
1033   bool isForcedSDWA() const { return ForcedSDWA; }
1034   ArrayRef<unsigned> getMatchedVariants() const;
1035 
1036   std::unique_ptr<AMDGPUOperand> parseRegister();
1037   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1038   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1039   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1040                                       unsigned Kind) override;
1041   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1042                                OperandVector &Operands, MCStreamer &Out,
1043                                uint64_t &ErrorInfo,
1044                                bool MatchingInlineAsm) override;
1045   bool ParseDirective(AsmToken DirectiveID) override;
1046   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1047   StringRef parseMnemonicSuffix(StringRef Name);
1048   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1049                         SMLoc NameLoc, OperandVector &Operands) override;
1050   //bool ProcessInstruction(MCInst &Inst);
1051 
1052   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1053 
1054   OperandMatchResultTy
1055   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1056                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1057                      bool (*ConvertResult)(int64_t &) = nullptr);
1058 
1059   OperandMatchResultTy parseOperandArrayWithPrefix(
1060     const char *Prefix,
1061     OperandVector &Operands,
1062     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1063     bool (*ConvertResult)(int64_t&) = nullptr);
1064 
1065   OperandMatchResultTy
1066   parseNamedBit(const char *Name, OperandVector &Operands,
1067                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1068   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1069                                              StringRef &Value);
1070 
1071   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1072   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1073   OperandMatchResultTy parseReg(OperandVector &Operands);
1074   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1075   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1076   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1077   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1078   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1079   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1080   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1081 
1082   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1083   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1084   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1085   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1086 
1087   bool parseCnt(int64_t &IntVal);
1088   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1089   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1090 
1091 private:
1092   struct OperandInfoTy {
1093     int64_t Id;
1094     bool IsSymbolic = false;
1095 
1096     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1097   };
1098 
1099   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1100   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1101 
1102   void errorExpTgt();
1103   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1104 
1105   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1106   bool validateSOPLiteral(const MCInst &Inst) const;
1107   bool validateConstantBusLimitations(const MCInst &Inst);
1108   bool validateEarlyClobberLimitations(const MCInst &Inst);
1109   bool validateIntClampSupported(const MCInst &Inst);
1110   bool validateMIMGAtomicDMask(const MCInst &Inst);
1111   bool validateMIMGGatherDMask(const MCInst &Inst);
1112   bool validateMIMGDataSize(const MCInst &Inst);
1113   bool validateMIMGD16(const MCInst &Inst);
1114   bool validateLdsDirect(const MCInst &Inst);
1115   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1116   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1117   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1118 
1119   bool trySkipId(const StringRef Id);
1120   bool trySkipToken(const AsmToken::TokenKind Kind);
1121   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1122   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1123   bool parseExpr(int64_t &Imm);
1124 
1125 public:
1126   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1127   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1128 
1129   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1130   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1131   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1132   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1133   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1134 
1135   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1136                             const unsigned MinVal,
1137                             const unsigned MaxVal,
1138                             const StringRef ErrMsg);
1139   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1140   bool parseSwizzleOffset(int64_t &Imm);
1141   bool parseSwizzleMacro(int64_t &Imm);
1142   bool parseSwizzleQuadPerm(int64_t &Imm);
1143   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1144   bool parseSwizzleBroadcast(int64_t &Imm);
1145   bool parseSwizzleSwap(int64_t &Imm);
1146   bool parseSwizzleReverse(int64_t &Imm);
1147 
1148   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1149   int64_t parseGPRIdxMacro();
1150 
1151   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1152   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1153   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1154   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1155   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1156 
1157   AMDGPUOperand::Ptr defaultGLC() const;
1158   AMDGPUOperand::Ptr defaultSLC() const;
1159 
1160   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1161   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1162   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1163   AMDGPUOperand::Ptr defaultOffsetU12() const;
1164   AMDGPUOperand::Ptr defaultOffsetS13() const;
1165 
1166   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1167 
1168   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1169                OptionalImmIndexMap &OptionalIdx);
1170   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1171   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1172   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1173 
1174   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1175 
1176   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1177                bool IsAtomic = false);
1178   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1179 
1180   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1181   AMDGPUOperand::Ptr defaultRowMask() const;
1182   AMDGPUOperand::Ptr defaultBankMask() const;
1183   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1184   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1185 
1186   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1187                                     AMDGPUOperand::ImmTy Type);
1188   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1189   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1190   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1191   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1192   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1193   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1194                 uint64_t BasicInstType, bool skipVcc = false);
1195 
1196   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1197   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1198 };
1199 
1200 struct OptionalOperand {
1201   const char *Name;
1202   AMDGPUOperand::ImmTy Type;
1203   bool IsBit;
1204   bool (*ConvertResult)(int64_t&);
1205 };
1206 
1207 } // end anonymous namespace
1208 
1209 // May be called with an integer type of equivalent bitwidth.
1210 static const fltSemantics *getFltSemantics(unsigned Size) {
1211   switch (Size) {
1212   case 4:
1213     return &APFloat::IEEEsingle();
1214   case 8:
1215     return &APFloat::IEEEdouble();
1216   case 2:
1217     return &APFloat::IEEEhalf();
1218   default:
1219     llvm_unreachable("unsupported fp type");
1220   }
1221 }
1222 
1223 static const fltSemantics *getFltSemantics(MVT VT) {
1224   return getFltSemantics(VT.getSizeInBits() / 8);
1225 }
1226 
1227 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1228   switch (OperandType) {
1229   case AMDGPU::OPERAND_REG_IMM_INT32:
1230   case AMDGPU::OPERAND_REG_IMM_FP32:
1231   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1232   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1233     return &APFloat::IEEEsingle();
1234   case AMDGPU::OPERAND_REG_IMM_INT64:
1235   case AMDGPU::OPERAND_REG_IMM_FP64:
1236   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1237   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1238     return &APFloat::IEEEdouble();
1239   case AMDGPU::OPERAND_REG_IMM_INT16:
1240   case AMDGPU::OPERAND_REG_IMM_FP16:
1241   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1242   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1243   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1244   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1245     return &APFloat::IEEEhalf();
1246   default:
1247     llvm_unreachable("unsupported fp type");
1248   }
1249 }
1250 
1251 //===----------------------------------------------------------------------===//
1252 // Operand
1253 //===----------------------------------------------------------------------===//
1254 
1255 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1256   bool Lost;
1257 
1258   // Convert the literal to the target floating-point type.
1259   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1260                                                APFloat::rmNearestTiesToEven,
1261                                                &Lost);
1262   // We allow precision loss but not overflow or underflow
1263   if (Status != APFloat::opOK &&
1264       Lost &&
1265       ((Status & APFloat::opOverflow)  != 0 ||
1266        (Status & APFloat::opUnderflow) != 0)) {
1267     return false;
1268   }
1269 
1270   return true;
1271 }
1272 
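// Inline constants are encoded directly in the instruction and do not consume
// a literal dword. For reference, the hardware accepts integers in [-16, 64]
// and a small set of FP values (0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi)
// on subtargets with the Inv2Pi feature); the checks below delegate to
// AMDGPUBaseInfo to apply these rules per operand size.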
1273 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1274   if (!isImmTy(ImmTyNone)) {
1275     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1276     return false;
1277   }
1278   // TODO: We should avoid using host float here. It would be better to
1279   // check the float bit values which is what a few other places do.
1280   // We've had bot failures before due to weird NaN support on mips hosts.
1281 
1282   APInt Literal(64, Imm.Val);
1283 
1284   if (Imm.IsFPImm) { // We got fp literal token
1285     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1286       return AMDGPU::isInlinableLiteral64(Imm.Val,
1287                                           AsmParser->hasInv2PiInlineImm());
1288     }
1289 
1290     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1291     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1292       return false;
1293 
1294     if (type.getScalarSizeInBits() == 16) {
1295       return AMDGPU::isInlinableLiteral16(
1296         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1297         AsmParser->hasInv2PiInlineImm());
1298     }
1299 
1300     // Check if single precision literal is inlinable
1301     return AMDGPU::isInlinableLiteral32(
1302       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1303       AsmParser->hasInv2PiInlineImm());
1304   }
1305 
1306   // We got int literal token.
1307   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1308     return AMDGPU::isInlinableLiteral64(Imm.Val,
1309                                         AsmParser->hasInv2PiInlineImm());
1310   }
1311 
1312   if (type.getScalarSizeInBits() == 16) {
1313     return AMDGPU::isInlinableLiteral16(
1314       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1315       AsmParser->hasInv2PiInlineImm());
1316   }
1317 
1318   return AMDGPU::isInlinableLiteral32(
1319     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1320     AsmParser->hasInv2PiInlineImm());
1321 }
1322 
1323 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1324   // Check that this immediate can be added as a literal
1325   if (!isImmTy(ImmTyNone)) {
1326     return false;
1327   }
1328 
1329   if (!Imm.IsFPImm) {
1330     // We got int literal token.
1331 
1332     if (type == MVT::f64 && hasFPModifiers()) {
1333       // FP modifiers cannot be applied to an int literal while preserving the
1334       // same semantics for VOP1/2/C and VOP3 encodings because of integer
1335       // truncation. To avoid ambiguity, reject these cases.
1336       return false;
1337     }
1338 
1339     unsigned Size = type.getSizeInBits();
1340     if (Size == 64)
1341       Size = 32;
1342 
1343     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1344     // types.
1345     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1346   }
1347 
1348   // We got fp literal token
1349   if (type == MVT::f64) { // Expected 64-bit fp operand
1350     // The low 32 bits of the literal will be set to zeroes, but we accept these literals.
1351     return true;
1352   }
1353 
1354   if (type == MVT::i64) { // Expected 64-bit int operand
1355     // We don't allow fp literals in 64-bit integer instructions. It is
1356     // unclear how we should encode them.
1357     return false;
1358   }
1359 
1360   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1361   return canLosslesslyConvertToFPType(FPLiteral, type);
1362 }
1363 
1364 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1365   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1366 }
1367 
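// SDWA source operand rules differ by subtarget: on VI an SDWA source must be
// a 32-bit VGPR, while GFX9 additionally allows SGPRs and inline constants.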
1368 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1369   if (AsmParser->isVI())
1370     return isVReg32();
1371   else if (AsmParser->isGFX9())
1372     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1373   else
1374     return false;
1375 }
1376 
1377 bool AMDGPUOperand::isSDWAFP16Operand() const {
1378   return isSDWAOperand(MVT::f16);
1379 }
1380 
1381 bool AMDGPUOperand::isSDWAFP32Operand() const {
1382   return isSDWAOperand(MVT::f32);
1383 }
1384 
1385 bool AMDGPUOperand::isSDWAInt16Operand() const {
1386   return isSDWAOperand(MVT::i16);
1387 }
1388 
1389 bool AMDGPUOperand::isSDWAInt32Operand() const {
1390   return isSDWAOperand(MVT::i32);
1391 }
1392 
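// Applies |abs| and neg modifiers directly to the bit pattern of an FP
// literal: abs clears the sign bit, neg toggles it. For a 32-bit operand
// (Size == 4) the sign mask is 0x80000000.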
1393 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1394 {
1395   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1396   assert(Size == 2 || Size == 4 || Size == 8);
1397 
1398   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1399 
1400   if (Imm.Mods.Abs) {
1401     Val &= ~FpSignMask;
1402   }
1403   if (Imm.Mods.Neg) {
1404     Val ^= FpSignMask;
1405   }
1406 
1407   return Val;
1408 }
1409 
1410 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1411   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1412                              Inst.getNumOperands())) {
1413     addLiteralImmOperand(Inst, Imm.Val,
1414                          ApplyModifiers &&
1415                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1416   } else {
1417     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1418     Inst.addOperand(MCOperand::createImm(Imm.Val));
1419   }
1420 }
1421 
1422 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1423   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1424   auto OpNum = Inst.getNumOperands();
1425   // Check that this operand accepts literals
1426   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1427 
1428   if (ApplyModifiers) {
1429     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1430     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1431     Val = applyInputFPModifiers(Val, Size);
1432   }
1433 
1434   APInt Literal(64, Val);
1435   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1436 
1437   if (Imm.IsFPImm) { // We got fp literal token
1438     switch (OpTy) {
1439     case AMDGPU::OPERAND_REG_IMM_INT64:
1440     case AMDGPU::OPERAND_REG_IMM_FP64:
1441     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1442     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1443       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1444                                        AsmParser->hasInv2PiInlineImm())) {
1445         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1446         return;
1447       }
1448 
1449       // Non-inlineable
1450       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1451         // For fp operands we check if low 32 bits are zeros
1452         if (Literal.getLoBits(32) != 0) {
1453           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1454           "Can't encode literal as exact 64-bit floating-point operand. "
1455           "Low 32-bits will be set to zero");
1456         }
1457 
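        // Only the high 32 bits of the 64-bit FP literal are encoded; the
        // hardware implicitly zero-fills the low half. E.g. 1.0
        // (0x3FF0000000000000) is emitted as 0x3FF00000.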
1458         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1459         return;
1460       }
1461 
1462       // We don't allow fp literals in 64-bit integer instructions. It is
1463       // unclear how we should encode them. This case should be checked earlier
1464       // in predicate methods (isLiteralImm())
1465       llvm_unreachable("fp literal in 64-bit integer instruction.");
1466 
1467     case AMDGPU::OPERAND_REG_IMM_INT32:
1468     case AMDGPU::OPERAND_REG_IMM_FP32:
1469     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1470     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1471     case AMDGPU::OPERAND_REG_IMM_INT16:
1472     case AMDGPU::OPERAND_REG_IMM_FP16:
1473     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1474     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1475     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1476     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1477       bool lost;
1478       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1479       // Convert the literal to the operand's floating-point semantics
1480       FPLiteral.convert(*getOpFltSemantics(OpTy),
1481                         APFloat::rmNearestTiesToEven, &lost);
1482       // We allow precision loss but not overflow or underflow. This should be
1483       // checked earlier in isLiteralImm()
1484 
1485       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1486       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1487           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1488         ImmVal |= (ImmVal << 16);
1489       }
1490 
1491       Inst.addOperand(MCOperand::createImm(ImmVal));
1492       return;
1493     }
1494     default:
1495       llvm_unreachable("invalid operand size");
1496     }
1497 
1498     return;
1499   }
1500 
1501   // We got an int literal token.
1502   // Only sign extend inline immediates.
1503   // FIXME: No errors on truncation
1504   switch (OpTy) {
1505   case AMDGPU::OPERAND_REG_IMM_INT32:
1506   case AMDGPU::OPERAND_REG_IMM_FP32:
1507   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1508   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1509     if (isInt<32>(Val) &&
1510         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1511                                      AsmParser->hasInv2PiInlineImm())) {
1512       Inst.addOperand(MCOperand::createImm(Val));
1513       return;
1514     }
1515 
1516     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1517     return;
1518 
1519   case AMDGPU::OPERAND_REG_IMM_INT64:
1520   case AMDGPU::OPERAND_REG_IMM_FP64:
1521   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1522   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1523     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1524       Inst.addOperand(MCOperand::createImm(Val));
1525       return;
1526     }
1527 
1528     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1529     return;
1530 
1531   case AMDGPU::OPERAND_REG_IMM_INT16:
1532   case AMDGPU::OPERAND_REG_IMM_FP16:
1533   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1534   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1535     if (isInt<16>(Val) &&
1536         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1537                                      AsmParser->hasInv2PiInlineImm())) {
1538       Inst.addOperand(MCOperand::createImm(Val));
1539       return;
1540     }
1541 
1542     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1543     return;
1544 
1545   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1546   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1547     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1548     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1549                                         AsmParser->hasInv2PiInlineImm()));
1550 
1551     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1552                       static_cast<uint32_t>(LiteralVal);
1553     Inst.addOperand(MCOperand::createImm(ImmVal));
1554     return;
1555   }
1556   default:
1557     llvm_unreachable("invalid operand size");
1558   }
1559 }
1560 
1561 template <unsigned Bitwidth>
1562 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1563   APInt Literal(64, Imm.Val);
1564 
1565   if (!Imm.IsFPImm) {
1566     // We got int literal token.
1567     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1568     return;
1569   }
1570 
1571   bool Lost;
1572   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1573   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1574                     APFloat::rmNearestTiesToEven, &Lost);
1575   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1576 }
1577 
1578 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1579   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1580 }
1581 
1582 //===----------------------------------------------------------------------===//
1583 // AsmParser
1584 //===----------------------------------------------------------------------===//
1585 
1586 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1587   if (Is == IS_VGPR) {
1588     switch (RegWidth) {
1589       default: return -1;
1590       case 1: return AMDGPU::VGPR_32RegClassID;
1591       case 2: return AMDGPU::VReg_64RegClassID;
1592       case 3: return AMDGPU::VReg_96RegClassID;
1593       case 4: return AMDGPU::VReg_128RegClassID;
1594       case 8: return AMDGPU::VReg_256RegClassID;
1595       case 16: return AMDGPU::VReg_512RegClassID;
1596     }
1597   } else if (Is == IS_TTMP) {
1598     switch (RegWidth) {
1599       default: return -1;
1600       case 1: return AMDGPU::TTMP_32RegClassID;
1601       case 2: return AMDGPU::TTMP_64RegClassID;
1602       case 4: return AMDGPU::TTMP_128RegClassID;
1603       case 8: return AMDGPU::TTMP_256RegClassID;
1604       case 16: return AMDGPU::TTMP_512RegClassID;
1605     }
1606   } else if (Is == IS_SGPR) {
1607     switch (RegWidth) {
1608       default: return -1;
1609       case 1: return AMDGPU::SGPR_32RegClassID;
1610       case 2: return AMDGPU::SGPR_64RegClassID;
1611       case 4: return AMDGPU::SGPR_128RegClassID;
1612       case 8: return AMDGPU::SGPR_256RegClassID;
1613       case 16: return AMDGPU::SGPR_512RegClassID;
1614     }
1615   }
1616   return -1;
1617 }
1618 
1619 static unsigned getSpecialRegForName(StringRef RegName) {
1620   return StringSwitch<unsigned>(RegName)
1621     .Case("exec", AMDGPU::EXEC)
1622     .Case("vcc", AMDGPU::VCC)
1623     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1624     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1625     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1626     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1627     .Case("m0", AMDGPU::M0)
1628     .Case("scc", AMDGPU::SCC)
1629     .Case("tba", AMDGPU::TBA)
1630     .Case("tma", AMDGPU::TMA)
1631     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1632     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1633     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1634     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1635     .Case("vcc_lo", AMDGPU::VCC_LO)
1636     .Case("vcc_hi", AMDGPU::VCC_HI)
1637     .Case("exec_lo", AMDGPU::EXEC_LO)
1638     .Case("exec_hi", AMDGPU::EXEC_HI)
1639     .Case("tma_lo", AMDGPU::TMA_LO)
1640     .Case("tma_hi", AMDGPU::TMA_HI)
1641     .Case("tba_lo", AMDGPU::TBA_LO)
1642     .Case("tba_hi", AMDGPU::TBA_HI)
1643     .Default(0);
1644 }
1645 
1646 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1647                                     SMLoc &EndLoc) {
1648   auto R = parseRegister();
1649   if (!R) return true;
1650   assert(R->isReg());
1651   RegNo = R->getReg();
1652   StartLoc = R->getStartLoc();
1653   EndLoc = R->getEndLoc();
1654   return false;
1655 }
1656 
1657 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1658                                             RegisterKind RegKind, unsigned Reg1,
1659                                             unsigned RegNum) {
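  // This extends a register list one element at a time; e.g. parsing
  // [s0,s1,s2,s3] grows an SGPR sequence to width 4, while [exec_lo,exec_hi]
  // folds the pair into the single special register exec.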
1660   switch (RegKind) {
1661   case IS_SPECIAL:
1662     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1663       Reg = AMDGPU::EXEC;
1664       RegWidth = 2;
1665       return true;
1666     }
1667     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1668       Reg = AMDGPU::FLAT_SCR;
1669       RegWidth = 2;
1670       return true;
1671     }
1672     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1673       Reg = AMDGPU::XNACK_MASK;
1674       RegWidth = 2;
1675       return true;
1676     }
1677     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1678       Reg = AMDGPU::VCC;
1679       RegWidth = 2;
1680       return true;
1681     }
1682     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1683       Reg = AMDGPU::TBA;
1684       RegWidth = 2;
1685       return true;
1686     }
1687     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1688       Reg = AMDGPU::TMA;
1689       RegWidth = 2;
1690       return true;
1691     }
1692     return false;
1693   case IS_VGPR:
1694   case IS_SGPR:
1695   case IS_TTMP:
1696     if (Reg1 != Reg + RegWidth) {
1697       return false;
1698     }
1699     RegWidth++;
1700     return true;
1701   default:
1702     llvm_unreachable("unexpected register kind");
1703   }
1704 }
1705 
1706 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1707                                           unsigned &RegNum, unsigned &RegWidth,
1708                                           unsigned *DwordRegIndex) {
1709   if (DwordRegIndex) { *DwordRegIndex = 0; }
1710   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1711   if (getLexer().is(AsmToken::Identifier)) {
1712     StringRef RegName = Parser.getTok().getString();
1713     if ((Reg = getSpecialRegForName(RegName))) {
1714       Parser.Lex();
1715       RegKind = IS_SPECIAL;
1716     } else {
1717       unsigned RegNumIndex = 0;
1718       if (RegName[0] == 'v') {
1719         RegNumIndex = 1;
1720         RegKind = IS_VGPR;
1721       } else if (RegName[0] == 's') {
1722         RegNumIndex = 1;
1723         RegKind = IS_SGPR;
1724       } else if (RegName.startswith("ttmp")) {
1725         RegNumIndex = strlen("ttmp");
1726         RegKind = IS_TTMP;
1727       } else {
1728         return false;
1729       }
1730       if (RegName.size() > RegNumIndex) {
1731         // Single 32-bit register: vXX.
1732         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1733           return false;
1734         Parser.Lex();
1735         RegWidth = 1;
1736       } else {
1737         // Range of registers: v[XX:YY]. ":YY" is optional.
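        // For example, "v[4:7]" yields RegNum = 4 and RegWidth = 4, while
        // "v[5]" yields RegNum = 5 and RegWidth = 1.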
1738         Parser.Lex();
1739         int64_t RegLo, RegHi;
1740         if (getLexer().isNot(AsmToken::LBrac))
1741           return false;
1742         Parser.Lex();
1743 
1744         if (getParser().parseAbsoluteExpression(RegLo))
1745           return false;
1746 
1747         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1748         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1749           return false;
1750         Parser.Lex();
1751 
1752         if (isRBrace) {
1753           RegHi = RegLo;
1754         } else {
1755           if (getParser().parseAbsoluteExpression(RegHi))
1756             return false;
1757 
1758           if (getLexer().isNot(AsmToken::RBrac))
1759             return false;
1760           Parser.Lex();
1761         }
1762         RegNum = (unsigned) RegLo;
1763         RegWidth = (RegHi - RegLo) + 1;
1764       }
1765     }
1766   } else if (getLexer().is(AsmToken::LBrac)) {
1767     // List of consecutive registers: [s0,s1,s2,s3]
1768     Parser.Lex();
1769     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1770       return false;
1771     if (RegWidth != 1)
1772       return false;
1773     RegisterKind RegKind1;
1774     unsigned Reg1, RegNum1, RegWidth1;
1775     do {
1776       if (getLexer().is(AsmToken::Comma)) {
1777         Parser.Lex();
1778       } else if (getLexer().is(AsmToken::RBrac)) {
1779         Parser.Lex();
1780         break;
1781       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1782         if (RegWidth1 != 1) {
1783           return false;
1784         }
1785         if (RegKind1 != RegKind) {
1786           return false;
1787         }
1788         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1789           return false;
1790         }
1791       } else {
1792         return false;
1793       }
1794     } while (true);
1795   } else {
1796     return false;
1797   }
1798   switch (RegKind) {
1799   case IS_SPECIAL:
1800     RegNum = 0;
1801     RegWidth = 1;
1802     break;
1803   case IS_VGPR:
1804   case IS_SGPR:
1805   case IS_TTMP:
1806   {
1807     unsigned Size = 1;
1808     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1809       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1810       Size = std::min(RegWidth, 4u);
1811     }
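    // For example, s[2:3] satisfies the alignment check below, while s[1:2]
    // does not (RegNum 1 is not a multiple of the 2-dword size).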
1812     if (RegNum % Size != 0)
1813       return false;
1814     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1815     RegNum = RegNum / Size;
1816     int RCID = getRegClass(RegKind, RegWidth);
1817     if (RCID == -1)
1818       return false;
1819     const MCRegisterClass RC = TRI->getRegClass(RCID);
1820     if (RegNum >= RC.getNumRegs())
1821       return false;
1822     Reg = RC.getRegister(RegNum);
1823     break;
1824   }
1825 
1826   default:
1827     llvm_unreachable("unexpected register kind");
1828   }
1829 
1830   if (!subtargetHasRegister(*TRI, Reg))
1831     return false;
1832   return true;
1833 }
1834 
1835 Optional<StringRef>
1836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1837   switch (RegKind) {
1838   case IS_VGPR:
1839     return StringRef(".amdgcn.next_free_vgpr");
1840   case IS_SGPR:
1841     return StringRef(".amdgcn.next_free_sgpr");
1842   default:
1843     return None;
1844   }
1845 }
1846 
1847 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1848   auto SymbolName = getGprCountSymbolName(RegKind);
1849   assert(SymbolName && "initializing invalid register kind");
1850   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1851   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1852 }
1853 
1854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1855                                             unsigned DwordRegIndex,
1856                                             unsigned RegWidth) {
1857   // Symbols are only defined for GCN targets
1858   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1859     return true;
1860 
1861   auto SymbolName = getGprCountSymbolName(RegKind);
1862   if (!SymbolName)
1863     return true;
1864   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1865 
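  // For example, after parsing v[8:11] (DwordRegIndex = 8, RegWidth = 4),
  // NewMax is 11 and .amdgcn.next_free_vgpr is raised to at least 12.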
1866   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1867   int64_t OldCount;
1868 
1869   if (!Sym->isVariable())
1870     return !Error(getParser().getTok().getLoc(),
1871                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1872   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1873     return !Error(
1874         getParser().getTok().getLoc(),
1875         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1876 
1877   if (OldCount <= NewMax)
1878     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1879 
1880   return true;
1881 }
1882 
1883 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1884   const auto &Tok = Parser.getTok();
1885   SMLoc StartLoc = Tok.getLoc();
1886   SMLoc EndLoc = Tok.getEndLoc();
1887   RegisterKind RegKind;
1888   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1889 
1890   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1891     return nullptr;
1892   }
1893   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1894     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1895       return nullptr;
1896   } else
1897     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1898   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1899 }
1900 
1901 bool
1902 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1903   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1904       (getLexer().getKind() == AsmToken::Integer ||
1905        getLexer().getKind() == AsmToken::Real)) {
1906     // This is a workaround for handling operands like these:
1907     //     |1.0|
1908     //     |-1|
1909     // This syntax is not compatible with the syntax of standard
1910     // MC expressions (due to the trailing '|').
1911 
1912     SMLoc EndLoc;
1913     const MCExpr *Expr;
1914 
1915     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1916       return true;
1917     }
1918 
1919     return !Expr->evaluateAsAbsolute(Val);
1920   }
1921 
1922   return getParser().parseAbsoluteExpression(Val);
1923 }
1924 
1925 OperandMatchResultTy
1926 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1927   // TODO: add syntactic sugar for 1/(2*PI)
1928   bool Minus = false;
1929   if (getLexer().getKind() == AsmToken::Minus) {
1930     const AsmToken NextToken = getLexer().peekTok();
1931     if (!NextToken.is(AsmToken::Integer) &&
1932         !NextToken.is(AsmToken::Real)) {
1933         return MatchOperand_NoMatch;
1934     }
1935     Minus = true;
1936     Parser.Lex();
1937   }
1938 
1939   SMLoc S = Parser.getTok().getLoc();
1940   switch (getLexer().getKind()) {
1941   case AsmToken::Integer: {
1942     int64_t IntVal;
1943     if (parseAbsoluteExpr(IntVal, AbsMod))
1944       return MatchOperand_ParseFail;
1945     if (Minus)
1946       IntVal *= -1;
1947     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1948     return MatchOperand_Success;
1949   }
1950   case AsmToken::Real: {
1951     int64_t IntVal;
1952     if (parseAbsoluteExpr(IntVal, AbsMod))
1953       return MatchOperand_ParseFail;
1954 
1955     APFloat F(BitsToDouble(IntVal));
1956     if (Minus)
1957       F.changeSign();
1958     Operands.push_back(
1959         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1960                                  AMDGPUOperand::ImmTyNone, true));
1961     return MatchOperand_Success;
1962   }
1963   default:
1964     return MatchOperand_NoMatch;
1965   }
1966 }
1967 
1968 OperandMatchResultTy
1969 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1970   if (auto R = parseRegister()) {
1971     assert(R->isReg());
1972     R->Reg.IsForcedVOP3 = isForcedVOP3();
1973     Operands.push_back(std::move(R));
1974     return MatchOperand_Success;
1975   }
1976   return MatchOperand_NoMatch;
1977 }
1978 
1979 OperandMatchResultTy
1980 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1981   auto res = parseImm(Operands, AbsMod);
1982   if (res != MatchOperand_NoMatch) {
1983     return res;
1984   }
1985 
1986   return parseReg(Operands);
1987 }
1988 
1989 OperandMatchResultTy
1990 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1991                                               bool AllowImm) {
1992   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
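  // Operands accepted here include forms such as (illustrative):
  //   -v1, |v2|, -|v3|, abs(v4), neg(v5), -1.0, |-2|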
1993 
1994   if (getLexer().getKind() == AsmToken::Minus) {
1995     const AsmToken NextToken = getLexer().peekTok();
1996 
1997     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1998     if (NextToken.is(AsmToken::Minus)) {
1999       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
2000       return MatchOperand_ParseFail;
2001     }
2002 
2003     // '-' followed by an integer literal N should be interpreted as integer
2004     // negation rather than a floating-point NEG modifier applied to N.
2005     // Besides being counter-intuitive, such use of the floating-point NEG
2006     // modifier gives integer literals different meanings in VOP1/2/C
2007     // and VOP3, for example:
2008     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2009     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2010     // Negative fp literals should be handled likewise for uniformity.
2011     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2012       Parser.Lex();
2013       Negate = true;
2014     }
2015   }
2016 
2017   if (getLexer().getKind() == AsmToken::Identifier &&
2018       Parser.getTok().getString() == "neg") {
2019     if (Negate) {
2020       Error(Parser.getTok().getLoc(), "expected register or immediate");
2021       return MatchOperand_ParseFail;
2022     }
2023     Parser.Lex();
2024     Negate2 = true;
2025     if (getLexer().isNot(AsmToken::LParen)) {
2026       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2027       return MatchOperand_ParseFail;
2028     }
2029     Parser.Lex();
2030   }
2031 
2032   if (getLexer().getKind() == AsmToken::Identifier &&
2033       Parser.getTok().getString() == "abs") {
2034     Parser.Lex();
2035     Abs2 = true;
2036     if (getLexer().isNot(AsmToken::LParen)) {
2037       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2038       return MatchOperand_ParseFail;
2039     }
2040     Parser.Lex();
2041   }
2042 
2043   if (getLexer().getKind() == AsmToken::Pipe) {
2044     if (Abs2) {
2045       Error(Parser.getTok().getLoc(), "expected register or immediate");
2046       return MatchOperand_ParseFail;
2047     }
2048     Parser.Lex();
2049     Abs = true;
2050   }
2051 
2052   OperandMatchResultTy Res;
2053   if (AllowImm) {
2054     Res = parseRegOrImm(Operands, Abs);
2055   } else {
2056     Res = parseReg(Operands);
2057   }
2058   if (Res != MatchOperand_Success) {
2059     return Res;
2060   }
2061 
2062   AMDGPUOperand::Modifiers Mods;
2063   if (Abs) {
2064     if (getLexer().getKind() != AsmToken::Pipe) {
2065       Error(Parser.getTok().getLoc(), "expected vertical bar");
2066       return MatchOperand_ParseFail;
2067     }
2068     Parser.Lex();
2069     Mods.Abs = true;
2070   }
2071   if (Abs2) {
2072     if (getLexer().isNot(AsmToken::RParen)) {
2073       Error(Parser.getTok().getLoc(), "expected closing parenthesis");
2074       return MatchOperand_ParseFail;
2075     }
2076     Parser.Lex();
2077     Mods.Abs = true;
2078   }
2079 
2080   if (Negate) {
2081     Mods.Neg = true;
2082   } else if (Negate2) {
2083     if (getLexer().isNot(AsmToken::RParen)) {
2084       Error(Parser.getTok().getLoc(), "expected closing parenthesis");
2085       return MatchOperand_ParseFail;
2086     }
2087     Parser.Lex();
2088     Mods.Neg = true;
2089   }
2090 
2091   if (Mods.hasFPModifiers()) {
2092     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2093     Op.setModifiers(Mods);
2094   }
2095   return MatchOperand_Success;
2096 }
2097 
2098 OperandMatchResultTy
2099 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2100                                                bool AllowImm) {
2101   bool Sext = false;
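  // For example, this accepts "sext(v0)" as well as a plain register or
  // immediate without the modifier.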
2102 
2103   if (getLexer().getKind() == AsmToken::Identifier &&
2104       Parser.getTok().getString() == "sext") {
2105     Parser.Lex();
2106     Sext = true;
2107     if (getLexer().isNot(AsmToken::LParen)) {
2108       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2109       return MatchOperand_ParseFail;
2110     }
2111     Parser.Lex();
2112   }
2113 
2114   OperandMatchResultTy Res;
2115   if (AllowImm) {
2116     Res = parseRegOrImm(Operands);
2117   } else {
2118     Res = parseReg(Operands);
2119   }
2120   if (Res != MatchOperand_Success) {
2121     return Res;
2122   }
2123 
2124   AMDGPUOperand::Modifiers Mods;
2125   if (Sext) {
2126     if (getLexer().isNot(AsmToken::RParen)) {
2127       Error(Parser.getTok().getLoc(), "expected closing parenthesis");
2128       return MatchOperand_ParseFail;
2129     }
2130     Parser.Lex();
2131     Mods.Sext = true;
2132   }
2133 
2134   if (Mods.hasIntModifiers()) {
2135     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2136     Op.setModifiers(Mods);
2137   }
2138 
2139   return MatchOperand_Success;
2140 }
2141 
2142 OperandMatchResultTy
2143 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2144   return parseRegOrImmWithFPInputMods(Operands, false);
2145 }
2146 
2147 OperandMatchResultTy
2148 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2149   return parseRegOrImmWithIntInputMods(Operands, false);
2150 }
2151 
2152 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2153   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2154   if (Reg) {
2155     Operands.push_back(std::move(Reg));
2156     return MatchOperand_Success;
2157   }
2158 
2159   const AsmToken &Tok = Parser.getTok();
2160   if (Tok.getString() == "off") {
2161     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2162                                                 AMDGPUOperand::ImmTyOff, false));
2163     Parser.Lex();
2164     return MatchOperand_Success;
2165   }
2166 
2167   return MatchOperand_NoMatch;
2168 }
2169 
2170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2171   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2172 
2173   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2174       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2175       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2176       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2177     return Match_InvalidOperand;
2178 
2179   if ((TSFlags & SIInstrFlags::VOP3) &&
2180       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2181       getForcedEncodingSize() != 64)
2182     return Match_PreferE32;
2183 
2184   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2185       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2186     // v_mac_f32/16 allow only dst_sel == DWORD.
2187     auto OpNum =
2188         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2189     const auto &Op = Inst.getOperand(OpNum);
2190     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2191       return Match_InvalidOperand;
2192     }
2193   }
2194 
2195   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2196     // FIXME: Produces an error without reporting the correct column.
2197     auto OpNum =
2198         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2199     const auto &Op = Inst.getOperand(OpNum);
2200     if (Op.getImm() != 0)
2201       return Match_InvalidOperand;
2202   }
2203 
2204   return Match_Success;
2205 }
2206 
2207 // What asm variants we should check
2208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2209   if (getForcedEncodingSize() == 32) {
2210     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2211     return makeArrayRef(Variants);
2212   }
2213 
2214   if (isForcedVOP3()) {
2215     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2216     return makeArrayRef(Variants);
2217   }
2218 
2219   if (isForcedSDWA()) {
2220     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2221                                         AMDGPUAsmVariants::SDWA9};
2222     return makeArrayRef(Variants);
2223   }
2224 
2225   if (isForcedDPP()) {
2226     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2227     return makeArrayRef(Variants);
2228   }
2229 
2230   static const unsigned Variants[] = {
2231     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2232     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2233   };
2234 
2235   return makeArrayRef(Variants);
2236 }
2237 
2238 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2239   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2240   const unsigned Num = Desc.getNumImplicitUses();
2241   for (unsigned i = 0; i < Num; ++i) {
2242     unsigned Reg = Desc.ImplicitUses[i];
2243     switch (Reg) {
2244     case AMDGPU::FLAT_SCR:
2245     case AMDGPU::VCC:
2246     case AMDGPU::M0:
2247       return Reg;
2248     default:
2249       break;
2250     }
2251   }
2252   return AMDGPU::NoRegister;
2253 }
2254 
2255 // NB: This code is correct only when used to check constant
2256 // bus limitations because GFX7 has no f16 inline constants.
2257 // Note that there are no cases when a GFX7 opcode violates
2258 // constant bus limitations due to the use of an f16 constant.
2259 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2260                                        unsigned OpIdx) const {
2261   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2262 
2263   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2264     return false;
2265   }
2266 
2267   const MCOperand &MO = Inst.getOperand(OpIdx);
2268 
2269   int64_t Val = MO.getImm();
2270   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2271 
2272   switch (OpSize) { // expected operand size
2273   case 8:
2274     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2275   case 4:
2276     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2277   case 2: {
2278     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2279     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2280         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2281       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2282     } else {
2283       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2284     }
2285   }
2286   default:
2287     llvm_unreachable("invalid operand size");
2288   }
2289 }
2290 
2291 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2292   const MCOperand &MO = Inst.getOperand(OpIdx);
2293   if (MO.isImm()) {
2294     return !isInlineConstant(Inst, OpIdx);
2295   }
2296   return !MO.isReg() ||
2297          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2298 }
2299 
2300 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2301   const unsigned Opcode = Inst.getOpcode();
2302   const MCInstrDesc &Desc = MII.get(Opcode);
2303   unsigned ConstantBusUseCount = 0;
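  // Only one SGPR or literal may be read via the constant bus; e.g. something
  // like "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and is rejected,
  // while reusing the same SGPR ("v_add_f32_e64 v0, s0, s0") is accepted.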
2304 
2305   if (Desc.TSFlags &
2306       (SIInstrFlags::VOPC |
2307        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2308        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2309        SIInstrFlags::SDWA)) {
2310     // Check special imm operands (used by madmk, etc)
2311     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2312       ++ConstantBusUseCount;
2313     }
2314 
2315     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2316     if (SGPRUsed != AMDGPU::NoRegister) {
2317       ++ConstantBusUseCount;
2318     }
2319 
2320     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2321     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2322     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2323 
2324     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2325 
2326     for (int OpIdx : OpIndices) {
2327       if (OpIdx == -1) break;
2328 
2329       const MCOperand &MO = Inst.getOperand(OpIdx);
2330       if (usesConstantBus(Inst, OpIdx)) {
2331         if (MO.isReg()) {
2332           const unsigned Reg = mc2PseudoReg(MO.getReg());
2333           // Pairs of registers with a partial intersection like these
2334           //   s0, s[0:1]
2335           //   flat_scratch_lo, flat_scratch
2336           //   flat_scratch_lo, flat_scratch_hi
2337           // are theoretically valid but they are disabled anyway.
2338           // Note that this code mimics SIInstrInfo::verifyInstruction
2339           if (Reg != SGPRUsed) {
2340             ++ConstantBusUseCount;
2341           }
2342           SGPRUsed = Reg;
2343         } else { // Expression or a literal
2344           ++ConstantBusUseCount;
2345         }
2346       }
2347     }
2348   }
2349 
2350   return ConstantBusUseCount <= 1;
2351 }
2352 
2353 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2354   const unsigned Opcode = Inst.getOpcode();
2355   const MCInstrDesc &Desc = MII.get(Opcode);
2356 
2357   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2358   if (DstIdx == -1 ||
2359       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2360     return true;
2361   }
2362 
2363   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2364 
2365   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2366   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2367   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2368 
2369   assert(DstIdx != -1);
2370   const MCOperand &Dst = Inst.getOperand(DstIdx);
2371   assert(Dst.isReg());
2372   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2373 
2374   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2375 
2376   for (int SrcIdx : SrcIndices) {
2377     if (SrcIdx == -1) break;
2378     const MCOperand &Src = Inst.getOperand(SrcIdx);
2379     if (Src.isReg()) {
2380       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2381       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2382         return false;
2383       }
2384     }
2385   }
2386 
2387   return true;
2388 }
2389 
2390 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2391 
2392   const unsigned Opc = Inst.getOpcode();
2393   const MCInstrDesc &Desc = MII.get(Opc);
2394 
2395   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2396     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2397     assert(ClampIdx != -1);
2398     return Inst.getOperand(ClampIdx).getImm() == 0;
2399   }
2400 
2401   return true;
2402 }
2403 
2404 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2405 
2406   const unsigned Opc = Inst.getOpcode();
2407   const MCInstrDesc &Desc = MII.get(Opc);
2408 
2409   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2410     return true;
2411 
2412   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2413   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2414   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2415 
2416   assert(VDataIdx != -1);
2417   assert(DMaskIdx != -1);
2418   assert(TFEIdx != -1);
2419 
2420   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2421   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
2422   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2423   if (DMask == 0)
2424     DMask = 1;
2425 
2426   unsigned DataSize =
2427     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2428   if (hasPackedD16()) {
2429     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2430     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2431       DataSize = (DataSize + 1) / 2;
2432   }
2433 
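  // For example, dmask:0x7 selects three components, so vdata must be a
  // 3-dword register tuple (plus one extra dword when tfe is set).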
2434   return (VDataSize / 4) == DataSize + TFESize;
2435 }
2436 
2437 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2438 
2439   const unsigned Opc = Inst.getOpcode();
2440   const MCInstrDesc &Desc = MII.get(Opc);
2441 
2442   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2443     return true;
2444   if (!Desc.mayLoad() || !Desc.mayStore())
2445     return true; // Not atomic
2446 
2447   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2448   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2449 
2450   // This is an incomplete check because image_atomic_cmpswap
2451   // may only use 0x3 and 0xf while other atomic operations
2452   // may use 0x1 and 0x3. However these limitations are
2453   // verified when we check that dmask matches dst size.
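  // E.g. a 32-bit image_atomic_add uses dmask:0x1 (0x3 for 64-bit data),
  // while image_atomic_cmpswap uses dmask:0x3 (0xf for 64-bit data).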
2454   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2455 }
2456 
2457 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2458 
2459   const unsigned Opc = Inst.getOpcode();
2460   const MCInstrDesc &Desc = MII.get(Opc);
2461 
2462   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2463     return true;
2464 
2465   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2466   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2467 
2468   // GATHER4 instructions use dmask in a different fashion compared to
2469   // other MIMG instructions. The only useful DMASK values are
2470   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2471   // (red,red,red,red) etc.) The ISA document doesn't mention
2472   // this.
2473   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2474 }
2475 
2476 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2477 
2478   const unsigned Opc = Inst.getOpcode();
2479   const MCInstrDesc &Desc = MII.get(Opc);
2480 
2481   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2482     return true;
2483 
2484   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2485   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2486     if (isCI() || isSI())
2487       return false;
2488   }
2489 
2490   return true;
2491 }
2492 
2493 static bool IsRevOpcode(const unsigned Opcode) {
2495   switch (Opcode) {
2496   case AMDGPU::V_SUBREV_F32_e32:
2497   case AMDGPU::V_SUBREV_F32_e64:
2498   case AMDGPU::V_SUBREV_F32_e32_si:
2499   case AMDGPU::V_SUBREV_F32_e32_vi:
2500   case AMDGPU::V_SUBREV_F32_e64_si:
2501   case AMDGPU::V_SUBREV_F32_e64_vi:
2502   case AMDGPU::V_SUBREV_I32_e32:
2503   case AMDGPU::V_SUBREV_I32_e64:
2504   case AMDGPU::V_SUBREV_I32_e32_si:
2505   case AMDGPU::V_SUBREV_I32_e64_si:
2506   case AMDGPU::V_SUBBREV_U32_e32:
2507   case AMDGPU::V_SUBBREV_U32_e64:
2508   case AMDGPU::V_SUBBREV_U32_e32_si:
2509   case AMDGPU::V_SUBBREV_U32_e32_vi:
2510   case AMDGPU::V_SUBBREV_U32_e64_si:
2511   case AMDGPU::V_SUBBREV_U32_e64_vi:
2512   case AMDGPU::V_SUBREV_U32_e32:
2513   case AMDGPU::V_SUBREV_U32_e64:
2514   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2515   case AMDGPU::V_SUBREV_U32_e32_vi:
2516   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2517   case AMDGPU::V_SUBREV_U32_e64_vi:
2518   case AMDGPU::V_SUBREV_F16_e32:
2519   case AMDGPU::V_SUBREV_F16_e64:
2520   case AMDGPU::V_SUBREV_F16_e32_vi:
2521   case AMDGPU::V_SUBREV_F16_e64_vi:
2522   case AMDGPU::V_SUBREV_U16_e32:
2523   case AMDGPU::V_SUBREV_U16_e64:
2524   case AMDGPU::V_SUBREV_U16_e32_vi:
2525   case AMDGPU::V_SUBREV_U16_e64_vi:
2526   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2527   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2528   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2529   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2530   case AMDGPU::V_LSHLREV_B32_e32_si:
2531   case AMDGPU::V_LSHLREV_B32_e64_si:
2532   case AMDGPU::V_LSHLREV_B16_e32_vi:
2533   case AMDGPU::V_LSHLREV_B16_e64_vi:
2534   case AMDGPU::V_LSHLREV_B32_e32_vi:
2535   case AMDGPU::V_LSHLREV_B32_e64_vi:
2536   case AMDGPU::V_LSHLREV_B64_vi:
2537   case AMDGPU::V_LSHRREV_B32_e32_si:
2538   case AMDGPU::V_LSHRREV_B32_e64_si:
2539   case AMDGPU::V_LSHRREV_B16_e32_vi:
2540   case AMDGPU::V_LSHRREV_B16_e64_vi:
2541   case AMDGPU::V_LSHRREV_B32_e32_vi:
2542   case AMDGPU::V_LSHRREV_B32_e64_vi:
2543   case AMDGPU::V_LSHRREV_B64_vi:
2544   case AMDGPU::V_ASHRREV_I32_e64_si:
2545   case AMDGPU::V_ASHRREV_I32_e32_si:
2546   case AMDGPU::V_ASHRREV_I16_e32_vi:
2547   case AMDGPU::V_ASHRREV_I16_e64_vi:
2548   case AMDGPU::V_ASHRREV_I32_e32_vi:
2549   case AMDGPU::V_ASHRREV_I32_e64_vi:
2550   case AMDGPU::V_ASHRREV_I64_vi:
2551   case AMDGPU::V_PK_LSHLREV_B16_vi:
2552   case AMDGPU::V_PK_LSHRREV_B16_vi:
2553   case AMDGPU::V_PK_ASHRREV_I16_vi:
2554     return true;
2555   default:
2556     return false;
2557   }
2558 }
2559 
2560 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2561 
2562   using namespace SIInstrFlags;
2563   const unsigned Opcode = Inst.getOpcode();
2564   const MCInstrDesc &Desc = MII.get(Opcode);
2565 
2566   // The lds_direct register can only be encoded in 9-bit source operands.
2567   // Ignore encodings which have no such operands.
2568   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2569     return true;
2570 
2571   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2572   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2573   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2574 
2575   const int SrcIndices[] = { Src1Idx, Src2Idx };
2576 
2577   // lds_direct cannot be specified as either src1 or src2.
2578   for (int SrcIdx : SrcIndices) {
2579     if (SrcIdx == -1) break;
2580     const MCOperand &Src = Inst.getOperand(SrcIdx);
2581     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2582       return false;
2583     }
2584   }
2585 
2586   if (Src0Idx == -1)
2587     return true;
2588 
2589   const MCOperand &Src = Inst.getOperand(Src0Idx);
2590   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2591     return true;
2592 
2593   // lds_direct is specified as src0. Check additional limitations.
2594   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2595 }
2596 
2597 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2598   unsigned Opcode = Inst.getOpcode();
2599   const MCInstrDesc &Desc = MII.get(Opcode);
2600   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2601     return true;
2602 
2603   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2604   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2605 
2606   const int OpIndices[] = { Src0Idx, Src1Idx };
2607 
2608   unsigned NumLiterals = 0;
2609   uint32_t LiteralValue;
2610 
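  // For example, "s_and_b32 s0, 0x12345678, 0x9abcdef0" would require two
  // different 32-bit literals and is rejected; repeating the same literal in
  // both sources is accepted.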
2611   for (int OpIdx : OpIndices) {
2612     if (OpIdx == -1) break;
2613 
2614     const MCOperand &MO = Inst.getOperand(OpIdx);
2615     if (MO.isImm() &&
2616         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2617         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2618         !isInlineConstant(Inst, OpIdx)) {
2619       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2620       if (NumLiterals == 0 || LiteralValue != Value) {
2621         LiteralValue = Value;
2622         ++NumLiterals;
2623       }
2624     }
2625   }
2626 
2627   return NumLiterals <= 1;
2628 }
2629 
2630 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2631                                           const SMLoc &IDLoc) {
2632   if (!validateLdsDirect(Inst)) {
2633     Error(IDLoc,
2634       "invalid use of lds_direct");
2635     return false;
2636   }
2637   if (!validateSOPLiteral(Inst)) {
2638     Error(IDLoc,
2639       "only one literal operand is allowed");
2640     return false;
2641   }
2642   if (!validateConstantBusLimitations(Inst)) {
2643     Error(IDLoc,
2644       "invalid operand (violates constant bus restrictions)");
2645     return false;
2646   }
2647   if (!validateEarlyClobberLimitations(Inst)) {
2648     Error(IDLoc,
2649       "destination must be different than all sources");
2650     return false;
2651   }
2652   if (!validateIntClampSupported(Inst)) {
2653     Error(IDLoc,
2654       "integer clamping is not supported on this GPU");
2655     return false;
2656   }
2657   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2658   if (!validateMIMGD16(Inst)) {
2659     Error(IDLoc,
2660       "d16 modifier is not supported on this GPU");
2661     return false;
2662   }
2663   if (!validateMIMGDataSize(Inst)) {
2664     Error(IDLoc,
2665       "image data size does not match dmask and tfe");
2666     return false;
2667   }
2668   if (!validateMIMGAtomicDMask(Inst)) {
2669     Error(IDLoc,
2670       "invalid atomic image dmask");
2671     return false;
2672   }
2673   if (!validateMIMGGatherDMask(Inst)) {
2674     Error(IDLoc,
2675       "invalid image_gather dmask: only one bit must be set");
2676     return false;
2677   }
2678 
2679   return true;
2680 }
2681 
2682 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2683                                             const FeatureBitset &FBS,
2684                                             unsigned VariantID = 0);
2685 
2686 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2687                                               OperandVector &Operands,
2688                                               MCStreamer &Out,
2689                                               uint64_t &ErrorInfo,
2690                                               bool MatchingInlineAsm) {
2691   MCInst Inst;
2692   unsigned Result = Match_Success;
2693   for (auto Variant : getMatchedVariants()) {
2694     uint64_t EI;
2695     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2696                                   Variant);
2697     // We order match statuses from least to most specific and use the most
2698     // specific status as the result:
2699     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2700     if ((R == Match_Success) ||
2701         (R == Match_PreferE32) ||
2702         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2703         (R == Match_InvalidOperand && Result != Match_MissingFeature
2704                                    && Result != Match_PreferE32) ||
2705         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2706                                    && Result != Match_MissingFeature
2707                                    && Result != Match_PreferE32)) {
2708       Result = R;
2709       ErrorInfo = EI;
2710     }
2711     if (R == Match_Success)
2712       break;
2713   }
2714 
2715   switch (Result) {
2716   default: break;
2717   case Match_Success:
2718     if (!validateInstruction(Inst, IDLoc)) {
2719       return true;
2720     }
2721     Inst.setLoc(IDLoc);
2722     Out.EmitInstruction(Inst, getSTI());
2723     return false;
2724 
2725   case Match_MissingFeature:
2726     return Error(IDLoc, "instruction not supported on this GPU");
2727 
2728   case Match_MnemonicFail: {
2729     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2730     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2731         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2732     return Error(IDLoc, "invalid instruction" + Suggestion,
2733                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2734   }
2735 
2736   case Match_InvalidOperand: {
2737     SMLoc ErrorLoc = IDLoc;
2738     if (ErrorInfo != ~0ULL) {
2739       if (ErrorInfo >= Operands.size()) {
2740         return Error(IDLoc, "too few operands for instruction");
2741       }
2742       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2743       if (ErrorLoc == SMLoc())
2744         ErrorLoc = IDLoc;
2745     }
2746     return Error(ErrorLoc, "invalid operand for instruction");
2747   }
2748 
2749   case Match_PreferE32:
2750     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2751                         "should be encoded as e32");
2752   }
2753   llvm_unreachable("Implement any new match types added!");
2754 }
2755 
2756 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2757   int64_t Tmp = -1;
2758   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2759     return true;
2760   }
2761   if (getParser().parseAbsoluteExpression(Tmp)) {
2762     return true;
2763   }
2764   Ret = static_cast<uint32_t>(Tmp);
2765   return false;
2766 }
2767 
2768 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2769                                                uint32_t &Minor) {
2770   if (ParseAsAbsoluteExpression(Major))
2771     return TokError("invalid major version");
2772 
2773   if (getLexer().isNot(AsmToken::Comma))
2774     return TokError("minor version number required, comma expected");
2775   Lex();
2776 
2777   if (ParseAsAbsoluteExpression(Minor))
2778     return TokError("invalid minor version");
2779 
2780   return false;
2781 }
2782 
2783 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2784   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2785     return TokError("directive only supported for amdgcn architecture");
2786 
2787   std::string Target;
2788 
2789   SMLoc TargetStart = getTok().getLoc();
2790   if (getParser().parseEscapedString(Target))
2791     return true;
2792   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2793 
2794   std::string ExpectedTarget;
2795   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2796   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2797 
2798   if (Target != ExpectedTargetOS.str())
2799     return getParser().Error(TargetRange.Start, "target must match options",
2800                              TargetRange);
2801 
2802   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2803   return false;
2804 }
2805 
2806 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2807   return getParser().Error(Range.Start, "value out of range", Range);
2808 }
2809 
2810 bool AMDGPUAsmParser::calculateGPRBlocks(
2811     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2812     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2813     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2814     unsigned &SGPRBlocks) {
2815   // TODO(scott.linder): These calculations are duplicated from
2816   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2817   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2818 
2819   unsigned NumVGPRs = NextFreeVGPR;
2820   unsigned NumSGPRs = NextFreeSGPR;
2821   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2822 
2823   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2824       NumSGPRs > MaxAddressableNumSGPRs)
2825     return OutOfRangeError(SGPRRange);
2826 
2827   NumSGPRs +=
2828       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2829 
2830   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2831       NumSGPRs > MaxAddressableNumSGPRs)
2832     return OutOfRangeError(SGPRRange);
2833 
2834   if (Features.test(FeatureSGPRInitBug))
2835     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2836 
2837   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2838   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2839 
2840   return false;
2841 }
2842 
2843 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2844   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2845     return TokError("directive only supported for amdgcn architecture");
2846 
2847   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2848     return TokError("directive only supported for amdhsa OS");
2849 
2850   StringRef KernelName;
2851   if (getParser().parseIdentifier(KernelName))
2852     return true;
2853 
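  // A minimal (illustrative) use of the directive parsed here, with a
  // hypothetical kernel name:
  //   .amdhsa_kernel my_kernel
  //     .amdhsa_next_free_vgpr 8
  //     .amdhsa_next_free_sgpr 16
  //   .end_amdhsa_kernel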
2854   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2855 
2856   StringSet<> Seen;
2857 
2858   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2859 
2860   SMRange VGPRRange;
2861   uint64_t NextFreeVGPR = 0;
2862   SMRange SGPRRange;
2863   uint64_t NextFreeSGPR = 0;
2864   unsigned UserSGPRCount = 0;
2865   bool ReserveVCC = true;
2866   bool ReserveFlatScr = true;
2867   bool ReserveXNACK = hasXNACK();
2868 
2869   while (true) {
2870     while (getLexer().is(AsmToken::EndOfStatement))
2871       Lex();
2872 
2873     if (getLexer().isNot(AsmToken::Identifier))
2874       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2875 
2876     StringRef ID = getTok().getIdentifier();
2877     SMRange IDRange = getTok().getLocRange();
2878     Lex();
2879 
2880     if (ID == ".end_amdhsa_kernel")
2881       break;
2882 
2883     if (Seen.find(ID) != Seen.end())
2884       return TokError(".amdhsa_ directives cannot be repeated");
2885     Seen.insert(ID);
2886 
2887     SMLoc ValStart = getTok().getLoc();
2888     int64_t IVal;
2889     if (getParser().parseAbsoluteExpression(IVal))
2890       return true;
2891     SMLoc ValEnd = getTok().getLoc();
2892     SMRange ValRange = SMRange(ValStart, ValEnd);
2893 
2894     if (IVal < 0)
2895       return OutOfRangeError(ValRange);
2896 
2897     uint64_t Val = IVal;
2898 
2899 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2900   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2901     return OutOfRangeError(RANGE);                                             \
2902   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2903 
2904     if (ID == ".amdhsa_group_segment_fixed_size") {
2905       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2906         return OutOfRangeError(ValRange);
2907       KD.group_segment_fixed_size = Val;
2908     } else if (ID == ".amdhsa_private_segment_fixed_size") {
2909       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2910         return OutOfRangeError(ValRange);
2911       KD.private_segment_fixed_size = Val;
2912     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2913       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2914                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2915                        Val, ValRange);
2916       UserSGPRCount++;
2917     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2918       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2919                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2920                        ValRange);
2921       UserSGPRCount++;
2922     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2923       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2924                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2925                        ValRange);
2926       UserSGPRCount++;
2927     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2928       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2929                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2930                        Val, ValRange);
2931       UserSGPRCount++;
2932     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2933       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2934                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2935                        ValRange);
2936       UserSGPRCount++;
2937     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2938       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2939                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2940                        ValRange);
2941       UserSGPRCount++;
2942     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2943       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2944                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2945                        Val, ValRange);
2946       UserSGPRCount++;
2947     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2948       PARSE_BITS_ENTRY(
2949           KD.compute_pgm_rsrc2,
2950           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2951           ValRange);
2952     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2953       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2954                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2955                        ValRange);
2956     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2957       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2958                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2959                        ValRange);
2960     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2961       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2962                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2963                        ValRange);
2964     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2965       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2966                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2967                        ValRange);
2968     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2969       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2970                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2971                        ValRange);
2972     } else if (ID == ".amdhsa_next_free_vgpr") {
2973       VGPRRange = ValRange;
2974       NextFreeVGPR = Val;
2975     } else if (ID == ".amdhsa_next_free_sgpr") {
2976       SGPRRange = ValRange;
2977       NextFreeSGPR = Val;
2978     } else if (ID == ".amdhsa_reserve_vcc") {
2979       if (!isUInt<1>(Val))
2980         return OutOfRangeError(ValRange);
2981       ReserveVCC = Val;
2982     } else if (ID == ".amdhsa_reserve_flat_scratch") {
2983       if (IVersion.Major < 7)
2984         return getParser().Error(IDRange.Start, "directive requires gfx7+",
2985                                  IDRange);
2986       if (!isUInt<1>(Val))
2987         return OutOfRangeError(ValRange);
2988       ReserveFlatScr = Val;
2989     } else if (ID == ".amdhsa_reserve_xnack_mask") {
2990       if (IVersion.Major < 8)
2991         return getParser().Error(IDRange.Start, "directive requires gfx8+",
2992                                  IDRange);
2993       if (!isUInt<1>(Val))
2994         return OutOfRangeError(ValRange);
2995       ReserveXNACK = Val;
2996     } else if (ID == ".amdhsa_float_round_mode_32") {
2997       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2998                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2999     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3000       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3001                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3002     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3003       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3004                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3005     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3006       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3007                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3008                        ValRange);
3009     } else if (ID == ".amdhsa_dx10_clamp") {
3010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3011                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3012     } else if (ID == ".amdhsa_ieee_mode") {
3013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3014                        Val, ValRange);
3015     } else if (ID == ".amdhsa_fp16_overflow") {
3016       if (IVersion.Major < 9)
3017         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3018                                  IDRange);
3019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3020                        ValRange);
3021     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3022       PARSE_BITS_ENTRY(
3023           KD.compute_pgm_rsrc2,
3024           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3025           ValRange);
3026     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3028                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3029                        Val, ValRange);
3030     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3031       PARSE_BITS_ENTRY(
3032           KD.compute_pgm_rsrc2,
3033           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3034           ValRange);
3035     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3036       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3037                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3038                        Val, ValRange);
3039     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3040       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3041                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3042                        Val, ValRange);
3043     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3044       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3045                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3046                        Val, ValRange);
3047     } else if (ID == ".amdhsa_exception_int_div_zero") {
3048       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3049                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3050                        Val, ValRange);
3051     } else {
3052       return getParser().Error(IDRange.Start,
3053                                "unknown .amdhsa_kernel directive", IDRange);
3054     }
3055 
3056 #undef PARSE_BITS_ENTRY
3057   }
3058 
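  // The two register-count directives are mandatory: the values recorded above
  // feed the granulated VGPR/SGPR block calculation below.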
3059   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3060     return TokError(".amdhsa_next_free_vgpr directive is required");
3061 
3062   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3063     return TokError(".amdhsa_next_free_sgpr directive is required");
3064 
3065   unsigned VGPRBlocks;
3066   unsigned SGPRBlocks;
3067   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3068                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3069                          SGPRRange, VGPRBlocks, SGPRBlocks))
3070     return true;
3071 
3072   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3073           VGPRBlocks))
3074     return OutOfRangeError(VGPRRange);
3075   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3076                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3077 
3078   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3079           SGPRBlocks))
3080     return OutOfRangeError(SGPRRange);
3081   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3082                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3083                   SGPRBlocks);
3084 
3085   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3086     return TokError("too many user SGPRs enabled");
3087   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3088                   UserSGPRCount);
3089 
3090   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3091       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3092       ReserveFlatScr, ReserveXNACK);
3093   return false;
3094 }
3095 
3096 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3097   uint32_t Major;
3098   uint32_t Minor;
3099 
3100   if (ParseDirectiveMajorMinor(Major, Minor))
3101     return true;
3102 
3103   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3104   return false;
3105 }
3106 
3107 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3108   uint32_t Major;
3109   uint32_t Minor;
3110   uint32_t Stepping;
3111   StringRef VendorName;
3112   StringRef ArchName;
3113 
3114   // If this directive has no arguments, then use the ISA version for the
3115   // targeted GPU.
3116   if (getLexer().is(AsmToken::EndOfStatement)) {
3117     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3118     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3119                                                       ISA.Stepping,
3120                                                       "AMD", "AMDGPU");
3121     return false;
3122   }
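  // Otherwise the directive carries explicit arguments of the form
  //   <major>, <minor>, <stepping>, "<vendor>", "<arch>".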
3123 
3124   if (ParseDirectiveMajorMinor(Major, Minor))
3125     return true;
3126 
3127   if (getLexer().isNot(AsmToken::Comma))
3128     return TokError("stepping version number required, comma expected");
3129   Lex();
3130 
3131   if (ParseAsAbsoluteExpression(Stepping))
3132     return TokError("invalid stepping version");
3133 
3134   if (getLexer().isNot(AsmToken::Comma))
3135     return TokError("vendor name required, comma expected");
3136   Lex();
3137 
3138   if (getLexer().isNot(AsmToken::String))
3139     return TokError("invalid vendor name");
3140 
3141   VendorName = getLexer().getTok().getStringContents();
3142   Lex();
3143 
3144   if (getLexer().isNot(AsmToken::Comma))
3145     return TokError("arch name required, comma expected");
3146   Lex();
3147 
3148   if (getLexer().isNot(AsmToken::String))
3149     return TokError("invalid arch name");
3150 
3151   ArchName = getLexer().getTok().getStringContents();
3152   Lex();
3153 
3154   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3155                                                     VendorName, ArchName);
3156   return false;
3157 }
3158 
3159 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3160                                                amd_kernel_code_t &Header) {
3161   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3162   // assembly for backwards compatibility.
3163   if (ID == "max_scratch_backing_memory_byte_size") {
3164     Parser.eatToEndOfStatement();
3165     return false;
3166   }
3167 
3168   SmallString<40> ErrStr;
3169   raw_svector_ostream Err(ErrStr);
3170   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3171     return TokError(Err.str());
3172   }
3173   Lex();
3174   return false;
3175 }
3176 
3177 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3178   amd_kernel_code_t Header;
3179   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3180 
3181   while (true) {
3182     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3183     // will set the current token to EndOfStatement.
3184     while(getLexer().is(AsmToken::EndOfStatement))
3185       Lex();
3186 
3187     if (getLexer().isNot(AsmToken::Identifier))
3188       return TokError("expected value identifier or .end_amd_kernel_code_t");
3189 
3190     StringRef ID = getLexer().getTok().getIdentifier();
3191     Lex();
3192 
3193     if (ID == ".end_amd_kernel_code_t")
3194       break;
3195 
3196     if (ParseAMDKernelCodeTValue(ID, Header))
3197       return true;
3198   }
3199 
3200   getTargetStreamer().EmitAMDKernelCodeT(Header);
3201 
3202   return false;
3203 }
3204 
3205 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3206   if (getLexer().isNot(AsmToken::Identifier))
3207     return TokError("expected symbol name");
3208 
3209   StringRef KernelName = Parser.getTok().getString();
3210 
3211   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3212                                            ELF::STT_AMDGPU_HSA_KERNEL);
3213   Lex();
3214   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3215     KernelScope.initialize(getContext());
3216   return false;
3217 }
3218 
3219 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3220   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3221     return Error(getParser().getTok().getLoc(),
3222                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3223                  "architectures");
3224   }
3225 
3226   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3227 
3228   std::string ISAVersionStringFromSTI;
3229   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3230   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3231 
3232   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3233     return Error(getParser().getTok().getLoc(),
3234                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3235                  "arguments specified through the command line");
3236   }
3237 
3238   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3239   Lex();
3240 
3241   return false;
3242 }
3243 
3244 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3245   const char *AssemblerDirectiveBegin;
3246   const char *AssemblerDirectiveEnd;
3247   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3248       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3249           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3250                             HSAMD::V3::AssemblerDirectiveEnd)
3251           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3252                             HSAMD::AssemblerDirectiveEnd);
3253 
3254   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3255     return Error(getParser().getTok().getLoc(),
3256                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3257                  "not available on non-amdhsa OSes")).str());
3258   }
3259 
3260   std::string HSAMetadataString;
3261   raw_string_ostream YamlStream(HSAMetadataString);
3262 
3263   getLexer().setSkipSpace(false);
3264 
3265   bool FoundEnd = false;
3266   while (!getLexer().is(AsmToken::Eof)) {
3267     while (getLexer().is(AsmToken::Space)) {
3268       YamlStream << getLexer().getTok().getString();
3269       Lex();
3270     }
3271 
3272     if (getLexer().is(AsmToken::Identifier)) {
3273       StringRef ID = getLexer().getTok().getIdentifier();
3274       if (ID == AssemblerDirectiveEnd) {
3275         Lex();
3276         FoundEnd = true;
3277         break;
3278       }
3279     }
3280 
3281     YamlStream << Parser.parseStringToEndOfStatement()
3282                << getContext().getAsmInfo()->getSeparatorString();
3283 
3284     Parser.eatToEndOfStatement();
3285   }
3286 
3287   getLexer().setSkipSpace(true);
3288 
3289   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3290     return TokError(Twine("expected directive ") +
3291                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3292   }
3293 
3294   YamlStream.flush();
3295 
3296   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3297     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3298       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3299   } else {
3300     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3301       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3302   }
3303 
3304   return false;
3305 }
3306 
3307 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3308   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3309     return Error(getParser().getTok().getLoc(),
3310                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3311                  "not available on non-amdpal OSes")).str());
3312   }
3313 
3314   PALMD::Metadata PALMetadata;
3315   for (;;) {
3316     uint32_t Value;
3317     if (ParseAsAbsoluteExpression(Value)) {
3318       return TokError(Twine("invalid value in ") +
3319                       Twine(PALMD::AssemblerDirective));
3320     }
3321     PALMetadata.push_back(Value);
3322     if (getLexer().isNot(AsmToken::Comma))
3323       break;
3324     Lex();
3325   }
3326   getTargetStreamer().EmitPALMetadata(PALMetadata);
3327   return false;
3328 }
3329 
3330 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3331   StringRef IDVal = DirectiveID.getString();
3332 
3333   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3334     if (IDVal == ".amdgcn_target")
3335       return ParseDirectiveAMDGCNTarget();
3336 
3337     if (IDVal == ".amdhsa_kernel")
3338       return ParseDirectiveAMDHSAKernel();
3339 
3340     // TODO: Restructure/combine with PAL metadata directive.
3341     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3342       return ParseDirectiveHSAMetadata();
3343   } else {
3344     if (IDVal == ".hsa_code_object_version")
3345       return ParseDirectiveHSACodeObjectVersion();
3346 
3347     if (IDVal == ".hsa_code_object_isa")
3348       return ParseDirectiveHSACodeObjectISA();
3349 
3350     if (IDVal == ".amd_kernel_code_t")
3351       return ParseDirectiveAMDKernelCodeT();
3352 
3353     if (IDVal == ".amdgpu_hsa_kernel")
3354       return ParseDirectiveAMDGPUHsaKernel();
3355 
3356     if (IDVal == ".amd_amdgpu_isa")
3357       return ParseDirectiveISAVersion();
3358 
3359     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3360       return ParseDirectiveHSAMetadata();
3361   }
3362 
3363   if (IDVal == PALMD::AssemblerDirective)
3364     return ParseDirectivePALMetadata();
3365 
3366   return true;
3367 }
3368 
3369 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3370                                            unsigned RegNo) const {
3371 
3372   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3373        R.isValid(); ++R) {
3374     if (*R == RegNo)
3375       return isGFX9();
3376   }
3377 
3378   switch (RegNo) {
3379   case AMDGPU::TBA:
3380   case AMDGPU::TBA_LO:
3381   case AMDGPU::TBA_HI:
3382   case AMDGPU::TMA:
3383   case AMDGPU::TMA_LO:
3384   case AMDGPU::TMA_HI:
3385     return !isGFX9();
3386   case AMDGPU::XNACK_MASK:
3387   case AMDGPU::XNACK_MASK_LO:
3388   case AMDGPU::XNACK_MASK_HI:
3389     return !isCI() && !isSI() && hasXNACK();
3390   default:
3391     break;
3392   }
3393 
3394   if (isCI())
3395     return true;
3396 
3397   if (isSI()) {
3398     // No flat_scr
3399     switch (RegNo) {
3400     case AMDGPU::FLAT_SCR:
3401     case AMDGPU::FLAT_SCR_LO:
3402     case AMDGPU::FLAT_SCR_HI:
3403       return false;
3404     default:
3405       return true;
3406     }
3407   }
3408 
3409   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3410   // SI/CI have.
3411   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3412        R.isValid(); ++R) {
3413     if (*R == RegNo)
3414       return false;
3415   }
3416 
3417   return true;
3418 }
3419 
3420 OperandMatchResultTy
3421 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3422   // Try to parse with a custom parser
3423   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3424 
3425   // If we successfully parsed the operand or if there was an error parsing,
3426   // we are done.
3427   //
3428   // If we are parsing after we reach EndOfStatement, then this means we
3429   // are appending default values to the Operands list.  This is only done
3430   // by custom parsers, so we shouldn't continue on to the generic parsing.
3431   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3432       getLexer().is(AsmToken::EndOfStatement))
3433     return ResTy;
3434 
3435   ResTy = parseRegOrImm(Operands);
3436 
3437   if (ResTy == MatchOperand_Success)
3438     return ResTy;
3439 
3440   const auto &Tok = Parser.getTok();
3441   SMLoc S = Tok.getLoc();
3442 
3443   const MCExpr *Expr = nullptr;
3444   if (!Parser.parseExpression(Expr)) {
3445     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3446     return MatchOperand_Success;
3447   }
3448 
3449   // Possibly this is an instruction flag like 'gds'.
3450   if (Tok.getKind() == AsmToken::Identifier) {
3451     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3452     Parser.Lex();
3453     return MatchOperand_Success;
3454   }
3455 
3456   return MatchOperand_NoMatch;
3457 }
3458 
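// Strip a forced-encoding suffix (_e64, _e32, _dpp or _sdwa) from the mnemonic,
// record the corresponding forced encoding, and return the bare mnemonic.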
3459 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3460   // Clear any forced encodings from the previous instruction.
3461   setForcedEncodingSize(0);
3462   setForcedDPP(false);
3463   setForcedSDWA(false);
3464 
3465   if (Name.endswith("_e64")) {
3466     setForcedEncodingSize(64);
3467     return Name.substr(0, Name.size() - 4);
3468   } else if (Name.endswith("_e32")) {
3469     setForcedEncodingSize(32);
3470     return Name.substr(0, Name.size() - 4);
3471   } else if (Name.endswith("_dpp")) {
3472     setForcedDPP(true);
3473     return Name.substr(0, Name.size() - 4);
3474   } else if (Name.endswith("_sdwa")) {
3475     setForcedSDWA(true);
3476     return Name.substr(0, Name.size() - 5);
3477   }
3478   return Name;
3479 }
3480 
3481 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3482                                        StringRef Name,
3483                                        SMLoc NameLoc, OperandVector &Operands) {
3484   // Add the instruction mnemonic
3485   Name = parseMnemonicSuffix(Name);
3486   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3487 
3488   while (!getLexer().is(AsmToken::EndOfStatement)) {
3489     OperandMatchResultTy Res = parseOperand(Operands, Name);
3490 
3491     // Eat the comma or space if there is one.
3492     if (getLexer().is(AsmToken::Comma))
3493       Parser.Lex();
3494 
3495     switch (Res) {
3496       case MatchOperand_Success: break;
3497       case MatchOperand_ParseFail:
3498         Error(getLexer().getLoc(), "failed parsing operand.");
3499         while (!getLexer().is(AsmToken::EndOfStatement)) {
3500           Parser.Lex();
3501         }
3502         return true;
3503       case MatchOperand_NoMatch:
3504         Error(getLexer().getLoc(), "not a valid operand.");
3505         while (!getLexer().is(AsmToken::EndOfStatement)) {
3506           Parser.Lex();
3507         }
3508         return true;
3509     }
3510   }
3511 
3512   return false;
3513 }
3514 
3515 //===----------------------------------------------------------------------===//
3516 // Utility functions
3517 //===----------------------------------------------------------------------===//
3518 
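// Parse an integer operand written as "<Prefix>:<value>", e.g. "dfmt:1".
// A minus sign in front of the value is accepted and negates it.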
3519 OperandMatchResultTy
3520 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3521   switch(getLexer().getKind()) {
3522     default: return MatchOperand_NoMatch;
3523     case AsmToken::Identifier: {
3524       StringRef Name = Parser.getTok().getString();
3525       if (!Name.equals(Prefix)) {
3526         return MatchOperand_NoMatch;
3527       }
3528 
3529       Parser.Lex();
3530       if (getLexer().isNot(AsmToken::Colon))
3531         return MatchOperand_ParseFail;
3532 
3533       Parser.Lex();
3534 
3535       bool IsMinus = false;
3536       if (getLexer().getKind() == AsmToken::Minus) {
3537         Parser.Lex();
3538         IsMinus = true;
3539       }
3540 
3541       if (getLexer().isNot(AsmToken::Integer))
3542         return MatchOperand_ParseFail;
3543 
3544       if (getParser().parseAbsoluteExpression(Int))
3545         return MatchOperand_ParseFail;
3546 
3547       if (IsMinus)
3548         Int = -Int;
3549       break;
3550     }
3551   }
3552   return MatchOperand_Success;
3553 }
3554 
3555 OperandMatchResultTy
3556 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3557                                     AMDGPUOperand::ImmTy ImmTy,
3558                                     bool (*ConvertResult)(int64_t&)) {
3559   SMLoc S = Parser.getTok().getLoc();
3560   int64_t Value = 0;
3561 
3562   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3563   if (Res != MatchOperand_Success)
3564     return Res;
3565 
3566   if (ConvertResult && !ConvertResult(Value)) {
3567     return MatchOperand_ParseFail;
3568   }
3569 
3570   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3571   return MatchOperand_Success;
3572 }
3573 
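// Parse an operand written as "<Prefix>:[v0,v1,...]" with up to four 0/1
// elements; element I is packed into bit I of the resulting immediate.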
3574 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3575   const char *Prefix,
3576   OperandVector &Operands,
3577   AMDGPUOperand::ImmTy ImmTy,
3578   bool (*ConvertResult)(int64_t&)) {
3579   StringRef Name = Parser.getTok().getString();
3580   if (!Name.equals(Prefix))
3581     return MatchOperand_NoMatch;
3582 
3583   Parser.Lex();
3584   if (getLexer().isNot(AsmToken::Colon))
3585     return MatchOperand_ParseFail;
3586 
3587   Parser.Lex();
3588   if (getLexer().isNot(AsmToken::LBrac))
3589     return MatchOperand_ParseFail;
3590   Parser.Lex();
3591 
3592   unsigned Val = 0;
3593   SMLoc S = Parser.getTok().getLoc();
3594 
3595   // FIXME: How to verify the number of elements matches the number of src
3596   // operands?
3597   for (int I = 0; I < 4; ++I) {
3598     if (I != 0) {
3599       if (getLexer().is(AsmToken::RBrac))
3600         break;
3601 
3602       if (getLexer().isNot(AsmToken::Comma))
3603         return MatchOperand_ParseFail;
3604       Parser.Lex();
3605     }
3606 
3607     if (getLexer().isNot(AsmToken::Integer))
3608       return MatchOperand_ParseFail;
3609 
3610     int64_t Op;
3611     if (getParser().parseAbsoluteExpression(Op))
3612       return MatchOperand_ParseFail;
3613 
3614     if (Op != 0 && Op != 1)
3615       return MatchOperand_ParseFail;
3616     Val |= (Op << I);
3617   }
3618 
3619   Parser.Lex();
3620   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3621   return MatchOperand_Success;
3622 }
3623 
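// Parse a named single-bit modifier: "<Name>" sets the bit and "no<Name>"
// clears it. If the statement ends before the modifier, the default value 0
// is used.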
3624 OperandMatchResultTy
3625 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3626                                AMDGPUOperand::ImmTy ImmTy) {
3627   int64_t Bit = 0;
3628   SMLoc S = Parser.getTok().getLoc();
3629 
3630   // If we are already at the end of the statement, this is a default argument,
3631   // so keep the default value (0); otherwise, parse the named bit.
3632   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3633     switch(getLexer().getKind()) {
3634       case AsmToken::Identifier: {
3635         StringRef Tok = Parser.getTok().getString();
3636         if (Tok == Name) {
3637           if (Tok == "r128" && isGFX9())
3638             Error(S, "r128 modifier is not supported on this GPU");
3639           if (Tok == "a16" && !isGFX9())
3640             Error(S, "a16 modifier is not supported on this GPU");
3641           Bit = 1;
3642           Parser.Lex();
3643         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3644           Bit = 0;
3645           Parser.Lex();
3646         } else {
3647           return MatchOperand_NoMatch;
3648         }
3649         break;
3650       }
3651       default:
3652         return MatchOperand_NoMatch;
3653     }
3654   }
3655 
3656   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3657   return MatchOperand_Success;
3658 }
3659 
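// Append an optional immediate operand to Inst: take the parsed value if the
// operand was present, otherwise use the supplied Default.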
3660 static void addOptionalImmOperand(
3661   MCInst& Inst, const OperandVector& Operands,
3662   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3663   AMDGPUOperand::ImmTy ImmT,
3664   int64_t Default = 0) {
3665   auto i = OptionalIdx.find(ImmT);
3666   if (i != OptionalIdx.end()) {
3667     unsigned Idx = i->second;
3668     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3669   } else {
3670     Inst.addOperand(MCOperand::createImm(Default));
3671   }
3672 }
3673 
3674 OperandMatchResultTy
3675 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3676   if (getLexer().isNot(AsmToken::Identifier)) {
3677     return MatchOperand_NoMatch;
3678   }
3679   StringRef Tok = Parser.getTok().getString();
3680   if (Tok != Prefix) {
3681     return MatchOperand_NoMatch;
3682   }
3683 
3684   Parser.Lex();
3685   if (getLexer().isNot(AsmToken::Colon)) {
3686     return MatchOperand_ParseFail;
3687   }
3688 
3689   Parser.Lex();
3690   if (getLexer().isNot(AsmToken::Identifier)) {
3691     return MatchOperand_ParseFail;
3692   }
3693 
3694   Value = Parser.getTok().getString();
3695   return MatchOperand_Success;
3696 }
3697 
3698 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3699 // values to live in a joint format operand in the MCInst encoding.
3700 OperandMatchResultTy
3701 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3702   SMLoc S = Parser.getTok().getLoc();
3703   int64_t Dfmt = 0, Nfmt = 0;
3704   // dfmt and nfmt can appear in either order, and each is optional.
3705   bool GotDfmt = false, GotNfmt = false;
3706   while (!GotDfmt || !GotNfmt) {
3707     if (!GotDfmt) {
3708       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3709       if (Res != MatchOperand_NoMatch) {
3710         if (Res != MatchOperand_Success)
3711           return Res;
3712         if (Dfmt >= 16) {
3713           Error(Parser.getTok().getLoc(), "out of range dfmt");
3714           return MatchOperand_ParseFail;
3715         }
3716         GotDfmt = true;
3717         Parser.Lex();
3718         continue;
3719       }
3720     }
3721     if (!GotNfmt) {
3722       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3723       if (Res != MatchOperand_NoMatch) {
3724         if (Res != MatchOperand_Success)
3725           return Res;
3726         if (Nfmt >= 8) {
3727           Error(Parser.getTok().getLoc(), "out of range nfmt");
3728           return MatchOperand_ParseFail;
3729         }
3730         GotNfmt = true;
3731         Parser.Lex();
3732         continue;
3733       }
3734     }
3735     break;
3736   }
3737   if (!GotDfmt && !GotNfmt)
3738     return MatchOperand_NoMatch;
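  // Pack the combined format operand: dfmt occupies bits [3:0], nfmt bits [6:4].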
3739   auto Format = Dfmt | Nfmt << 4;
3740   Operands.push_back(
3741       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3742   return MatchOperand_Success;
3743 }
3744 
3745 //===----------------------------------------------------------------------===//
3746 // ds
3747 //===----------------------------------------------------------------------===//
3748 
3749 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3750                                     const OperandVector &Operands) {
3751   OptionalImmIndexMap OptionalIdx;
3752 
3753   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3754     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3755 
3756     // Add the register arguments
3757     if (Op.isReg()) {
3758       Op.addRegOperands(Inst, 1);
3759       continue;
3760     }
3761 
3762     // Handle optional arguments
3763     OptionalIdx[Op.getImmTy()] = i;
3764   }
3765 
3766   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3767   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3768   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3769 
3770   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3771 }
3772 
3773 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3774                                 bool IsGdsHardcoded) {
3775   OptionalImmIndexMap OptionalIdx;
3776 
3777   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3778     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3779 
3780     // Add the register arguments
3781     if (Op.isReg()) {
3782       Op.addRegOperands(Inst, 1);
3783       continue;
3784     }
3785 
3786     if (Op.isToken() && Op.getToken() == "gds") {
3787       IsGdsHardcoded = true;
3788       continue;
3789     }
3790 
3791     // Handle optional arguments
3792     OptionalIdx[Op.getImmTy()] = i;
3793   }
3794 
3795   AMDGPUOperand::ImmTy OffsetType =
3796     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3797      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3798                                                       AMDGPUOperand::ImmTyOffset;
3799 
3800   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3801 
3802   if (!IsGdsHardcoded) {
3803     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3804   }
3805   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3806 }
3807 
3808 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3809   OptionalImmIndexMap OptionalIdx;
3810 
3811   unsigned OperandIdx[4];
3812   unsigned EnMask = 0;
3813   int SrcIdx = 0;
3814 
3815   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3816     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3817 
3818     // Add the register arguments
3819     if (Op.isReg()) {
3820       assert(SrcIdx < 4);
3821       OperandIdx[SrcIdx] = Inst.size();
3822       Op.addRegOperands(Inst, 1);
3823       ++SrcIdx;
3824       continue;
3825     }
3826 
3827     if (Op.isOff()) {
3828       assert(SrcIdx < 4);
3829       OperandIdx[SrcIdx] = Inst.size();
3830       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3831       ++SrcIdx;
3832       continue;
3833     }
3834 
3835     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3836       Op.addImmOperands(Inst, 1);
3837       continue;
3838     }
3839 
3840     if (Op.isToken() && Op.getToken() == "done")
3841       continue;
3842 
3843     // Handle optional arguments
3844     OptionalIdx[Op.getImmTy()] = i;
3845   }
3846 
3847   assert(SrcIdx == 4);
3848 
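  // For compressed exports, the operand parsed as the third source moves into
  // the second slot, the remaining slots are cleared, and each enabled source
  // contributes two bits to the enable mask.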
3849   bool Compr = false;
3850   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3851     Compr = true;
3852     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3853     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3854     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3855   }
3856 
3857   for (auto i = 0; i < SrcIdx; ++i) {
3858     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3859       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3860     }
3861   }
3862 
3863   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3864   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3865 
3866   Inst.addOperand(MCOperand::createImm(EnMask));
3867 }
3868 
3869 //===----------------------------------------------------------------------===//
3870 // s_waitcnt
3871 //===----------------------------------------------------------------------===//
3872 
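// Encode CntVal into its counter field within IntVal. If the value does not
// survive a decode round-trip (i.e. it does not fit in the field), either
// saturate the field (when Saturate is set) or report failure.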
3873 static bool
3874 encodeCnt(
3875   const AMDGPU::IsaVersion ISA,
3876   int64_t &IntVal,
3877   int64_t CntVal,
3878   bool Saturate,
3879   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3880   unsigned (*decode)(const IsaVersion &Version, unsigned))
3881 {
3882   bool Failed = false;
3883 
3884   IntVal = encode(ISA, IntVal, CntVal);
3885   if (CntVal != decode(ISA, IntVal)) {
3886     if (Saturate) {
3887       IntVal = encode(ISA, IntVal, -1);
3888     } else {
3889       Failed = true;
3890     }
3891   }
3892   return Failed;
3893 }
3894 
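// Parse a single counter of the form "<name>(<value>)", where <name> is one of
// vmcnt, expcnt or lgkmcnt; the "_sat" variants saturate out-of-range values
// instead of failing. Multiple counters may be joined with '&' or ','.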
3895 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3896   StringRef CntName = Parser.getTok().getString();
3897   int64_t CntVal;
3898 
3899   Parser.Lex();
3900   if (getLexer().isNot(AsmToken::LParen))
3901     return true;
3902 
3903   Parser.Lex();
3904   if (getLexer().isNot(AsmToken::Integer))
3905     return true;
3906 
3907   SMLoc ValLoc = Parser.getTok().getLoc();
3908   if (getParser().parseAbsoluteExpression(CntVal))
3909     return true;
3910 
3911   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3912 
3913   bool Failed = true;
3914   bool Sat = CntName.endswith("_sat");
3915 
3916   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3917     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3918   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3919     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3920   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3921     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3922   }
3923 
3924   if (Failed) {
3925     Error(ValLoc, "value is too large for " + CntName);
3926     return true;
3927   }
3928 
3929   if (getLexer().isNot(AsmToken::RParen)) {
3930     return true;
3931   }
3932 
3933   Parser.Lex();
3934   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3935     const AsmToken NextToken = getLexer().peekTok();
3936     if (NextToken.is(AsmToken::Identifier)) {
3937       Parser.Lex();
3938     }
3939   }
3940 
3941   return false;
3942 }
3943 
3944 OperandMatchResultTy
3945 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3946   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3947   int64_t Waitcnt = getWaitcntBitMask(ISA);
3948   SMLoc S = Parser.getTok().getLoc();
3949 
3950   switch(getLexer().getKind()) {
3951     default: return MatchOperand_ParseFail;
3952     case AsmToken::Integer:
3953       // The operand can be an integer value.
3954       if (getParser().parseAbsoluteExpression(Waitcnt))
3955         return MatchOperand_ParseFail;
3956       break;
3957 
3958     case AsmToken::Identifier:
3959       do {
3960         if (parseCnt(Waitcnt))
3961           return MatchOperand_ParseFail;
3962       } while(getLexer().isNot(AsmToken::EndOfStatement));
3963       break;
3964   }
3965   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3966   return MatchOperand_Success;
3967 }
3968 
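// Parse the symbolic form of a hwreg operand: hwreg(<id>[, <offset>, <width>]),
// where <id> is either a symbolic register name or an integer code.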
3969 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3970                                           int64_t &Width) {
3971   using namespace llvm::AMDGPU::Hwreg;
3972 
3973   if (Parser.getTok().getString() != "hwreg")
3974     return true;
3975   Parser.Lex();
3976 
3977   if (getLexer().isNot(AsmToken::LParen))
3978     return true;
3979   Parser.Lex();
3980 
3981   if (getLexer().is(AsmToken::Identifier)) {
3982     HwReg.IsSymbolic = true;
3983     HwReg.Id = ID_UNKNOWN_;
3984     const StringRef tok = Parser.getTok().getString();
3985     int Last = ID_SYMBOLIC_LAST_;
3986     if (isSI() || isCI() || isVI())
3987       Last = ID_SYMBOLIC_FIRST_GFX9_;
3988     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3989       if (tok == IdSymbolic[i]) {
3990         HwReg.Id = i;
3991         break;
3992       }
3993     }
3994     Parser.Lex();
3995   } else {
3996     HwReg.IsSymbolic = false;
3997     if (getLexer().isNot(AsmToken::Integer))
3998       return true;
3999     if (getParser().parseAbsoluteExpression(HwReg.Id))
4000       return true;
4001   }
4002 
4003   if (getLexer().is(AsmToken::RParen)) {
4004     Parser.Lex();
4005     return false;
4006   }
4007 
4008   // optional params
4009   if (getLexer().isNot(AsmToken::Comma))
4010     return true;
4011   Parser.Lex();
4012 
4013   if (getLexer().isNot(AsmToken::Integer))
4014     return true;
4015   if (getParser().parseAbsoluteExpression(Offset))
4016     return true;
4017 
4018   if (getLexer().isNot(AsmToken::Comma))
4019     return true;
4020   Parser.Lex();
4021 
4022   if (getLexer().isNot(AsmToken::Integer))
4023     return true;
4024   if (getParser().parseAbsoluteExpression(Width))
4025     return true;
4026 
4027   if (getLexer().isNot(AsmToken::RParen))
4028     return true;
4029   Parser.Lex();
4030 
4031   return false;
4032 }
4033 
4034 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4035   using namespace llvm::AMDGPU::Hwreg;
4036 
4037   int64_t Imm16Val = 0;
4038   SMLoc S = Parser.getTok().getLoc();
4039 
4040   switch(getLexer().getKind()) {
4041     default: return MatchOperand_NoMatch;
4042     case AsmToken::Integer:
4043       // The operand can be an integer value.
4044       if (getParser().parseAbsoluteExpression(Imm16Val))
4045         return MatchOperand_NoMatch;
4046       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4047         Error(S, "invalid immediate: only 16-bit values are legal");
4048         // Do not return an error code, but create an imm operand anyway and
4049         // proceed to the next operand, if any. That avoids unnecessary error messages.
4050       }
4051       break;
4052 
4053     case AsmToken::Identifier: {
4054         OperandInfoTy HwReg(ID_UNKNOWN_);
4055         int64_t Offset = OFFSET_DEFAULT_;
4056         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4057         if (parseHwregConstruct(HwReg, Offset, Width))
4058           return MatchOperand_ParseFail;
4059         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4060           if (HwReg.IsSymbolic)
4061             Error(S, "invalid symbolic name of hardware register");
4062           else
4063             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4064         }
4065         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4066           Error(S, "invalid bit offset: only 5-bit values are legal");
4067         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4068           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
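        // Pack the simm16 operand: register id, bit offset and (width - 1),
        // each shifted into its field.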
4069         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4070       }
4071       break;
4072   }
4073   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4074   return MatchOperand_Success;
4075 }
4076 
4077 bool AMDGPUOperand::isSWaitCnt() const {
4078   return isImm();
4079 }
4080 
4081 bool AMDGPUOperand::isHwreg() const {
4082   return isImmTy(ImmTyHwreg);
4083 }
4084 
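// Parse the symbolic form of a sendmsg operand: sendmsg(<msg>[, <op>[, <stream>]]).
// The operation is only expected for the GS, GS_DONE and SYSMSG messages, and the
// stream id only for GS/GS_DONE operations other than NOP.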
4085 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4086   using namespace llvm::AMDGPU::SendMsg;
4087 
4088   if (Parser.getTok().getString() != "sendmsg")
4089     return true;
4090   Parser.Lex();
4091 
4092   if (getLexer().isNot(AsmToken::LParen))
4093     return true;
4094   Parser.Lex();
4095 
4096   if (getLexer().is(AsmToken::Identifier)) {
4097     Msg.IsSymbolic = true;
4098     Msg.Id = ID_UNKNOWN_;
4099     const StringRef tok = Parser.getTok().getString();
4100     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4101       switch(i) {
4102         default: continue; // Omit gaps.
4103         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
4104       }
4105       if (tok == IdSymbolic[i]) {
4106         Msg.Id = i;
4107         break;
4108       }
4109     }
4110     Parser.Lex();
4111   } else {
4112     Msg.IsSymbolic = false;
4113     if (getLexer().isNot(AsmToken::Integer))
4114       return true;
4115     if (getParser().parseAbsoluteExpression(Msg.Id))
4116       return true;
4117     if (getLexer().is(AsmToken::Integer))
4118       if (getParser().parseAbsoluteExpression(Msg.Id))
4119         Msg.Id = ID_UNKNOWN_;
4120   }
4121   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4122     return false;
4123 
4124   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4125     if (getLexer().isNot(AsmToken::RParen))
4126       return true;
4127     Parser.Lex();
4128     return false;
4129   }
4130 
4131   if (getLexer().isNot(AsmToken::Comma))
4132     return true;
4133   Parser.Lex();
4134 
4135   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4136   Operation.Id = ID_UNKNOWN_;
4137   if (getLexer().is(AsmToken::Identifier)) {
4138     Operation.IsSymbolic = true;
4139     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4140     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4141     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4142     const StringRef Tok = Parser.getTok().getString();
4143     for (int i = F; i < L; ++i) {
4144       if (Tok == S[i]) {
4145         Operation.Id = i;
4146         break;
4147       }
4148     }
4149     Parser.Lex();
4150   } else {
4151     Operation.IsSymbolic = false;
4152     if (getLexer().isNot(AsmToken::Integer))
4153       return true;
4154     if (getParser().parseAbsoluteExpression(Operation.Id))
4155       return true;
4156   }
4157 
4158   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4159     // Stream id is optional.
4160     if (getLexer().is(AsmToken::RParen)) {
4161       Parser.Lex();
4162       return false;
4163     }
4164 
4165     if (getLexer().isNot(AsmToken::Comma))
4166       return true;
4167     Parser.Lex();
4168 
4169     if (getLexer().isNot(AsmToken::Integer))
4170       return true;
4171     if (getParser().parseAbsoluteExpression(StreamId))
4172       return true;
4173   }
4174 
4175   if (getLexer().isNot(AsmToken::RParen))
4176     return true;
4177   Parser.Lex();
4178   return false;
4179 }
4180 
4181 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4182   if (getLexer().getKind() != AsmToken::Identifier)
4183     return MatchOperand_NoMatch;
4184 
4185   StringRef Str = Parser.getTok().getString();
4186   int Slot = StringSwitch<int>(Str)
4187     .Case("p10", 0)
4188     .Case("p20", 1)
4189     .Case("p0", 2)
4190     .Default(-1);
4191 
4192   SMLoc S = Parser.getTok().getLoc();
4193   if (Slot == -1)
4194     return MatchOperand_ParseFail;
4195 
4196   Parser.Lex();
4197   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4198                                               AMDGPUOperand::ImmTyInterpSlot));
4199   return MatchOperand_Success;
4200 }
4201 
4202 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4203   if (getLexer().getKind() != AsmToken::Identifier)
4204     return MatchOperand_NoMatch;
4205 
4206   StringRef Str = Parser.getTok().getString();
4207   if (!Str.startswith("attr"))
4208     return MatchOperand_NoMatch;
4209 
4210   StringRef Chan = Str.take_back(2);
4211   int AttrChan = StringSwitch<int>(Chan)
4212     .Case(".x", 0)
4213     .Case(".y", 1)
4214     .Case(".z", 2)
4215     .Case(".w", 3)
4216     .Default(-1);
4217   if (AttrChan == -1)
4218     return MatchOperand_ParseFail;
4219 
4220   Str = Str.drop_back(2).drop_front(4);
4221 
4222   uint8_t Attr;
4223   if (Str.getAsInteger(10, Attr))
4224     return MatchOperand_ParseFail;
4225 
4226   SMLoc S = Parser.getTok().getLoc();
4227   Parser.Lex();
4228   if (Attr > 63) {
4229     Error(S, "out of bounds attr");
4230     return MatchOperand_Success;
4231   }
4232 
4233   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4234 
4235   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4236                                               AMDGPUOperand::ImmTyInterpAttr));
4237   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4238                                               AMDGPUOperand::ImmTyAttrChan));
4239   return MatchOperand_Success;
4240 }
4241 
4242 void AMDGPUAsmParser::errorExpTgt() {
4243   Error(Parser.getTok().getLoc(), "invalid exp target");
4244 }
4245 
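// Map an exp target name to its encoded value: mrt0..mrt7 -> 0..7, mrtz -> 8,
// null -> 9, pos0..pos3 -> 12..15, param0..param31 -> 32..63.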
4246 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4247                                                       uint8_t &Val) {
4248   if (Str == "null") {
4249     Val = 9;
4250     return MatchOperand_Success;
4251   }
4252 
4253   if (Str.startswith("mrt")) {
4254     Str = Str.drop_front(3);
4255     if (Str == "z") { // == mrtz
4256       Val = 8;
4257       return MatchOperand_Success;
4258     }
4259 
4260     if (Str.getAsInteger(10, Val))
4261       return MatchOperand_ParseFail;
4262 
4263     if (Val > 7)
4264       errorExpTgt();
4265 
4266     return MatchOperand_Success;
4267   }
4268 
4269   if (Str.startswith("pos")) {
4270     Str = Str.drop_front(3);
4271     if (Str.getAsInteger(10, Val))
4272       return MatchOperand_ParseFail;
4273 
4274     if (Val > 3)
4275       errorExpTgt();
4276 
4277     Val += 12;
4278     return MatchOperand_Success;
4279   }
4280 
4281   if (Str.startswith("param")) {
4282     Str = Str.drop_front(5);
4283     if (Str.getAsInteger(10, Val))
4284       return MatchOperand_ParseFail;
4285 
4286     if (Val >= 32)
4287       errorExpTgt();
4288 
4289     Val += 32;
4290     return MatchOperand_Success;
4291   }
4292 
4293   if (Str.startswith("invalid_target_")) {
4294     Str = Str.drop_front(15);
4295     if (Str.getAsInteger(10, Val))
4296       return MatchOperand_ParseFail;
4297 
4298     errorExpTgt();
4299     return MatchOperand_Success;
4300   }
4301 
4302   return MatchOperand_NoMatch;
4303 }
4304 
4305 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4306   uint8_t Val;
4307   StringRef Str = Parser.getTok().getString();
4308 
4309   auto Res = parseExpTgtImpl(Str, Val);
4310   if (Res != MatchOperand_Success)
4311     return Res;
4312 
4313   SMLoc S = Parser.getTok().getLoc();
4314   Parser.Lex();
4315 
4316   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4317                                               AMDGPUOperand::ImmTyExpTgt));
4318   return MatchOperand_Success;
4319 }
4320 
4321 OperandMatchResultTy
4322 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4323   using namespace llvm::AMDGPU::SendMsg;
4324 
4325   int64_t Imm16Val = 0;
4326   SMLoc S = Parser.getTok().getLoc();
4327 
4328   switch(getLexer().getKind()) {
4329   default:
4330     return MatchOperand_NoMatch;
4331   case AsmToken::Integer:
4332     // The operand can be an integer value.
4333     if (getParser().parseAbsoluteExpression(Imm16Val))
4334       return MatchOperand_NoMatch;
4335     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4336       Error(S, "invalid immediate: only 16-bit values are legal");
4337       // Do not return an error code, but create an imm operand anyway and
4338       // proceed to the next operand, if any. That avoids unnecessary error messages.
4339     }
4340     break;
4341   case AsmToken::Identifier: {
4342       OperandInfoTy Msg(ID_UNKNOWN_);
4343       OperandInfoTy Operation(OP_UNKNOWN_);
4344       int64_t StreamId = STREAM_ID_DEFAULT_;
4345       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4346         return MatchOperand_ParseFail;
4347       do {
4348         // Validate and encode message ID.
4349         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4350                 || Msg.Id == ID_SYSMSG)) {
4351           if (Msg.IsSymbolic)
4352             Error(S, "invalid/unsupported symbolic name of message");
4353           else
4354             Error(S, "invalid/unsupported code of message");
4355           break;
4356         }
4357         Imm16Val = (Msg.Id << ID_SHIFT_);
4358         // Validate and encode operation ID.
4359         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4360           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4361             if (Operation.IsSymbolic)
4362               Error(S, "invalid symbolic name of GS_OP");
4363             else
4364               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4365             break;
4366           }
4367           if (Operation.Id == OP_GS_NOP
4368               && Msg.Id != ID_GS_DONE) {
4369             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4370             break;
4371           }
4372           Imm16Val |= (Operation.Id << OP_SHIFT_);
4373         }
4374         if (Msg.Id == ID_SYSMSG) {
4375           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4376             if (Operation.IsSymbolic)
4377               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4378             else
4379               Error(S, "invalid/unsupported code of SYSMSG_OP");
4380             break;
4381           }
4382           Imm16Val |= (Operation.Id << OP_SHIFT_);
4383         }
4384         // Validate and encode stream ID.
4385         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4386           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4387             Error(S, "invalid stream id: only 2-bit values are legal");
4388             break;
4389           }
4390           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4391         }
4392       } while (false);
4393     }
4394     break;
4395   }
4396   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4397   return MatchOperand_Success;
4398 }
4399 
4400 bool AMDGPUOperand::isSendMsg() const {
4401   return isImmTy(ImmTySendMsg);
4402 }
4403 
4404 //===----------------------------------------------------------------------===//
4405 // parser helpers
4406 //===----------------------------------------------------------------------===//
4407 
4408 bool
4409 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4410   if (getLexer().getKind() == AsmToken::Identifier &&
4411       Parser.getTok().getString() == Id) {
4412     Parser.Lex();
4413     return true;
4414   }
4415   return false;
4416 }
4417 
4418 bool
4419 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4420   if (getLexer().getKind() == Kind) {
4421     Parser.Lex();
4422     return true;
4423   }
4424   return false;
4425 }
4426 
4427 bool
4428 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4429                            const StringRef ErrMsg) {
4430   if (!trySkipToken(Kind)) {
4431     Error(Parser.getTok().getLoc(), ErrMsg);
4432     return false;
4433   }
4434   return true;
4435 }
4436 
4437 bool
4438 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4439   return !getParser().parseAbsoluteExpression(Imm);
4440 }
4441 
4442 bool
4443 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4444   SMLoc S = Parser.getTok().getLoc();
4445   if (getLexer().getKind() == AsmToken::String) {
4446     Val = Parser.getTok().getStringContents();
4447     Parser.Lex();
4448     return true;
4449   } else {
4450     Error(S, ErrMsg);
4451     return false;
4452   }
4453 }
4454 
4455 //===----------------------------------------------------------------------===//
4456 // swizzle
4457 //===----------------------------------------------------------------------===//
4458 
4459 LLVM_READNONE
4460 static unsigned
4461 encodeBitmaskPerm(const unsigned AndMask,
4462                   const unsigned OrMask,
4463                   const unsigned XorMask) {
4464   using namespace llvm::AMDGPU::Swizzle;
4465 
4466   return BITMASK_PERM_ENC |
4467          (AndMask << BITMASK_AND_SHIFT) |
4468          (OrMask  << BITMASK_OR_SHIFT)  |
4469          (XorMask << BITMASK_XOR_SHIFT);
4470 }
4471 
4472 bool
4473 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4474                                       const unsigned MinVal,
4475                                       const unsigned MaxVal,
4476                                       const StringRef ErrMsg) {
4477   for (unsigned i = 0; i < OpNum; ++i) {
4478     if (!skipToken(AsmToken::Comma, "expected a comma")){
4479       return false;
4480     }
4481     SMLoc ExprLoc = Parser.getTok().getLoc();
4482     if (!parseExpr(Op[i])) {
4483       return false;
4484     }
4485     if (Op[i] < MinVal || Op[i] > MaxVal) {
4486       Error(ExprLoc, ErrMsg);
4487       return false;
4488     }
4489   }
4490 
4491   return true;
4492 }
4493 
4494 bool
4495 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4496   using namespace llvm::AMDGPU::Swizzle;
4497 
4498   int64_t Lane[LANE_NUM];
4499   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4500                            "expected a 2-bit lane id")) {
4501     Imm = QUAD_PERM_ENC;
4502     for (unsigned I = 0; I < LANE_NUM; ++I) {
4503       Imm |= Lane[I] << (LANE_SHIFT * I);
4504     }
4505     return true;
4506   }
4507   return false;
4508 }
4509 
4510 bool
4511 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4512   using namespace llvm::AMDGPU::Swizzle;
4513 
4514   SMLoc S = Parser.getTok().getLoc();
4515   int64_t GroupSize;
4516   int64_t LaneIdx;
4517 
4518   if (!parseSwizzleOperands(1, &GroupSize,
4519                             2, 32,
4520                             "group size must be in the interval [2,32]")) {
4521     return false;
4522   }
4523   if (!isPowerOf2_64(GroupSize)) {
4524     Error(S, "group size must be a power of two");
4525     return false;
4526   }
4527   if (parseSwizzleOperands(1, &LaneIdx,
4528                            0, GroupSize - 1,
4529                            "lane id must be in the interval [0,group size - 1]")) {
4530     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4531     return true;
4532   }
4533   return false;
4534 }
4535 
4536 bool
4537 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4538   using namespace llvm::AMDGPU::Swizzle;
4539 
4540   SMLoc S = Parser.getTok().getLoc();
4541   int64_t GroupSize;
4542 
4543   if (!parseSwizzleOperands(1, &GroupSize,
4544       2, 32, "group size must be in the interval [2,32]")) {
4545     return false;
4546   }
4547   if (!isPowerOf2_64(GroupSize)) {
4548     Error(S, "group size must be a power of two");
4549     return false;
4550   }
4551 
4552   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4553   return true;
4554 }
4555 
4556 bool
4557 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4558   using namespace llvm::AMDGPU::Swizzle;
4559 
4560   SMLoc S = Parser.getTok().getLoc();
4561   int64_t GroupSize;
4562 
4563   if (!parseSwizzleOperands(1, &GroupSize,
4564       1, 16, "group size must be in the interval [1,16]")) {
4565     return false;
4566   }
4567   if (!isPowerOf2_64(GroupSize)) {
4568     Error(S, "group size must be a power of two");
4569     return false;
4570   }
4571 
4572   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4573   return true;
4574 }
4575 
4576 bool
4577 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4578   using namespace llvm::AMDGPU::Swizzle;
4579 
4580   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4581     return false;
4582   }
4583 
4584   StringRef Ctl;
4585   SMLoc StrLoc = Parser.getTok().getLoc();
4586   if (!parseString(Ctl)) {
4587     return false;
4588   }
4589   if (Ctl.size() != BITMASK_WIDTH) {
4590     Error(StrLoc, "expected a 5-character mask");
4591     return false;
4592   }
4593 
4594   unsigned AndMask = 0;
4595   unsigned OrMask = 0;
4596   unsigned XorMask = 0;
4597 
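  // Each character of the control string describes one bit of the lane id, from
  // the most significant bit to the least significant one: '0' forces the bit
  // to 0, '1' forces it to 1, 'p' preserves it, and 'i' inverts it.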
4598   for (size_t i = 0; i < Ctl.size(); ++i) {
4599     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4600     switch(Ctl[i]) {
4601     default:
4602       Error(StrLoc, "invalid mask");
4603       return false;
4604     case '0':
4605       break;
4606     case '1':
4607       OrMask |= Mask;
4608       break;
4609     case 'p':
4610       AndMask |= Mask;
4611       break;
4612     case 'i':
4613       AndMask |= Mask;
4614       XorMask |= Mask;
4615       break;
4616     }
4617   }
4618 
4619   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4620   return true;
4621 }
4622 
4623 bool
4624 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4625 
4626   SMLoc OffsetLoc = Parser.getTok().getLoc();
4627 
4628   if (!parseExpr(Imm)) {
4629     return false;
4630   }
4631   if (!isUInt<16>(Imm)) {
4632     Error(OffsetLoc, "expected a 16-bit offset");
4633     return false;
4634   }
4635   return true;
4636 }
4637 
4638 bool
4639 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4640   using namespace llvm::AMDGPU::Swizzle;
4641 
4642   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4643 
4644     SMLoc ModeLoc = Parser.getTok().getLoc();
4645     bool Ok = false;
4646 
4647     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4648       Ok = parseSwizzleQuadPerm(Imm);
4649     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4650       Ok = parseSwizzleBitmaskPerm(Imm);
4651     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4652       Ok = parseSwizzleBroadcast(Imm);
4653     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4654       Ok = parseSwizzleSwap(Imm);
4655     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4656       Ok = parseSwizzleReverse(Imm);
4657     } else {
4658       Error(ModeLoc, "expected a swizzle mode");
4659     }
4660 
4661     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4662   }
4663 
4664   return false;
4665 }
4666 
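// Parse the optional swizzle operand, written either as "offset:<16-bit value>"
// or as "offset:swizzle(<mode>, ...)"; the macro form is expanded by
// parseSwizzleMacro into the same 16-bit encoding.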
4667 OperandMatchResultTy
4668 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4669   SMLoc S = Parser.getTok().getLoc();
4670   int64_t Imm = 0;
4671 
4672   if (trySkipId("offset")) {
4673 
4674     bool Ok = false;
4675     if (skipToken(AsmToken::Colon, "expected a colon")) {
4676       if (trySkipId("swizzle")) {
4677         Ok = parseSwizzleMacro(Imm);
4678       } else {
4679         Ok = parseSwizzleOffset(Imm);
4680       }
4681     }
4682 
4683     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4684 
4685     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4686   } else {
4687     // Swizzle "offset" operand is optional.
4688     // If it is omitted, try parsing other optional operands.
4689     return parseOptionalOpr(Operands);
4690   }
4691 }
4692 
4693 bool
4694 AMDGPUOperand::isSwizzle() const {
4695   return isImmTy(ImmTySwizzle);
4696 }
4697 
4698 //===----------------------------------------------------------------------===//
4699 // VGPR Index Mode
4700 //===----------------------------------------------------------------------===//
4701 
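// Parse the body of a "gpr_idx(...)" operand: a comma-separated list of VGPR
// index modes, with the opening parenthesis already consumed. An empty list,
// "gpr_idx()", encodes OFF.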
4702 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
4703 
4704   using namespace llvm::AMDGPU::VGPRIndexMode;
4705 
4706   if (trySkipToken(AsmToken::RParen)) {
4707     return OFF;
4708   }
4709 
4710   int64_t Imm = 0;
4711 
4712   while (true) {
4713     unsigned Mode = 0;
4714     SMLoc S = Parser.getTok().getLoc();
4715 
4716     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
4717       if (trySkipId(IdSymbolic[ModeId])) {
4718         Mode = 1 << ModeId;
4719         break;
4720       }
4721     }
4722 
4723     if (Mode == 0) {
4724       Error(S, (Imm == 0)?
4725                "expected a VGPR index mode or a closing parenthesis" :
4726                "expected a VGPR index mode");
4727       break;
4728     }
4729 
4730     if (Imm & Mode) {
4731       Error(S, "duplicate VGPR index mode");
4732       break;
4733     }
4734     Imm |= Mode;
4735 
4736     if (trySkipToken(AsmToken::RParen))
4737       break;
4738     if (!skipToken(AsmToken::Comma,
4739                    "expected a comma or a closing parenthesis"))
4740       break;
4741   }
4742 
4743   return Imm;
4744 }
4745 
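// Parse the gpr_idx operand either in its symbolic form, "gpr_idx(...)", or as
// a plain absolute expression that must fit in 4 bits.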
4746 OperandMatchResultTy
4747 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4748 
4749   int64_t Imm = 0;
4750   SMLoc S = Parser.getTok().getLoc();
4751 
4752   if (getLexer().getKind() == AsmToken::Identifier &&
4753       Parser.getTok().getString() == "gpr_idx" &&
4754       getLexer().peekTok().is(AsmToken::LParen)) {
4755 
4756     Parser.Lex();
4757     Parser.Lex();
4758 
4759     // If the parse failed, trigger an error but do not return an error code
4760     // to avoid excessive error messages.
4761     Imm = parseGPRIdxMacro();
4762 
4763   } else {
4764     if (getParser().parseAbsoluteExpression(Imm))
4765       return MatchOperand_NoMatch;
4766     if (Imm < 0 || !isUInt<4>(Imm)) {
4767       Error(S, "invalid immediate: only 4-bit values are legal");
4768     }
4769   }
4770 
4771   Operands.push_back(
4772       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
4773   return MatchOperand_Success;
4774 }
4775 
4776 bool AMDGPUOperand::isGPRIdxMode() const {
4777   return isImmTy(ImmTyGprIdxMode);
4778 }
4779 
4780 //===----------------------------------------------------------------------===//
4781 // sopp branch targets
4782 //===----------------------------------------------------------------------===//
4783 
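// A branch target is either an absolute integer expression or a label; a label
// is wrapped in an MCSymbolRefExpr and resolved later by the MC layer.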
4784 OperandMatchResultTy
4785 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4786   SMLoc S = Parser.getTok().getLoc();
4787 
4788   switch (getLexer().getKind()) {
4789     default: return MatchOperand_ParseFail;
4790     case AsmToken::Integer: {
4791       int64_t Imm;
4792       if (getParser().parseAbsoluteExpression(Imm))
4793         return MatchOperand_ParseFail;
4794       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4795       return MatchOperand_Success;
4796     }
4797 
4798     case AsmToken::Identifier:
4799       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4800           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4801                                   Parser.getTok().getString()), getContext()), S));
4802       Parser.Lex();
4803       return MatchOperand_Success;
4804   }
4805 }
4806 
4807 //===----------------------------------------------------------------------===//
4808 // mubuf
4809 //===----------------------------------------------------------------------===//
4810 
4811 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4812   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4813 }
4814 
4815 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4816   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4817 }
4818 
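// Convert parsed MUBUF operands into an MCInst: registers and an immediate
// soffset are added in order, while named modifiers are collected into
// OptionalIdx and appended afterwards. This also works around the lds-opcode
// selection quirk and duplicates $vdata_in as $vdata for atomic-return forms
// (see the comments below).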
4819 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4820                                const OperandVector &Operands,
4821                                bool IsAtomic,
4822                                bool IsAtomicReturn,
4823                                bool IsLds) {
4824   bool IsLdsOpcode = IsLds;
4825   bool HasLdsModifier = false;
4826   OptionalImmIndexMap OptionalIdx;
4827   assert(IsAtomicReturn ? IsAtomic : true);
4828 
4829   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4830     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4831 
4832     // Add the register arguments
4833     if (Op.isReg()) {
4834       Op.addRegOperands(Inst, 1);
4835       continue;
4836     }
4837 
4838     // Handle the case where soffset is an immediate
4839     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4840       Op.addImmOperands(Inst, 1);
4841       continue;
4842     }
4843 
4844     HasLdsModifier = Op.isLDS();
4845 
4846     // Handle tokens like 'offen' which are sometimes hard-coded into the
4847     // asm string.  There are no MCInst operands for these.
4848     if (Op.isToken()) {
4849       continue;
4850     }
4851     assert(Op.isImm());
4852 
4853     // Handle optional arguments
4854     OptionalIdx[Op.getImmTy()] = i;
4855   }
4856 
4857   // This is a workaround for an llvm quirk which may result in an
4858   // incorrect instruction selection. Lds and non-lds versions of
4859   // MUBUF instructions are identical except that lds versions
4860   // have a mandatory 'lds' modifier. However, this modifier follows
4861   // the optional modifiers, and the llvm asm matcher regards this 'lds'
4862   // modifier as an optional one. As a result, an lds version
4863   // of an opcode may be selected even if it has no 'lds' modifier.
4864   if (IsLdsOpcode && !HasLdsModifier) {
4865     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4866     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4867       Inst.setOpcode(NoLdsOpcode);
4868       IsLdsOpcode = false;
4869     }
4870   }
4871 
4872   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4873   if (IsAtomicReturn) {
4874     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4875     Inst.insert(I, *I);
4876   }
4877 
4878   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4879   if (!IsAtomic) { // glc is hard-coded.
4880     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4881   }
4882   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4883 
4884   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4885     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4886   }
4887 }
4888 
4889 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4890   OptionalImmIndexMap OptionalIdx;
4891 
4892   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4893     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4894 
4895     // Add the register arguments
4896     if (Op.isReg()) {
4897       Op.addRegOperands(Inst, 1);
4898       continue;
4899     }
4900 
4901     // Handle the case where soffset is an immediate
4902     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4903       Op.addImmOperands(Inst, 1);
4904       continue;
4905     }
4906 
4907     // Handle tokens like 'offen' which are sometimes hard-coded into the
4908     // asm string.  There are no MCInst operands for these.
4909     if (Op.isToken()) {
4910       continue;
4911     }
4912     assert(Op.isImm());
4913 
4914     // Handle optional arguments
4915     OptionalIdx[Op.getImmTy()] = i;
4916   }
4917 
4918   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4919                         AMDGPUOperand::ImmTyOffset);
4920   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
4921   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4922   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4923   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4924 }
4925 
4926 //===----------------------------------------------------------------------===//
4927 // mimg
4928 //===----------------------------------------------------------------------===//
4929 
4930 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4931                               bool IsAtomic) {
4932   unsigned I = 1;
4933   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4934   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4935     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4936   }
4937 
4938   if (IsAtomic) {
4939     // Add src, same as dst
4940     assert(Desc.getNumDefs() == 1);
4941     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4942   }
4943 
4944   OptionalImmIndexMap OptionalIdx;
4945 
4946   for (unsigned E = Operands.size(); I != E; ++I) {
4947     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4948 
4949     // Add the register arguments
4950     if (Op.isReg()) {
4951       Op.addRegOperands(Inst, 1);
4952     } else if (Op.isImmModifier()) {
4953       OptionalIdx[Op.getImmTy()] = I;
4954     } else {
4955       llvm_unreachable("unexpected operand type");
4956     }
4957   }
4958 
4959   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4960   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4961   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4962   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4963   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
4964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4965   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4966   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4967   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4968 }
4969 
4970 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4971   cvtMIMG(Inst, Operands, true);
4972 }
4973 
4974 //===----------------------------------------------------------------------===//
4975 // smrd
4976 //===----------------------------------------------------------------------===//
4977 
4978 bool AMDGPUOperand::isSMRDOffset8() const {
4979   return isImm() && isUInt<8>(getImm());
4980 }
4981 
4982 bool AMDGPUOperand::isSMRDOffset20() const {
4983   return isImm() && isUInt<20>(getImm());
4984 }
4985 
4986 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4987   // 32-bit literals are only supported on CI, and we only want to use them
4988   // when the offset does not fit in 8 bits.
4989   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4990 }
4991 
4992 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4993   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4994 }
4995 
4996 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4997   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4998 }
4999 
5000 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5001   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5002 }
5003 
5004 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5005   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5006 }
5007 
5008 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5009   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5010 }
5011 
5012 //===----------------------------------------------------------------------===//
5013 // vop3
5014 //===----------------------------------------------------------------------===//
5015 
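// The omod values accepted by the conversions below map onto the encoded field
// as follows (derived from ConvertOmodMul/ConvertOmodDiv): mul:1 and div:1 map
// to 0, mul:2 to 1, mul:4 to 2, and div:2 to 3.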
5016 static bool ConvertOmodMul(int64_t &Mul) {
5017   if (Mul != 1 && Mul != 2 && Mul != 4)
5018     return false;
5019 
5020   Mul >>= 1;
5021   return true;
5022 }
5023 
5024 static bool ConvertOmodDiv(int64_t &Div) {
5025   if (Div == 1) {
5026     Div = 0;
5027     return true;
5028   }
5029 
5030   if (Div == 2) {
5031     Div = 3;
5032     return true;
5033   }
5034 
5035   return false;
5036 }
5037 
5038 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5039   if (BoundCtrl == 0) {
5040     BoundCtrl = 1;
5041     return true;
5042   }
5043 
5044   if (BoundCtrl == -1) {
5045     BoundCtrl = 0;
5046     return true;
5047   }
5048 
5049   return false;
5050 }
5051 
5052 // Note: the order in this table matches the order of operands in AsmString.
5053 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5054   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5055   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5056   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5057   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5058   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5059   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5060   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5061   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5062   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5063   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5064   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5065   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5066   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5067   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5068   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5069   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5070   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5071   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5072   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5073   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5074   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5075   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5076   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5077   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5078   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5079   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5080   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5081   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5082   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5083   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5084   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5085   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5086   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5087   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5088   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5089   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5090   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5091 };
5092 
5093 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5094   unsigned size = Operands.size();
5095   assert(size > 0);
5096 
5097   OperandMatchResultTy res = parseOptionalOpr(Operands);
5098 
5099   // This is a hack to enable hardcoded mandatory operands which follow
5100   // optional operands.
5101   //
5102   // The current design assumes that all operands after the first optional
5103   // operand are also optional. However, some instructions violate this rule
5104   // (e.g. flat/global atomics, which have a hardcoded 'glc' operand).
5105   //
5106   // To alleviate this problem, we have to (implicitly) parse extra operands
5107   // to make sure the autogenerated custom operand parser never hits a
5108   // hardcoded mandatory operand.
5109 
5110   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5111 
5112     // We have parsed the first optional operand.
5113     // Parse as many operands as necessary to skip all mandatory operands.
5114 
5115     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5116       if (res != MatchOperand_Success ||
5117           getLexer().is(AsmToken::EndOfStatement)) break;
5118       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5119       res = parseOptionalOpr(Operands);
5120     }
5121   }
5122 
5123   return res;
5124 }
5125 
5126 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5127   OperandMatchResultTy res;
5128   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5129     // try to parse any optional operand here
5130     if (Op.IsBit) {
5131       res = parseNamedBit(Op.Name, Operands, Op.Type);
5132     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5133       res = parseOModOperand(Operands);
5134     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5135                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5136                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5137       res = parseSDWASel(Operands, Op.Name, Op.Type);
5138     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5139       res = parseSDWADstUnused(Operands);
5140     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5141                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5142                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5143                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5144       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5145                                         Op.ConvertResult);
5146     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5147       res = parseDfmtNfmt(Operands);
5148     } else {
5149       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5150     }
5151     if (res != MatchOperand_NoMatch) {
5152       return res;
5153     }
5154   }
5155   return MatchOperand_NoMatch;
5156 }
5157 
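// OMOD is written with a named prefix, e.g. "mul:2" or "div:2" (illustrative);
// the matching converter above rewrites the source value into the encoded omod
// field.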
5158 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5159   StringRef Name = Parser.getTok().getString();
5160   if (Name == "mul") {
5161     return parseIntWithPrefix("mul", Operands,
5162                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5163   }
5164 
5165   if (Name == "div") {
5166     return parseIntWithPrefix("div", Operands,
5167                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5168   }
5169 
5170   return MatchOperand_NoMatch;
5171 }
5172 
5173 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5174   cvtVOP3P(Inst, Operands);
5175 
5176   int Opc = Inst.getOpcode();
5177 
5178   int SrcNum;
5179   const int Ops[] = { AMDGPU::OpName::src0,
5180                       AMDGPU::OpName::src1,
5181                       AMDGPU::OpName::src2 };
5182   for (SrcNum = 0;
5183        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5184        ++SrcNum);
5185   assert(SrcNum > 0);
5186 
5187   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5188   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5189 
5190   if ((OpSel & (1 << SrcNum)) != 0) {
5191     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5192     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5193     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5194   }
5195 }
5196 
5197 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5198       // 1. This operand is an input modifier operand
5199   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5200       // 2. This is not the last operand
5201       && Desc.NumOperands > (OpNum + 1)
5202       // 3. The next operand is a register class
5203       && Desc.OpInfo[OpNum + 1].RegClass != -1
5204       // 4. The next register is not tied to any other operand
5205       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5206 }
5207 
5208 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5209 {
5210   OptionalImmIndexMap OptionalIdx;
5211   unsigned Opc = Inst.getOpcode();
5212 
5213   unsigned I = 1;
5214   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5215   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5216     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5217   }
5218 
5219   for (unsigned E = Operands.size(); I != E; ++I) {
5220     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5221     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5222       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5223     } else if (Op.isInterpSlot() ||
5224                Op.isInterpAttr() ||
5225                Op.isAttrChan()) {
5226       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5227     } else if (Op.isImmModifier()) {
5228       OptionalIdx[Op.getImmTy()] = I;
5229     } else {
5230       llvm_unreachable("unhandled operand type");
5231     }
5232   }
5233 
5234   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5235     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5236   }
5237 
5238   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5239     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5240   }
5241 
5242   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5243     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5244   }
5245 }
5246 
5247 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5248                               OptionalImmIndexMap &OptionalIdx) {
5249   unsigned Opc = Inst.getOpcode();
5250 
5251   unsigned I = 1;
5252   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5253   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5254     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5255   }
5256 
5257   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5258     // This instruction has src modifiers
5259     for (unsigned E = Operands.size(); I != E; ++I) {
5260       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5261       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5262         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5263       } else if (Op.isImmModifier()) {
5264         OptionalIdx[Op.getImmTy()] = I;
5265       } else if (Op.isRegOrImm()) {
5266         Op.addRegOrImmOperands(Inst, 1);
5267       } else {
5268         llvm_unreachable("unhandled operand type");
5269       }
5270     }
5271   } else {
5272     // No src modifiers
5273     for (unsigned E = Operands.size(); I != E; ++I) {
5274       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5275       if (Op.isMod()) {
5276         OptionalIdx[Op.getImmTy()] = I;
5277       } else {
5278         Op.addRegOrImmOperands(Inst, 1);
5279       }
5280     }
5281   }
5282 
5283   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5284     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5285   }
5286 
5287   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5288     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5289   }
5290 
5291   // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5292   // they have a src2 register operand that is tied to the dst operand.
5293   // We don't allow modifiers for this operand in the assembler, so
5294   // src2_modifiers should be 0.
5295   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5296       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5297       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5298       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5299     auto it = Inst.begin();
5300     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5301     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5302     ++it;
5303     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5304   }
5305 }
5306 
5307 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5308   OptionalImmIndexMap OptionalIdx;
5309   cvtVOP3(Inst, Operands, OptionalIdx);
5310 }
5311 
5312 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5313                                const OperandVector &Operands) {
5314   OptionalImmIndexMap OptIdx;
5315   const int Opc = Inst.getOpcode();
5316   const MCInstrDesc &Desc = MII.get(Opc);
5317 
5318   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5319 
5320   cvtVOP3(Inst, Operands, OptIdx);
5321 
5322   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5323     assert(!IsPacked);
5324     Inst.addOperand(Inst.getOperand(0));
5325   }
5326 
5327   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
5328   // instruction, and then figure out where to actually put the modifiers.
5329 
5330   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5331 
5332   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5333   if (OpSelHiIdx != -1) {
5334     int DefaultVal = IsPacked ? -1 : 0;
5335     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5336                           DefaultVal);
5337   }
5338 
5339   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5340   if (NegLoIdx != -1) {
5341     assert(IsPacked);
5342     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5343     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5344   }
5345 
5346   const int Ops[] = { AMDGPU::OpName::src0,
5347                       AMDGPU::OpName::src1,
5348                       AMDGPU::OpName::src2 };
5349   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5350                          AMDGPU::OpName::src1_modifiers,
5351                          AMDGPU::OpName::src2_modifiers };
5352 
5353   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5354 
5355   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5356   unsigned OpSelHi = 0;
5357   unsigned NegLo = 0;
5358   unsigned NegHi = 0;
5359 
5360   if (OpSelHiIdx != -1) {
5361     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5362   }
5363 
5364   if (NegLoIdx != -1) {
5365     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5366     NegLo = Inst.getOperand(NegLoIdx).getImm();
5367     NegHi = Inst.getOperand(NegHiIdx).getImm();
5368   }
5369 
5370   for (int J = 0; J < 3; ++J) {
5371     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5372     if (OpIdx == -1)
5373       break;
5374 
5375     uint32_t ModVal = 0;
5376 
5377     if ((OpSel & (1 << J)) != 0)
5378       ModVal |= SISrcMods::OP_SEL_0;
5379 
5380     if ((OpSelHi & (1 << J)) != 0)
5381       ModVal |= SISrcMods::OP_SEL_1;
5382 
5383     if ((NegLo & (1 << J)) != 0)
5384       ModVal |= SISrcMods::NEG;
5385 
5386     if ((NegHi & (1 << J)) != 0)
5387       ModVal |= SISrcMods::NEG_HI;
5388 
5389     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5390 
5391     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5392   }
5393 }
5394 
5395 //===----------------------------------------------------------------------===//
5396 // dpp
5397 //===----------------------------------------------------------------------===//
5398 
5399 bool AMDGPUOperand::isDPPCtrl() const {
5400   using namespace AMDGPU::DPP;
5401 
5402   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5403   if (result) {
5404     int64_t Imm = getImm();
5405     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5406            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5407            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5408            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5409            (Imm == DppCtrl::WAVE_SHL1) ||
5410            (Imm == DppCtrl::WAVE_ROL1) ||
5411            (Imm == DppCtrl::WAVE_SHR1) ||
5412            (Imm == DppCtrl::WAVE_ROR1) ||
5413            (Imm == DppCtrl::ROW_MIRROR) ||
5414            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5415            (Imm == DppCtrl::BCAST15) ||
5416            (Imm == DppCtrl::BCAST31);
5417   }
5418   return false;
5419 }
5420 
5421 bool AMDGPUOperand::isS16Imm() const {
5422   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5423 }
5424 
5425 bool AMDGPUOperand::isU16Imm() const {
5426   return isImm() && isUInt<16>(getImm());
5427 }
5428 
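// Parse the dpp_ctrl operand. Forms handled below (illustrative spellings):
//   quad_perm:[0,1,2,3]
//   row_shl:N / row_shr:N / row_ror:N       (1 <= N <= 15)
//   wave_shl:1 / wave_rol:1 / wave_shr:1 / wave_ror:1
//   row_mirror, row_half_mirror, row_bcast:15, row_bcast:31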
5429 OperandMatchResultTy
5430 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5431   using namespace AMDGPU::DPP;
5432 
5433   SMLoc S = Parser.getTok().getLoc();
5434   StringRef Prefix;
5435   int64_t Int;
5436 
5437   if (getLexer().getKind() == AsmToken::Identifier) {
5438     Prefix = Parser.getTok().getString();
5439   } else {
5440     return MatchOperand_NoMatch;
5441   }
5442 
5443   if (Prefix == "row_mirror") {
5444     Int = DppCtrl::ROW_MIRROR;
5445     Parser.Lex();
5446   } else if (Prefix == "row_half_mirror") {
5447     Int = DppCtrl::ROW_HALF_MIRROR;
5448     Parser.Lex();
5449   } else {
5450     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5451     if (Prefix != "quad_perm"
5452         && Prefix != "row_shl"
5453         && Prefix != "row_shr"
5454         && Prefix != "row_ror"
5455         && Prefix != "wave_shl"
5456         && Prefix != "wave_rol"
5457         && Prefix != "wave_shr"
5458         && Prefix != "wave_ror"
5459         && Prefix != "row_bcast") {
5460       return MatchOperand_NoMatch;
5461     }
5462 
5463     Parser.Lex();
5464     if (getLexer().isNot(AsmToken::Colon))
5465       return MatchOperand_ParseFail;
5466 
5467     if (Prefix == "quad_perm") {
5468       // quad_perm:[%d,%d,%d,%d]
5469       Parser.Lex();
5470       if (getLexer().isNot(AsmToken::LBrac))
5471         return MatchOperand_ParseFail;
5472       Parser.Lex();
5473 
5474       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5475         return MatchOperand_ParseFail;
5476 
5477       for (int i = 0; i < 3; ++i) {
5478         if (getLexer().isNot(AsmToken::Comma))
5479           return MatchOperand_ParseFail;
5480         Parser.Lex();
5481 
5482         int64_t Temp;
5483         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5484           return MatchOperand_ParseFail;
5485         const int shift = i*2 + 2;
5486         Int += (Temp << shift);
5487       }
5488 
5489       if (getLexer().isNot(AsmToken::RBrac))
5490         return MatchOperand_ParseFail;
5491       Parser.Lex();
5492     } else {
5493       // sel:%d
5494       Parser.Lex();
5495       if (getParser().parseAbsoluteExpression(Int))
5496         return MatchOperand_ParseFail;
5497 
5498       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5499         Int |= DppCtrl::ROW_SHL0;
5500       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5501         Int |= DppCtrl::ROW_SHR0;
5502       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5503         Int |= DppCtrl::ROW_ROR0;
5504       } else if (Prefix == "wave_shl" && 1 == Int) {
5505         Int = DppCtrl::WAVE_SHL1;
5506       } else if (Prefix == "wave_rol" && 1 == Int) {
5507         Int = DppCtrl::WAVE_ROL1;
5508       } else if (Prefix == "wave_shr" && 1 == Int) {
5509         Int = DppCtrl::WAVE_SHR1;
5510       } else if (Prefix == "wave_ror" && 1 == Int) {
5511         Int = DppCtrl::WAVE_ROR1;
5512       } else if (Prefix == "row_bcast") {
5513         if (Int == 15) {
5514           Int = DppCtrl::BCAST15;
5515         } else if (Int == 31) {
5516           Int = DppCtrl::BCAST31;
5517         } else {
5518           return MatchOperand_ParseFail;
5519         }
5520       } else {
5521         return MatchOperand_ParseFail;
5522       }
5523     }
5524   }
5525 
5526   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5527   return MatchOperand_Success;
5528 }
5529 
5530 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5531   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5532 }
5533 
5534 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
5535   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
5536 }
5537 
5538 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5539   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5540 }
5541 
5542 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5543   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5544 }
5545 
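// Convert parsed DPP operands into an MCInst. Tied operands (old/src2 for MAC)
// are duplicated from already-added operands, the textual "vcc" operand of
// VOP2b forms is skipped, and row_mask/bank_mask default to 0xf when omitted.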
5546 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5547   OptionalImmIndexMap OptionalIdx;
5548 
5549   unsigned I = 1;
5550   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5551   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5552     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5553   }
5554 
5555   for (unsigned E = Operands.size(); I != E; ++I) {
5556     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5557                                             MCOI::TIED_TO);
5558     if (TiedTo != -1) {
5559       assert((unsigned)TiedTo < Inst.getNumOperands());
5560       // Handle the tied 'old' or 'src2' operand for MAC instructions.
5561       Inst.addOperand(Inst.getOperand(TiedTo));
5562     }
5563     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5564     // Add the register arguments
5565     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5566       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
5567       // Skip it.
5568       continue;
5569     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5570       Op.addRegWithFPInputModsOperands(Inst, 2);
5571     } else if (Op.isDPPCtrl()) {
5572       Op.addImmOperands(Inst, 1);
5573     } else if (Op.isImm()) {
5574       // Handle optional arguments
5575       OptionalIdx[Op.getImmTy()] = I;
5576     } else {
5577       llvm_unreachable("Invalid operand type");
5578     }
5579   }
5580 
5581   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5582   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5583   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5584 }
5585 
5586 //===----------------------------------------------------------------------===//
5587 // sdwa
5588 //===----------------------------------------------------------------------===//
5589 
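// Parse an SDWA select operand such as "dst_sel:WORD_1" or "src0_sel:BYTE_0"
// (illustrative); the accepted select names are the cases of the StringSwitch
// below.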
5590 OperandMatchResultTy
5591 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5592                               AMDGPUOperand::ImmTy Type) {
5593   using namespace llvm::AMDGPU::SDWA;
5594 
5595   SMLoc S = Parser.getTok().getLoc();
5596   StringRef Value;
5597   OperandMatchResultTy res;
5598 
5599   res = parseStringWithPrefix(Prefix, Value);
5600   if (res != MatchOperand_Success) {
5601     return res;
5602   }
5603 
5604   int64_t Int;
5605   Int = StringSwitch<int64_t>(Value)
5606         .Case("BYTE_0", SdwaSel::BYTE_0)
5607         .Case("BYTE_1", SdwaSel::BYTE_1)
5608         .Case("BYTE_2", SdwaSel::BYTE_2)
5609         .Case("BYTE_3", SdwaSel::BYTE_3)
5610         .Case("WORD_0", SdwaSel::WORD_0)
5611         .Case("WORD_1", SdwaSel::WORD_1)
5612         .Case("DWORD", SdwaSel::DWORD)
5613         .Default(0xffffffff);
5614   Parser.Lex(); // eat last token
5615 
5616   if (Int == 0xffffffff) {
5617     return MatchOperand_ParseFail;
5618   }
5619 
5620   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5621   return MatchOperand_Success;
5622 }
5623 
5624 OperandMatchResultTy
5625 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5626   using namespace llvm::AMDGPU::SDWA;
5627 
5628   SMLoc S = Parser.getTok().getLoc();
5629   StringRef Value;
5630   OperandMatchResultTy res;
5631 
5632   res = parseStringWithPrefix("dst_unused", Value);
5633   if (res != MatchOperand_Success) {
5634     return res;
5635   }
5636 
5637   int64_t Int;
5638   Int = StringSwitch<int64_t>(Value)
5639         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5640         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5641         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5642         .Default(0xffffffff);
5643   Parser.Lex(); // eat last token
5644 
5645   if (Int == 0xffffffff) {
5646     return MatchOperand_ParseFail;
5647   }
5648 
5649   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5650   return MatchOperand_Success;
5651 }
5652 
5653 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5654   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5655 }
5656 
5657 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5658   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5659 }
5660 
5661 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5662   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5663 }
5664 
5665 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5666   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5667 }
5668 
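// Common SDWA conversion. When skipVcc is set, the textual "vcc" operand of
// VOP2b/VOPC forms is dropped, optional sdwa modifiers get default values, and
// v_mac_{f16, f32} get their tied src2 duplicated from dst.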
5669 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5670                               uint64_t BasicInstType, bool skipVcc) {
5671   using namespace llvm::AMDGPU::SDWA;
5672 
5673   OptionalImmIndexMap OptionalIdx;
5674   bool skippedVcc = false;
5675 
5676   unsigned I = 1;
5677   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5678   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5679     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5680   }
5681 
5682   for (unsigned E = Operands.size(); I != E; ++I) {
5683     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5684     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5685       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
5686       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5687       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5688       // Skip VCC only if we didn't skip it on the previous iteration.
5689       if (BasicInstType == SIInstrFlags::VOP2 &&
5690           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5691         skippedVcc = true;
5692         continue;
5693       } else if (BasicInstType == SIInstrFlags::VOPC &&
5694                  Inst.getNumOperands() == 0) {
5695         skippedVcc = true;
5696         continue;
5697       }
5698     }
5699     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5700       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5701     } else if (Op.isImm()) {
5702       // Handle optional arguments
5703       OptionalIdx[Op.getImmTy()] = I;
5704     } else {
5705       llvm_unreachable("Invalid operand type");
5706     }
5707     skippedVcc = false;
5708   }
5709 
5710   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5711       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5712     // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
5713     switch (BasicInstType) {
5714     case SIInstrFlags::VOP1:
5715       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5716       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5717         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5718       }
5719       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5720       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5721       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5722       break;
5723 
5724     case SIInstrFlags::VOP2:
5725       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5726       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5727         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5728       }
5729       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5730       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5731       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5732       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5733       break;
5734 
5735     case SIInstrFlags::VOPC:
5736       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5737       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5738       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5739       break;
5740 
5741     default:
5742       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5743     }
5744   }
5745 
5746   // Special case v_mac_{f16, f32}:
5747   // they have a src2 register operand that is tied to the dst operand.
5748   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5749       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5750     auto it = Inst.begin();
5751     std::advance(
5752       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5753     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5754   }
5755 }
5756 
5757 /// Force static initialization.
5758 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5759   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5760   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5761 }
5762 
5763 #define GET_REGISTER_MATCHER
5764 #define GET_MATCHER_IMPLEMENTATION
5765 #define GET_MNEMONIC_SPELL_CHECKER
5766 #include "AMDGPUGenAsmMatcher.inc"
5767 
5768 // This function should be defined after the auto-generated include so that we
5769 // have the MatchClassKind enum defined.
5770 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5771                                                      unsigned Kind) {
5772   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5773   // But MatchInstructionImpl() expects to meet a token and fails to validate
5774   // the operand. This method checks if we were given an immediate operand but
5775   // are expected to match the corresponding token.
5776   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5777   switch (Kind) {
5778   case MCK_addr64:
5779     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5780   case MCK_gds:
5781     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5782   case MCK_lds:
5783     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5784   case MCK_glc:
5785     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5786   case MCK_idxen:
5787     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5788   case MCK_offen:
5789     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5790   case MCK_SSrcB32:
5791     // When operands have expression values, they will return true for isToken,
5792     // because it is not possible to distinguish between a token and an
5793     // expression at parse time. MatchInstructionImpl() will always try to
5794     // match an operand as a token, when isToken returns true, and when the
5795     // name of the expression is not a valid token, the match will fail,
5796     // so we need to handle it here.
5797     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5798   case MCK_SSrcF32:
5799     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5800   case MCK_SoppBrTarget:
5801     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5802   case MCK_VReg32OrOff:
5803     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5804   case MCK_InterpSlot:
5805     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5806   case MCK_Attr:
5807     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5808   case MCK_AttrChan:
5809     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5810   default:
5811     return Match_InvalidOperand;
5812   }
5813 }
5814 
5815 //===----------------------------------------------------------------------===//
5816 // endpgm
5817 //===----------------------------------------------------------------------===//
5818 
5819 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
5820   SMLoc S = Parser.getTok().getLoc();
5821   int64_t Imm = 0;
5822 
5823   if (!parseExpr(Imm)) {
5824     // The operand is optional; if not present, default to 0.
5825     Imm = 0;
5826   }
5827 
5828   if (!isUInt<16>(Imm)) {
5829     Error(S, "expected a 16-bit value");
5830     return MatchOperand_ParseFail;
5831   }
5832 
5833   Operands.push_back(
5834       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
5835   return MatchOperand_Success;
5836 }
5837 
5838 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
5839