1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyEndpgm,
183     ImmTyHigh
184   };
185 
186 private:
187   struct TokOp {
188     const char *Data;
189     unsigned Length;
190   };
191 
192   struct ImmOp {
193     int64_t Val;
194     ImmTy Type;
195     bool IsFPImm;
196     Modifiers Mods;
197   };
198 
199   struct RegOp {
200     unsigned RegNo;
201     Modifiers Mods;
202   };
203 
204   union {
205     TokOp Tok;
206     ImmOp Imm;
207     RegOp Reg;
208     const MCExpr *Expr;
209   };
210 
211 public:
212   bool isToken() const override {
213     if (Kind == Token)
214       return true;
215 
216     if (Kind != Expression || !Expr)
217       return false;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
222     // interpret is a token, then we treat the symbol name as the token.
223     return isa<MCSymbolRefExpr>(Expr);
224   }
225 
226   bool isImm() const override {
227     return Kind == Immediate;
228   }
229 
230   bool isInlinableImm(MVT type) const;
231   bool isLiteralImm(MVT type) const;
232 
233   bool isRegKind() const {
234     return Kind == Register;
235   }
236 
237   bool isReg() const override {
238     return isRegKind() && !hasModifiers();
239   }
240 
241   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
242     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
243   }
244 
245   bool isRegOrImmWithInt16InputMods() const {
246     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
247   }
248 
249   bool isRegOrImmWithInt32InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
251   }
252 
253   bool isRegOrImmWithInt64InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
255   }
256 
257   bool isRegOrImmWithFP16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
259   }
260 
261   bool isRegOrImmWithFP32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
263   }
264 
265   bool isRegOrImmWithFP64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
267   }
268 
269   bool isVReg() const {
270     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
271            isRegClass(AMDGPU::VReg_64RegClassID) ||
272            isRegClass(AMDGPU::VReg_96RegClassID) ||
273            isRegClass(AMDGPU::VReg_128RegClassID) ||
274            isRegClass(AMDGPU::VReg_256RegClassID) ||
275            isRegClass(AMDGPU::VReg_512RegClassID);
276   }
277 
278   bool isVReg32() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID);
280   }
281 
282   bool isVReg32OrOff() const {
283     return isOff() || isVReg32();
284   }
285 
286   bool isSDWAOperand(MVT type) const;
287   bool isSDWAFP16Operand() const;
288   bool isSDWAFP32Operand() const;
289   bool isSDWAInt16Operand() const;
290   bool isSDWAInt32Operand() const;
291 
292   bool isImmTy(ImmTy ImmT) const {
293     return isImm() && Imm.Type == ImmT;
294   }
295 
296   bool isImmModifier() const {
297     return isImm() && Imm.Type != ImmTyNone;
298   }
299 
300   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
301   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
302   bool isDMask() const { return isImmTy(ImmTyDMask); }
303   bool isDim() const { return isImmTy(ImmTyDim); }
304   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
305   bool isDA() const { return isImmTy(ImmTyDA); }
306   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
307   bool isLWE() const { return isImmTy(ImmTyLWE); }
308   bool isOff() const { return isImmTy(ImmTyOff); }
309   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
310   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
311   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
312   bool isOffen() const { return isImmTy(ImmTyOffen); }
313   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
314   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
315   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
316   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
317   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
318 
319   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
320   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
321   bool isGDS() const { return isImmTy(ImmTyGDS); }
322   bool isLDS() const { return isImmTy(ImmTyLDS); }
323   bool isDLC() const { return isImmTy(ImmTyDLC); }
324   bool isGLC() const { return isImmTy(ImmTyGLC); }
325   bool isSLC() const { return isImmTy(ImmTySLC); }
326   bool isTFE() const { return isImmTy(ImmTyTFE); }
327   bool isD16() const { return isImmTy(ImmTyD16); }
328   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
329   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
330   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
331   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
332   bool isFI() const { return isImmTy(ImmTyDppFi); }
333   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
334   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
335   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
336   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
337   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
338   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
339   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
340   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
341   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
342   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
343   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
344   bool isHigh() const { return isImmTy(ImmTyHigh); }
345 
346   bool isMod() const {
347     return isClampSI() || isOModSI();
348   }
349 
350   bool isRegOrImm() const {
351     return isReg() || isImm();
352   }
353 
354   bool isRegClass(unsigned RCID) const;
355 
356   bool isInlineValue() const;
357 
358   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
359     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
360   }
361 
362   bool isSCSrcB16() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
364   }
365 
366   bool isSCSrcV2B16() const {
367     return isSCSrcB16();
368   }
369 
370   bool isSCSrcB32() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
372   }
373 
374   bool isSCSrcB64() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
376   }
377 
378   bool isBoolReg() const;
379 
380   bool isSCSrcF16() const {
381     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
382   }
383 
384   bool isSCSrcV2F16() const {
385     return isSCSrcF16();
386   }
387 
388   bool isSCSrcF32() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
390   }
391 
392   bool isSCSrcF64() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
394   }
395 
396   bool isSSrcB32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
398   }
399 
400   bool isSSrcB16() const {
401     return isSCSrcB16() || isLiteralImm(MVT::i16);
402   }
403 
404   bool isSSrcV2B16() const {
405     llvm_unreachable("cannot happen");
406     return isSSrcB16();
407   }
408 
409   bool isSSrcB64() const {
410     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
411     // See isVSrc64().
412     return isSCSrcB64() || isLiteralImm(MVT::i64);
413   }
414 
415   bool isSSrcF32() const {
416     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
417   }
418 
419   bool isSSrcF64() const {
420     return isSCSrcB64() || isLiteralImm(MVT::f64);
421   }
422 
423   bool isSSrcF16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::f16);
425   }
426 
427   bool isSSrcV2F16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcF16();
430   }
431 
432   bool isSSrcOrLdsB32() const {
433     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
434            isLiteralImm(MVT::i32) || isExpr();
435   }
436 
437   bool isVCSrcB32() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
439   }
440 
441   bool isVCSrcB64() const {
442     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
443   }
444 
445   bool isVCSrcB16() const {
446     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
447   }
448 
449   bool isVCSrcV2B16() const {
450     return isVCSrcB16();
451   }
452 
453   bool isVCSrcF32() const {
454     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
455   }
456 
457   bool isVCSrcF64() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
459   }
460 
461   bool isVCSrcF16() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
463   }
464 
465   bool isVCSrcV2F16() const {
466     return isVCSrcF16();
467   }
468 
469   bool isVSrcB32() const {
470     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVSrcB64() const {
474     return isVCSrcF64() || isLiteralImm(MVT::i64);
475   }
476 
477   bool isVSrcB16() const {
478     return isVCSrcF16() || isLiteralImm(MVT::i16);
479   }
480 
481   bool isVSrcV2B16() const {
482     return isVSrcB16() || isLiteralImm(MVT::v2i16);
483   }
484 
485   bool isVSrcF32() const {
486     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
487   }
488 
489   bool isVSrcF64() const {
490     return isVCSrcF64() || isLiteralImm(MVT::f64);
491   }
492 
493   bool isVSrcF16() const {
494     return isVCSrcF16() || isLiteralImm(MVT::f16);
495   }
496 
497   bool isVSrcV2F16() const {
498     return isVSrcF16() || isLiteralImm(MVT::v2f16);
499   }
500 
501   bool isKImmFP32() const {
502     return isLiteralImm(MVT::f32);
503   }
504 
505   bool isKImmFP16() const {
506     return isLiteralImm(MVT::f16);
507   }
508 
509   bool isMem() const override {
510     return false;
511   }
512 
513   bool isExpr() const {
514     return Kind == Expression;
515   }
516 
517   bool isSoppBrTarget() const {
518     return isExpr() || isImm();
519   }
520 
521   bool isSWaitCnt() const;
522   bool isHwreg() const;
523   bool isSendMsg() const;
524   bool isSwizzle() const;
525   bool isSMRDOffset8() const;
526   bool isSMRDOffset20() const;
527   bool isSMRDLiteralOffset() const;
528   bool isDPP8() const;
529   bool isDPPCtrl() const;
530   bool isGPRIdxMode() const;
531   bool isS16Imm() const;
532   bool isU16Imm() const;
533   bool isEndpgm() const;
534 
535   StringRef getExpressionAsToken() const {
536     assert(isExpr());
537     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
538     return S->getSymbol().getName();
539   }
540 
541   StringRef getToken() const {
542     assert(isToken());
543 
544     if (Kind == Expression)
545       return getExpressionAsToken();
546 
547     return StringRef(Tok.Data, Tok.Length);
548   }
549 
550   int64_t getImm() const {
551     assert(isImm());
552     return Imm.Val;
553   }
554 
555   ImmTy getImmTy() const {
556     assert(isImm());
557     return Imm.Type;
558   }
559 
560   unsigned getReg() const override {
561     assert(isRegKind());
562     return Reg.RegNo;
563   }
564 
565   SMLoc getStartLoc() const override {
566     return StartLoc;
567   }
568 
569   SMLoc getEndLoc() const override {
570     return EndLoc;
571   }
572 
573   SMRange getLocRange() const {
574     return SMRange(StartLoc, EndLoc);
575   }
576 
577   Modifiers getModifiers() const {
578     assert(isRegKind() || isImmTy(ImmTyNone));
579     return isRegKind() ? Reg.Mods : Imm.Mods;
580   }
581 
582   void setModifiers(Modifiers Mods) {
583     assert(isRegKind() || isImmTy(ImmTyNone));
584     if (isRegKind())
585       Reg.Mods = Mods;
586     else
587       Imm.Mods = Mods;
588   }
589 
590   bool hasModifiers() const {
591     return getModifiers().hasModifiers();
592   }
593 
594   bool hasFPModifiers() const {
595     return getModifiers().hasFPModifiers();
596   }
597 
598   bool hasIntModifiers() const {
599     return getModifiers().hasIntModifiers();
600   }
601 
602   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
603 
604   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
605 
606   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
607 
608   template <unsigned Bitwidth>
609   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
610 
611   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
612     addKImmFPOperands<16>(Inst, N);
613   }
614 
615   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
616     addKImmFPOperands<32>(Inst, N);
617   }
618 
619   void addRegOperands(MCInst &Inst, unsigned N) const;
620 
621   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
622     addRegOperands(Inst, N);
623   }
624 
625   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
626     if (isRegKind())
627       addRegOperands(Inst, N);
628     else if (isExpr())
629       Inst.addOperand(MCOperand::createExpr(Expr));
630     else
631       addImmOperands(Inst, N);
632   }
633 
634   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
635     Modifiers Mods = getModifiers();
636     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
637     if (isRegKind()) {
638       addRegOperands(Inst, N);
639     } else {
640       addImmOperands(Inst, N, false);
641     }
642   }
643 
644   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
645     assert(!hasIntModifiers());
646     addRegOrImmWithInputModsOperands(Inst, N);
647   }
648 
649   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
650     assert(!hasFPModifiers());
651     addRegOrImmWithInputModsOperands(Inst, N);
652   }
653 
654   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
655     Modifiers Mods = getModifiers();
656     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
657     assert(isRegKind());
658     addRegOperands(Inst, N);
659   }
660 
661   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
662     assert(!hasIntModifiers());
663     addRegWithInputModsOperands(Inst, N);
664   }
665 
666   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
667     assert(!hasFPModifiers());
668     addRegWithInputModsOperands(Inst, N);
669   }
670 
671   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
672     if (isImm())
673       addImmOperands(Inst, N);
674     else {
675       assert(isExpr());
676       Inst.addOperand(MCOperand::createExpr(Expr));
677     }
678   }
679 
680   static void printImmTy(raw_ostream& OS, ImmTy Type) {
681     switch (Type) {
682     case ImmTyNone: OS << "None"; break;
683     case ImmTyGDS: OS << "GDS"; break;
684     case ImmTyLDS: OS << "LDS"; break;
685     case ImmTyOffen: OS << "Offen"; break;
686     case ImmTyIdxen: OS << "Idxen"; break;
687     case ImmTyAddr64: OS << "Addr64"; break;
688     case ImmTyOffset: OS << "Offset"; break;
689     case ImmTyInstOffset: OS << "InstOffset"; break;
690     case ImmTyOffset0: OS << "Offset0"; break;
691     case ImmTyOffset1: OS << "Offset1"; break;
692     case ImmTyDLC: OS << "DLC"; break;
693     case ImmTyGLC: OS << "GLC"; break;
694     case ImmTySLC: OS << "SLC"; break;
695     case ImmTyTFE: OS << "TFE"; break;
696     case ImmTyD16: OS << "D16"; break;
697     case ImmTyFORMAT: OS << "FORMAT"; break;
698     case ImmTyClampSI: OS << "ClampSI"; break;
699     case ImmTyOModSI: OS << "OModSI"; break;
700     case ImmTyDPP8: OS << "DPP8"; break;
701     case ImmTyDppCtrl: OS << "DppCtrl"; break;
702     case ImmTyDppRowMask: OS << "DppRowMask"; break;
703     case ImmTyDppBankMask: OS << "DppBankMask"; break;
704     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
705     case ImmTyDppFi: OS << "FI"; break;
706     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
707     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
708     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
709     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
710     case ImmTyDMask: OS << "DMask"; break;
711     case ImmTyDim: OS << "Dim"; break;
712     case ImmTyUNorm: OS << "UNorm"; break;
713     case ImmTyDA: OS << "DA"; break;
714     case ImmTyR128A16: OS << "R128A16"; break;
715     case ImmTyLWE: OS << "LWE"; break;
716     case ImmTyOff: OS << "Off"; break;
717     case ImmTyExpTgt: OS << "ExpTgt"; break;
718     case ImmTyExpCompr: OS << "ExpCompr"; break;
719     case ImmTyExpVM: OS << "ExpVM"; break;
720     case ImmTyHwreg: OS << "Hwreg"; break;
721     case ImmTySendMsg: OS << "SendMsg"; break;
722     case ImmTyInterpSlot: OS << "InterpSlot"; break;
723     case ImmTyInterpAttr: OS << "InterpAttr"; break;
724     case ImmTyAttrChan: OS << "AttrChan"; break;
725     case ImmTyOpSel: OS << "OpSel"; break;
726     case ImmTyOpSelHi: OS << "OpSelHi"; break;
727     case ImmTyNegLo: OS << "NegLo"; break;
728     case ImmTyNegHi: OS << "NegHi"; break;
729     case ImmTySwizzle: OS << "Swizzle"; break;
730     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
731     case ImmTyHigh: OS << "High"; break;
732     case ImmTyEndpgm:
733       OS << "Endpgm";
734       break;
735     }
736   }
737 
738   void print(raw_ostream &OS) const override {
739     switch (Kind) {
740     case Register:
741       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
742       break;
743     case Immediate:
744       OS << '<' << getImm();
745       if (getImmTy() != ImmTyNone) {
746         OS << " type: "; printImmTy(OS, getImmTy());
747       }
748       OS << " mods: " << Imm.Mods << '>';
749       break;
750     case Token:
751       OS << '\'' << getToken() << '\'';
752       break;
753     case Expression:
754       OS << "<expr " << *Expr << '>';
755       break;
756     }
757   }
758 
759   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
760                                       int64_t Val, SMLoc Loc,
761                                       ImmTy Type = ImmTyNone,
762                                       bool IsFPImm = false) {
763     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
764     Op->Imm.Val = Val;
765     Op->Imm.IsFPImm = IsFPImm;
766     Op->Imm.Type = Type;
767     Op->Imm.Mods = Modifiers();
768     Op->StartLoc = Loc;
769     Op->EndLoc = Loc;
770     return Op;
771   }
772 
773   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
774                                         StringRef Str, SMLoc Loc,
775                                         bool HasExplicitEncodingSize = true) {
776     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
777     Res->Tok.Data = Str.data();
778     Res->Tok.Length = Str.size();
779     Res->StartLoc = Loc;
780     Res->EndLoc = Loc;
781     return Res;
782   }
783 
784   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
785                                       unsigned RegNo, SMLoc S,
786                                       SMLoc E) {
787     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
788     Op->Reg.RegNo = RegNo;
789     Op->Reg.Mods = Modifiers();
790     Op->StartLoc = S;
791     Op->EndLoc = E;
792     return Op;
793   }
794 
795   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
796                                        const class MCExpr *Expr, SMLoc S) {
797     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
798     Op->Expr = Expr;
799     Op->StartLoc = S;
800     Op->EndLoc = S;
801     return Op;
802   }
803 };
804 
805 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
806   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
807   return OS;
808 }
809 
810 //===----------------------------------------------------------------------===//
811 // AsmParser
812 //===----------------------------------------------------------------------===//
813 
814 // Holds info related to the current kernel, e.g. count of SGPRs used.
815 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
816 // .amdgpu_hsa_kernel or at EOF.
817 class KernelScopeInfo {
818   int SgprIndexUnusedMin = -1;
819   int VgprIndexUnusedMin = -1;
820   MCContext *Ctx = nullptr;
821 
822   void usesSgprAt(int i) {
823     if (i >= SgprIndexUnusedMin) {
824       SgprIndexUnusedMin = ++i;
825       if (Ctx) {
826         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
827         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
828       }
829     }
830   }
831 
832   void usesVgprAt(int i) {
833     if (i >= VgprIndexUnusedMin) {
834       VgprIndexUnusedMin = ++i;
835       if (Ctx) {
836         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
837         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
838       }
839     }
840   }
841 
842 public:
843   KernelScopeInfo() = default;
844 
845   void initialize(MCContext &Context) {
846     Ctx = &Context;
847     usesSgprAt(SgprIndexUnusedMin = -1);
848     usesVgprAt(VgprIndexUnusedMin = -1);
849   }
850 
851   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
852     switch (RegKind) {
853       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
854       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
855       default: break;
856     }
857   }
858 };
859 
860 class AMDGPUAsmParser : public MCTargetAsmParser {
861   MCAsmParser &Parser;
862 
863   // Number of extra operands parsed after the first optional operand.
864   // This may be necessary to skip hardcoded mandatory operands.
865   static const unsigned MAX_OPR_LOOKAHEAD = 8;
866 
867   unsigned ForcedEncodingSize = 0;
868   bool ForcedDPP = false;
869   bool ForcedSDWA = false;
870   KernelScopeInfo KernelScope;
871 
872   /// @name Auto-generated Match Functions
873   /// {
874 
875 #define GET_ASSEMBLER_HEADER
876 #include "AMDGPUGenAsmMatcher.inc"
877 
878   /// }
879 
880 private:
881   bool ParseAsAbsoluteExpression(uint32_t &Ret);
882   bool OutOfRangeError(SMRange Range);
883   /// Calculate VGPR/SGPR blocks required for given target, reserved
884   /// registers, and user-specified NextFreeXGPR values.
885   ///
886   /// \param Features [in] Target features, used for bug corrections.
887   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
888   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
889   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
890   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
891   /// descriptor field, if valid.
892   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
893   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
894   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
895   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
896   /// \param VGPRBlocks [out] Result VGPR block count.
897   /// \param SGPRBlocks [out] Result SGPR block count.
898   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
899                           bool FlatScrUsed, bool XNACKUsed,
900                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
901                           SMRange VGPRRange, unsigned NextFreeSGPR,
902                           SMRange SGPRRange, unsigned &VGPRBlocks,
903                           unsigned &SGPRBlocks);
904   bool ParseDirectiveAMDGCNTarget();
905   bool ParseDirectiveAMDHSAKernel();
906   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
907   bool ParseDirectiveHSACodeObjectVersion();
908   bool ParseDirectiveHSACodeObjectISA();
909   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
910   bool ParseDirectiveAMDKernelCodeT();
911   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
912   bool ParseDirectiveAMDGPUHsaKernel();
913 
914   bool ParseDirectiveISAVersion();
915   bool ParseDirectiveHSAMetadata();
916   bool ParseDirectivePALMetadataBegin();
917   bool ParseDirectivePALMetadata();
918 
919   /// Common code to parse out a block of text (typically YAML) between start and
920   /// end directives.
921   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
922                            const char *AssemblerDirectiveEnd,
923                            std::string &CollectString);
924 
925   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
926                              RegisterKind RegKind, unsigned Reg1,
927                              unsigned RegNum);
928   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
929                            unsigned& RegNum, unsigned& RegWidth,
930                            unsigned *DwordRegIndex);
931   bool isRegister();
932   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
933   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
934   void initializeGprCountSymbol(RegisterKind RegKind);
935   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
936                              unsigned RegWidth);
937   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
938                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
939   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
940                  bool IsGdsHardcoded);
941 
942 public:
943   enum AMDGPUMatchResultTy {
944     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
945   };
946   enum OperandMode {
947     OperandMode_Default,
948     OperandMode_NSA,
949   };
950 
951   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
952 
953   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
954                const MCInstrInfo &MII,
955                const MCTargetOptions &Options)
956       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
957     MCAsmParserExtension::Initialize(Parser);
958 
959     if (getFeatureBits().none()) {
960       // Set default features.
961       copySTI().ToggleFeature("southern-islands");
962     }
963 
964     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
965 
966     {
967       // TODO: make those pre-defined variables read-only.
968       // Currently there is none suitable machinery in the core llvm-mc for this.
969       // MCSymbol::isRedefinable is intended for another purpose, and
970       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
971       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
972       MCContext &Ctx = getContext();
973       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
974         MCSymbol *Sym =
975             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
976         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
977         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
978         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
979         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
980         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
981       } else {
982         MCSymbol *Sym =
983             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
984         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
985         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
986         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
987         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
988         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
989       }
990       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
991         initializeGprCountSymbol(IS_VGPR);
992         initializeGprCountSymbol(IS_SGPR);
993       } else
994         KernelScope.initialize(getContext());
995     }
996   }
997 
998   bool hasXNACK() const {
999     return AMDGPU::hasXNACK(getSTI());
1000   }
1001 
1002   bool hasMIMG_R128() const {
1003     return AMDGPU::hasMIMG_R128(getSTI());
1004   }
1005 
1006   bool hasPackedD16() const {
1007     return AMDGPU::hasPackedD16(getSTI());
1008   }
1009 
1010   bool isSI() const {
1011     return AMDGPU::isSI(getSTI());
1012   }
1013 
1014   bool isCI() const {
1015     return AMDGPU::isCI(getSTI());
1016   }
1017 
1018   bool isVI() const {
1019     return AMDGPU::isVI(getSTI());
1020   }
1021 
1022   bool isGFX9() const {
1023     return AMDGPU::isGFX9(getSTI());
1024   }
1025 
1026   bool isGFX10() const {
1027     return AMDGPU::isGFX10(getSTI());
1028   }
1029 
1030   bool hasInv2PiInlineImm() const {
1031     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1032   }
1033 
1034   bool hasFlatOffsets() const {
1035     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1036   }
1037 
1038   bool hasSGPR102_SGPR103() const {
1039     return !isVI() && !isGFX9();
1040   }
1041 
1042   bool hasSGPR104_SGPR105() const {
1043     return isGFX10();
1044   }
1045 
1046   bool hasIntClamp() const {
1047     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1048   }
1049 
1050   AMDGPUTargetStreamer &getTargetStreamer() {
1051     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1052     return static_cast<AMDGPUTargetStreamer &>(TS);
1053   }
1054 
1055   const MCRegisterInfo *getMRI() const {
1056     // We need this const_cast because for some reason getContext() is not const
1057     // in MCAsmParser.
1058     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1059   }
1060 
1061   const MCInstrInfo *getMII() const {
1062     return &MII;
1063   }
1064 
1065   const FeatureBitset &getFeatureBits() const {
1066     return getSTI().getFeatureBits();
1067   }
1068 
1069   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1070   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1071   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1072 
1073   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1074   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1075   bool isForcedDPP() const { return ForcedDPP; }
1076   bool isForcedSDWA() const { return ForcedSDWA; }
1077   ArrayRef<unsigned> getMatchedVariants() const;
1078 
1079   std::unique_ptr<AMDGPUOperand> parseRegister();
1080   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1081   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1082   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1083                                       unsigned Kind) override;
1084   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1085                                OperandVector &Operands, MCStreamer &Out,
1086                                uint64_t &ErrorInfo,
1087                                bool MatchingInlineAsm) override;
1088   bool ParseDirective(AsmToken DirectiveID) override;
1089   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1090                                     OperandMode Mode = OperandMode_Default);
1091   StringRef parseMnemonicSuffix(StringRef Name);
1092   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1093                         SMLoc NameLoc, OperandVector &Operands) override;
1094   //bool ProcessInstruction(MCInst &Inst);
1095 
1096   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1097 
1098   OperandMatchResultTy
1099   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1100                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1101                      bool (*ConvertResult)(int64_t &) = nullptr);
1102 
1103   OperandMatchResultTy
1104   parseOperandArrayWithPrefix(const char *Prefix,
1105                               OperandVector &Operands,
1106                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1107                               bool (*ConvertResult)(int64_t&) = nullptr);
1108 
1109   OperandMatchResultTy
1110   parseNamedBit(const char *Name, OperandVector &Operands,
1111                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1112   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1113                                              StringRef &Value);
1114 
1115   bool isModifier();
1116   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1117   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1118   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1119   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1120   bool parseSP3NegModifier();
1121   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1122   OperandMatchResultTy parseReg(OperandVector &Operands);
1123   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1124   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1125   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1126   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1127   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1128   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1129   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1130 
1131   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1132   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1133   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1134   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1135 
1136   bool parseCnt(int64_t &IntVal);
1137   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1138   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1139 
1140 private:
1141   struct OperandInfoTy {
1142     int64_t Id;
1143     bool IsSymbolic = false;
1144 
1145     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1146   };
1147 
1148   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1149   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1150   void validateHwreg(const OperandInfoTy &HwReg,
1151                      const int64_t Offset,
1152                      const int64_t Width,
1153                      const SMLoc Loc);
1154 
1155   void errorExpTgt();
1156   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1157 
1158   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1159   bool validateSOPLiteral(const MCInst &Inst) const;
1160   bool validateConstantBusLimitations(const MCInst &Inst);
1161   bool validateEarlyClobberLimitations(const MCInst &Inst);
1162   bool validateIntClampSupported(const MCInst &Inst);
1163   bool validateMIMGAtomicDMask(const MCInst &Inst);
1164   bool validateMIMGGatherDMask(const MCInst &Inst);
1165   bool validateMIMGDataSize(const MCInst &Inst);
1166   bool validateMIMGAddrSize(const MCInst &Inst);
1167   bool validateMIMGD16(const MCInst &Inst);
1168   bool validateMIMGDim(const MCInst &Inst);
1169   bool validateLdsDirect(const MCInst &Inst);
1170   bool validateOpSel(const MCInst &Inst);
1171   bool validateVccOperand(unsigned Reg) const;
1172   bool validateVOP3Literal(const MCInst &Inst) const;
1173   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1174   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1175   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1176 
1177   bool isId(const StringRef Id) const;
1178   bool isId(const AsmToken &Token, const StringRef Id) const;
1179   bool isToken(const AsmToken::TokenKind Kind) const;
1180   bool trySkipId(const StringRef Id);
1181   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1182   bool trySkipToken(const AsmToken::TokenKind Kind);
1183   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1184   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1185   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1186   AsmToken::TokenKind getTokenKind() const;
1187   bool parseExpr(int64_t &Imm);
1188   StringRef getTokenStr() const;
1189   AsmToken peekToken();
1190   AsmToken getToken() const;
1191   SMLoc getLoc() const;
1192   void lex();
1193 
1194 public:
1195   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1196   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1197 
1198   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1199   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1200   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1201   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1202   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1203   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1204 
1205   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1206                             const unsigned MinVal,
1207                             const unsigned MaxVal,
1208                             const StringRef ErrMsg);
1209   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1210   bool parseSwizzleOffset(int64_t &Imm);
1211   bool parseSwizzleMacro(int64_t &Imm);
1212   bool parseSwizzleQuadPerm(int64_t &Imm);
1213   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1214   bool parseSwizzleBroadcast(int64_t &Imm);
1215   bool parseSwizzleSwap(int64_t &Imm);
1216   bool parseSwizzleReverse(int64_t &Imm);
1217 
1218   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1219   int64_t parseGPRIdxMacro();
1220 
1221   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1222   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1223   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1224   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1225   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1226 
1227   AMDGPUOperand::Ptr defaultDLC() const;
1228   AMDGPUOperand::Ptr defaultGLC() const;
1229   AMDGPUOperand::Ptr defaultSLC() const;
1230 
1231   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1232   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1233   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1234   AMDGPUOperand::Ptr defaultOffsetU12() const;
1235   AMDGPUOperand::Ptr defaultOffsetS13() const;
1236 
1237   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1238 
1239   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1240                OptionalImmIndexMap &OptionalIdx);
1241   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1242   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1243   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1244 
1245   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1246 
1247   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1248                bool IsAtomic = false);
1249   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1250 
1251   OperandMatchResultTy parseDim(OperandVector &Operands);
1252   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1253   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1254   AMDGPUOperand::Ptr defaultRowMask() const;
1255   AMDGPUOperand::Ptr defaultBankMask() const;
1256   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1257   AMDGPUOperand::Ptr defaultFI() const;
1258   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1259   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1260 
1261   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1262                                     AMDGPUOperand::ImmTy Type);
1263   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1264   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1265   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1266   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1267   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1268   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1269                 uint64_t BasicInstType, bool skipVcc = false);
1270 
1271   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1272   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1273 };
1274 
1275 struct OptionalOperand {
1276   const char *Name;
1277   AMDGPUOperand::ImmTy Type;
1278   bool IsBit;
1279   bool (*ConvertResult)(int64_t&);
1280 };
1281 
1282 } // end anonymous namespace
1283 
1284 // May be called with integer type with equivalent bitwidth.
1285 static const fltSemantics *getFltSemantics(unsigned Size) {
1286   switch (Size) {
1287   case 4:
1288     return &APFloat::IEEEsingle();
1289   case 8:
1290     return &APFloat::IEEEdouble();
1291   case 2:
1292     return &APFloat::IEEEhalf();
1293   default:
1294     llvm_unreachable("unsupported fp type");
1295   }
1296 }
1297 
1298 static const fltSemantics *getFltSemantics(MVT VT) {
1299   return getFltSemantics(VT.getSizeInBits() / 8);
1300 }
1301 
1302 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1303   switch (OperandType) {
1304   case AMDGPU::OPERAND_REG_IMM_INT32:
1305   case AMDGPU::OPERAND_REG_IMM_FP32:
1306   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1307   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1308     return &APFloat::IEEEsingle();
1309   case AMDGPU::OPERAND_REG_IMM_INT64:
1310   case AMDGPU::OPERAND_REG_IMM_FP64:
1311   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1312   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1313     return &APFloat::IEEEdouble();
1314   case AMDGPU::OPERAND_REG_IMM_INT16:
1315   case AMDGPU::OPERAND_REG_IMM_FP16:
1316   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1317   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1318   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1319   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1320   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1321   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1322     return &APFloat::IEEEhalf();
1323   default:
1324     llvm_unreachable("unsupported fp type");
1325   }
1326 }
1327 
1328 //===----------------------------------------------------------------------===//
1329 // Operand
1330 //===----------------------------------------------------------------------===//
1331 
1332 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1333   bool Lost;
1334 
1335   // Convert literal to single precision
1336   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1337                                                APFloat::rmNearestTiesToEven,
1338                                                &Lost);
1339   // We allow precision lost but not overflow or underflow
1340   if (Status != APFloat::opOK &&
1341       Lost &&
1342       ((Status & APFloat::opOverflow)  != 0 ||
1343        (Status & APFloat::opUnderflow) != 0)) {
1344     return false;
1345   }
1346 
1347   return true;
1348 }
1349 
1350 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1351   return isUIntN(Size, Val) || isIntN(Size, Val);
1352 }
1353 
1354 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1355 
1356   // This is a hack to enable named inline values like
1357   // shared_base with both 32-bit and 64-bit operands.
1358   // Note that these values are defined as
1359   // 32-bit operands only.
1360   if (isInlineValue()) {
1361     return true;
1362   }
1363 
1364   if (!isImmTy(ImmTyNone)) {
1365     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1366     return false;
1367   }
1368   // TODO: We should avoid using host float here. It would be better to
1369   // check the float bit values which is what a few other places do.
1370   // We've had bot failures before due to weird NaN support on mips hosts.
1371 
1372   APInt Literal(64, Imm.Val);
1373 
1374   if (Imm.IsFPImm) { // We got fp literal token
1375     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1376       return AMDGPU::isInlinableLiteral64(Imm.Val,
1377                                           AsmParser->hasInv2PiInlineImm());
1378     }
1379 
1380     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1381     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1382       return false;
1383 
1384     if (type.getScalarSizeInBits() == 16) {
1385       return AMDGPU::isInlinableLiteral16(
1386         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1387         AsmParser->hasInv2PiInlineImm());
1388     }
1389 
1390     // Check if single precision literal is inlinable
1391     return AMDGPU::isInlinableLiteral32(
1392       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1393       AsmParser->hasInv2PiInlineImm());
1394   }
1395 
1396   // We got int literal token.
1397   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1398     return AMDGPU::isInlinableLiteral64(Imm.Val,
1399                                         AsmParser->hasInv2PiInlineImm());
1400   }
1401 
1402   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1403     return false;
1404   }
1405 
1406   if (type.getScalarSizeInBits() == 16) {
1407     return AMDGPU::isInlinableLiteral16(
1408       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1409       AsmParser->hasInv2PiInlineImm());
1410   }
1411 
1412   return AMDGPU::isInlinableLiteral32(
1413     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1414     AsmParser->hasInv2PiInlineImm());
1415 }
1416 
1417 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1418   // Check that this immediate can be added as literal
1419   if (!isImmTy(ImmTyNone)) {
1420     return false;
1421   }
1422 
1423   if (!Imm.IsFPImm) {
1424     // We got int literal token.
1425 
1426     if (type == MVT::f64 && hasFPModifiers()) {
1427       // Cannot apply fp modifiers to int literals preserving the same semantics
1428       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1429       // disable these cases.
1430       return false;
1431     }
1432 
1433     unsigned Size = type.getSizeInBits();
1434     if (Size == 64)
1435       Size = 32;
1436 
1437     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1438     // types.
1439     return isSafeTruncation(Imm.Val, Size);
1440   }
1441 
1442   // We got fp literal token
1443   if (type == MVT::f64) { // Expected 64-bit fp operand
1444     // We would set low 64-bits of literal to zeroes but we accept this literals
1445     return true;
1446   }
1447 
1448   if (type == MVT::i64) { // Expected 64-bit int operand
1449     // We don't allow fp literals in 64-bit integer instructions. It is
1450     // unclear how we should encode them.
1451     return false;
1452   }
1453 
1454   // We allow fp literals with f16x2 operands assuming that the specified
1455   // literal goes into the lower half and the upper half is zero. We also
1456   // require that the literal may be losslesly converted to f16.
1457   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1458                      (type == MVT::v2i16)? MVT::i16 : type;
1459 
1460   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1461   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1462 }
1463 
1464 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1465   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1466 }
1467 
1468 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1469   if (AsmParser->isVI())
1470     return isVReg32();
1471   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1472     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1473   else
1474     return false;
1475 }
1476 
1477 bool AMDGPUOperand::isSDWAFP16Operand() const {
1478   return isSDWAOperand(MVT::f16);
1479 }
1480 
1481 bool AMDGPUOperand::isSDWAFP32Operand() const {
1482   return isSDWAOperand(MVT::f32);
1483 }
1484 
1485 bool AMDGPUOperand::isSDWAInt16Operand() const {
1486   return isSDWAOperand(MVT::i16);
1487 }
1488 
1489 bool AMDGPUOperand::isSDWAInt32Operand() const {
1490   return isSDWAOperand(MVT::i32);
1491 }
1492 
1493 bool AMDGPUOperand::isBoolReg() const {
1494   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1495     isSCSrcB64() : isSCSrcB32();
1496 }
1497 
1498 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1499 {
1500   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1501   assert(Size == 2 || Size == 4 || Size == 8);
1502 
1503   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1504 
1505   if (Imm.Mods.Abs) {
1506     Val &= ~FpSignMask;
1507   }
1508   if (Imm.Mods.Neg) {
1509     Val ^= FpSignMask;
1510   }
1511 
1512   return Val;
1513 }
1514 
1515 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1516   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1517                              Inst.getNumOperands())) {
1518     addLiteralImmOperand(Inst, Imm.Val,
1519                          ApplyModifiers &
1520                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1521   } else {
1522     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1523     Inst.addOperand(MCOperand::createImm(Imm.Val));
1524   }
1525 }
1526 
1527 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1528   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1529   auto OpNum = Inst.getNumOperands();
1530   // Check that this operand accepts literals
1531   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1532 
1533   if (ApplyModifiers) {
1534     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1535     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1536     Val = applyInputFPModifiers(Val, Size);
1537   }
1538 
1539   APInt Literal(64, Val);
1540   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1541 
1542   if (Imm.IsFPImm) { // We got fp literal token
1543     switch (OpTy) {
1544     case AMDGPU::OPERAND_REG_IMM_INT64:
1545     case AMDGPU::OPERAND_REG_IMM_FP64:
1546     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1547     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1548       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1549                                        AsmParser->hasInv2PiInlineImm())) {
1550         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1551         return;
1552       }
1553 
1554       // Non-inlineable
1555       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1556         // For fp operands we check if low 32 bits are zeros
1557         if (Literal.getLoBits(32) != 0) {
1558           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1559           "Can't encode literal as exact 64-bit floating-point operand. "
1560           "Low 32-bits will be set to zero");
1561         }
1562 
1563         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1564         return;
1565       }
1566 
1567       // We don't allow fp literals in 64-bit integer instructions. It is
1568       // unclear how we should encode them. This case should be checked earlier
1569       // in predicate methods (isLiteralImm())
1570       llvm_unreachable("fp literal in 64-bit integer instruction.");
1571 
1572     case AMDGPU::OPERAND_REG_IMM_INT32:
1573     case AMDGPU::OPERAND_REG_IMM_FP32:
1574     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1575     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1576     case AMDGPU::OPERAND_REG_IMM_INT16:
1577     case AMDGPU::OPERAND_REG_IMM_FP16:
1578     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1579     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1580     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1581     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1582     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1583     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1584       bool lost;
1585       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1586       // Convert literal to single precision
1587       FPLiteral.convert(*getOpFltSemantics(OpTy),
1588                         APFloat::rmNearestTiesToEven, &lost);
1589       // We allow precision lost but not overflow or underflow. This should be
1590       // checked earlier in isLiteralImm()
1591 
1592       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1593       Inst.addOperand(MCOperand::createImm(ImmVal));
1594       return;
1595     }
1596     default:
1597       llvm_unreachable("invalid operand size");
1598     }
1599 
1600     return;
1601   }
1602 
1603   // We got int literal token.
1604   // Only sign extend inline immediates.
1605   switch (OpTy) {
1606   case AMDGPU::OPERAND_REG_IMM_INT32:
1607   case AMDGPU::OPERAND_REG_IMM_FP32:
1608   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1609   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1610   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1611   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1612     if (isSafeTruncation(Val, 32) &&
1613         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1614                                      AsmParser->hasInv2PiInlineImm())) {
1615       Inst.addOperand(MCOperand::createImm(Val));
1616       return;
1617     }
1618 
1619     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1620     return;
1621 
1622   case AMDGPU::OPERAND_REG_IMM_INT64:
1623   case AMDGPU::OPERAND_REG_IMM_FP64:
1624   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1625   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1626     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1627       Inst.addOperand(MCOperand::createImm(Val));
1628       return;
1629     }
1630 
1631     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1632     return;
1633 
1634   case AMDGPU::OPERAND_REG_IMM_INT16:
1635   case AMDGPU::OPERAND_REG_IMM_FP16:
1636   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1637   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1638     if (isSafeTruncation(Val, 16) &&
1639         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1640                                      AsmParser->hasInv2PiInlineImm())) {
1641       Inst.addOperand(MCOperand::createImm(Val));
1642       return;
1643     }
1644 
1645     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1646     return;
1647 
1648   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1649   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1650     assert(isSafeTruncation(Val, 16));
1651     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1652                                         AsmParser->hasInv2PiInlineImm()));
1653 
1654     Inst.addOperand(MCOperand::createImm(Val));
1655     return;
1656   }
1657   default:
1658     llvm_unreachable("invalid operand size");
1659   }
1660 }
1661 
1662 template <unsigned Bitwidth>
1663 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1664   APInt Literal(64, Imm.Val);
1665 
1666   if (!Imm.IsFPImm) {
1667     // We got int literal token.
1668     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1669     return;
1670   }
1671 
1672   bool Lost;
1673   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1674   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1675                     APFloat::rmNearestTiesToEven, &Lost);
1676   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1677 }
1678 
1679 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1680   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1681 }
1682 
1683 static bool isInlineValue(unsigned Reg) {
1684   switch (Reg) {
1685   case AMDGPU::SRC_SHARED_BASE:
1686   case AMDGPU::SRC_SHARED_LIMIT:
1687   case AMDGPU::SRC_PRIVATE_BASE:
1688   case AMDGPU::SRC_PRIVATE_LIMIT:
1689   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1690     return true;
1691   case AMDGPU::SRC_VCCZ:
1692   case AMDGPU::SRC_EXECZ:
1693   case AMDGPU::SRC_SCC:
1694     return true;
1695   default:
1696     return false;
1697   }
1698 }
1699 
1700 bool AMDGPUOperand::isInlineValue() const {
1701   return isRegKind() && ::isInlineValue(getReg());
1702 }
1703 
1704 //===----------------------------------------------------------------------===//
1705 // AsmParser
1706 //===----------------------------------------------------------------------===//
1707 
1708 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1709   if (Is == IS_VGPR) {
1710     switch (RegWidth) {
1711       default: return -1;
1712       case 1: return AMDGPU::VGPR_32RegClassID;
1713       case 2: return AMDGPU::VReg_64RegClassID;
1714       case 3: return AMDGPU::VReg_96RegClassID;
1715       case 4: return AMDGPU::VReg_128RegClassID;
1716       case 8: return AMDGPU::VReg_256RegClassID;
1717       case 16: return AMDGPU::VReg_512RegClassID;
1718     }
1719   } else if (Is == IS_TTMP) {
1720     switch (RegWidth) {
1721       default: return -1;
1722       case 1: return AMDGPU::TTMP_32RegClassID;
1723       case 2: return AMDGPU::TTMP_64RegClassID;
1724       case 4: return AMDGPU::TTMP_128RegClassID;
1725       case 8: return AMDGPU::TTMP_256RegClassID;
1726       case 16: return AMDGPU::TTMP_512RegClassID;
1727     }
1728   } else if (Is == IS_SGPR) {
1729     switch (RegWidth) {
1730       default: return -1;
1731       case 1: return AMDGPU::SGPR_32RegClassID;
1732       case 2: return AMDGPU::SGPR_64RegClassID;
1733       case 4: return AMDGPU::SGPR_128RegClassID;
1734       case 8: return AMDGPU::SGPR_256RegClassID;
1735       case 16: return AMDGPU::SGPR_512RegClassID;
1736     }
1737   }
1738   return -1;
1739 }
1740 
1741 static unsigned getSpecialRegForName(StringRef RegName) {
1742   return StringSwitch<unsigned>(RegName)
1743     .Case("exec", AMDGPU::EXEC)
1744     .Case("vcc", AMDGPU::VCC)
1745     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1746     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1747     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1748     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1749     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1750     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1751     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1752     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1753     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1754     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1755     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1756     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1757     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1758     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1759     .Case("m0", AMDGPU::M0)
1760     .Case("vccz", AMDGPU::SRC_VCCZ)
1761     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1762     .Case("execz", AMDGPU::SRC_EXECZ)
1763     .Case("src_execz", AMDGPU::SRC_EXECZ)
1764     .Case("scc", AMDGPU::SRC_SCC)
1765     .Case("src_scc", AMDGPU::SRC_SCC)
1766     .Case("tba", AMDGPU::TBA)
1767     .Case("tma", AMDGPU::TMA)
1768     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1769     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1770     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1771     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1772     .Case("vcc_lo", AMDGPU::VCC_LO)
1773     .Case("vcc_hi", AMDGPU::VCC_HI)
1774     .Case("exec_lo", AMDGPU::EXEC_LO)
1775     .Case("exec_hi", AMDGPU::EXEC_HI)
1776     .Case("tma_lo", AMDGPU::TMA_LO)
1777     .Case("tma_hi", AMDGPU::TMA_HI)
1778     .Case("tba_lo", AMDGPU::TBA_LO)
1779     .Case("tba_hi", AMDGPU::TBA_HI)
1780     .Case("null", AMDGPU::SGPR_NULL)
1781     .Default(0);
1782 }
1783 
1784 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1785                                     SMLoc &EndLoc) {
1786   auto R = parseRegister();
1787   if (!R) return true;
1788   assert(R->isReg());
1789   RegNo = R->getReg();
1790   StartLoc = R->getStartLoc();
1791   EndLoc = R->getEndLoc();
1792   return false;
1793 }
1794 
1795 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1796                                             RegisterKind RegKind, unsigned Reg1,
1797                                             unsigned RegNum) {
1798   switch (RegKind) {
1799   case IS_SPECIAL:
1800     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1801       Reg = AMDGPU::EXEC;
1802       RegWidth = 2;
1803       return true;
1804     }
1805     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1806       Reg = AMDGPU::FLAT_SCR;
1807       RegWidth = 2;
1808       return true;
1809     }
1810     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1811       Reg = AMDGPU::XNACK_MASK;
1812       RegWidth = 2;
1813       return true;
1814     }
1815     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1816       Reg = AMDGPU::VCC;
1817       RegWidth = 2;
1818       return true;
1819     }
1820     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1821       Reg = AMDGPU::TBA;
1822       RegWidth = 2;
1823       return true;
1824     }
1825     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1826       Reg = AMDGPU::TMA;
1827       RegWidth = 2;
1828       return true;
1829     }
1830     return false;
1831   case IS_VGPR:
1832   case IS_SGPR:
1833   case IS_TTMP:
1834     if (Reg1 != Reg + RegWidth) {
1835       return false;
1836     }
1837     RegWidth++;
1838     return true;
1839   default:
1840     llvm_unreachable("unexpected register kind");
1841   }
1842 }
1843 
1844 static const StringRef Registers[] = {
1845   { "v" },
1846   { "s" },
1847   { "ttmp" },
1848 };
1849 
1850 bool
1851 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1852                             const AsmToken &NextToken) const {
1853 
1854   // A list of consecutive registers: [s0,s1,s2,s3]
1855   if (Token.is(AsmToken::LBrac))
1856     return true;
1857 
1858   if (!Token.is(AsmToken::Identifier))
1859     return false;
1860 
1861   // A single register like s0 or a range of registers like s[0:1]
1862 
1863   StringRef RegName = Token.getString();
1864 
1865   for (StringRef Reg : Registers) {
1866     if (RegName.startswith(Reg)) {
1867       if (Reg.size() < RegName.size()) {
1868         unsigned RegNum;
1869         // A single register with an index: rXX
1870         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1871           return true;
1872       } else {
1873         // A range of registers: r[XX:YY].
1874         if (NextToken.is(AsmToken::LBrac))
1875           return true;
1876       }
1877     }
1878   }
1879 
1880   return getSpecialRegForName(RegName);
1881 }
1882 
1883 bool
1884 AMDGPUAsmParser::isRegister()
1885 {
1886   return isRegister(getToken(), peekToken());
1887 }
1888 
1889 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1890                                           unsigned &RegNum, unsigned &RegWidth,
1891                                           unsigned *DwordRegIndex) {
1892   if (DwordRegIndex) { *DwordRegIndex = 0; }
1893   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1894   if (getLexer().is(AsmToken::Identifier)) {
1895     StringRef RegName = Parser.getTok().getString();
1896     if ((Reg = getSpecialRegForName(RegName))) {
1897       Parser.Lex();
1898       RegKind = IS_SPECIAL;
1899     } else {
1900       unsigned RegNumIndex = 0;
1901       if (RegName[0] == 'v') {
1902         RegNumIndex = 1;
1903         RegKind = IS_VGPR;
1904       } else if (RegName[0] == 's') {
1905         RegNumIndex = 1;
1906         RegKind = IS_SGPR;
1907       } else if (RegName.startswith("ttmp")) {
1908         RegNumIndex = strlen("ttmp");
1909         RegKind = IS_TTMP;
1910       } else {
1911         return false;
1912       }
1913       if (RegName.size() > RegNumIndex) {
1914         // Single 32-bit register: vXX.
1915         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1916           return false;
1917         Parser.Lex();
1918         RegWidth = 1;
1919       } else {
1920         // Range of registers: v[XX:YY]. ":YY" is optional.
1921         Parser.Lex();
1922         int64_t RegLo, RegHi;
1923         if (getLexer().isNot(AsmToken::LBrac))
1924           return false;
1925         Parser.Lex();
1926 
1927         if (getParser().parseAbsoluteExpression(RegLo))
1928           return false;
1929 
1930         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1931         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1932           return false;
1933         Parser.Lex();
1934 
1935         if (isRBrace) {
1936           RegHi = RegLo;
1937         } else {
1938           if (getParser().parseAbsoluteExpression(RegHi))
1939             return false;
1940 
1941           if (getLexer().isNot(AsmToken::RBrac))
1942             return false;
1943           Parser.Lex();
1944         }
1945         RegNum = (unsigned) RegLo;
1946         RegWidth = (RegHi - RegLo) + 1;
1947       }
1948     }
1949   } else if (getLexer().is(AsmToken::LBrac)) {
1950     // List of consecutive registers: [s0,s1,s2,s3]
1951     Parser.Lex();
1952     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1953       return false;
1954     if (RegWidth != 1)
1955       return false;
1956     RegisterKind RegKind1;
1957     unsigned Reg1, RegNum1, RegWidth1;
1958     do {
1959       if (getLexer().is(AsmToken::Comma)) {
1960         Parser.Lex();
1961       } else if (getLexer().is(AsmToken::RBrac)) {
1962         Parser.Lex();
1963         break;
1964       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1965         if (RegWidth1 != 1) {
1966           return false;
1967         }
1968         if (RegKind1 != RegKind) {
1969           return false;
1970         }
1971         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1972           return false;
1973         }
1974       } else {
1975         return false;
1976       }
1977     } while (true);
1978   } else {
1979     return false;
1980   }
1981   switch (RegKind) {
1982   case IS_SPECIAL:
1983     RegNum = 0;
1984     RegWidth = 1;
1985     break;
1986   case IS_VGPR:
1987   case IS_SGPR:
1988   case IS_TTMP:
1989   {
1990     unsigned Size = 1;
1991     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1992       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1993       Size = std::min(RegWidth, 4u);
1994     }
1995     if (RegNum % Size != 0)
1996       return false;
1997     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1998     RegNum = RegNum / Size;
1999     int RCID = getRegClass(RegKind, RegWidth);
2000     if (RCID == -1)
2001       return false;
2002     const MCRegisterClass RC = TRI->getRegClass(RCID);
2003     if (RegNum >= RC.getNumRegs())
2004       return false;
2005     Reg = RC.getRegister(RegNum);
2006     break;
2007   }
2008 
2009   default:
2010     llvm_unreachable("unexpected register kind");
2011   }
2012 
2013   if (!subtargetHasRegister(*TRI, Reg))
2014     return false;
2015   return true;
2016 }
2017 
2018 Optional<StringRef>
2019 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2020   switch (RegKind) {
2021   case IS_VGPR:
2022     return StringRef(".amdgcn.next_free_vgpr");
2023   case IS_SGPR:
2024     return StringRef(".amdgcn.next_free_sgpr");
2025   default:
2026     return None;
2027   }
2028 }
2029 
2030 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2031   auto SymbolName = getGprCountSymbolName(RegKind);
2032   assert(SymbolName && "initializing invalid register kind");
2033   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2034   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2035 }
2036 
2037 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2038                                             unsigned DwordRegIndex,
2039                                             unsigned RegWidth) {
2040   // Symbols are only defined for GCN targets
2041   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2042     return true;
2043 
2044   auto SymbolName = getGprCountSymbolName(RegKind);
2045   if (!SymbolName)
2046     return true;
2047   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2048 
2049   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2050   int64_t OldCount;
2051 
2052   if (!Sym->isVariable())
2053     return !Error(getParser().getTok().getLoc(),
2054                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2055   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2056     return !Error(
2057         getParser().getTok().getLoc(),
2058         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2059 
2060   if (OldCount <= NewMax)
2061     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2062 
2063   return true;
2064 }
2065 
2066 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2067   const auto &Tok = Parser.getTok();
2068   SMLoc StartLoc = Tok.getLoc();
2069   SMLoc EndLoc = Tok.getEndLoc();
2070   RegisterKind RegKind;
2071   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2072 
2073   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2074     //FIXME: improve error messages (bug 41303).
2075     Error(StartLoc, "not a valid operand.");
2076     return nullptr;
2077   }
2078   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2079     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2080       return nullptr;
2081   } else
2082     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2083   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2084 }
2085 
2086 OperandMatchResultTy
2087 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2088   // TODO: add syntactic sugar for 1/(2*PI)
2089 
2090   assert(!isRegister());
2091   assert(!isModifier());
2092 
2093   const auto& Tok = getToken();
2094   const auto& NextTok = peekToken();
2095   bool IsReal = Tok.is(AsmToken::Real);
2096   SMLoc S = getLoc();
2097   bool Negate = false;
2098 
2099   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2100     lex();
2101     IsReal = true;
2102     Negate = true;
2103   }
2104 
2105   if (IsReal) {
2106     // Floating-point expressions are not supported.
2107     // Can only allow floating-point literals with an
2108     // optional sign.
2109 
2110     StringRef Num = getTokenStr();
2111     lex();
2112 
2113     APFloat RealVal(APFloat::IEEEdouble());
2114     auto roundMode = APFloat::rmNearestTiesToEven;
2115     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2116       return MatchOperand_ParseFail;
2117     }
2118     if (Negate)
2119       RealVal.changeSign();
2120 
2121     Operands.push_back(
2122       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2123                                AMDGPUOperand::ImmTyNone, true));
2124 
2125     return MatchOperand_Success;
2126 
2127   } else {
2128     int64_t IntVal;
2129     const MCExpr *Expr;
2130     SMLoc S = getLoc();
2131 
2132     if (HasSP3AbsModifier) {
2133       // This is a workaround for handling expressions
2134       // as arguments of SP3 'abs' modifier, for example:
2135       //     |1.0|
2136       //     |-1|
2137       //     |1+x|
2138       // This syntax is not compatible with syntax of standard
2139       // MC expressions (due to the trailing '|').
2140       SMLoc EndLoc;
2141       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2142         return MatchOperand_ParseFail;
2143     } else {
2144       if (Parser.parseExpression(Expr))
2145         return MatchOperand_ParseFail;
2146     }
2147 
2148     if (Expr->evaluateAsAbsolute(IntVal)) {
2149       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2150     } else {
2151       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2152     }
2153 
2154     return MatchOperand_Success;
2155   }
2156 
2157   return MatchOperand_NoMatch;
2158 }
2159 
2160 OperandMatchResultTy
2161 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2162   if (!isRegister())
2163     return MatchOperand_NoMatch;
2164 
2165   if (auto R = parseRegister()) {
2166     assert(R->isReg());
2167     Operands.push_back(std::move(R));
2168     return MatchOperand_Success;
2169   }
2170   return MatchOperand_ParseFail;
2171 }
2172 
2173 OperandMatchResultTy
2174 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2175   auto res = parseReg(Operands);
2176   if (res != MatchOperand_NoMatch) {
2177     return res;
2178   } else if (isModifier()) {
2179     return MatchOperand_NoMatch;
2180   } else {
2181     return parseImm(Operands, HasSP3AbsMod);
2182   }
2183 }
2184 
2185 bool
2186 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2187   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2188     const auto &str = Token.getString();
2189     return str == "abs" || str == "neg" || str == "sext";
2190   }
2191   return false;
2192 }
2193 
2194 bool
2195 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2196   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2197 }
2198 
2199 bool
2200 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2201   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2202 }
2203 
2204 bool
2205 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2206   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2207 }
2208 
2209 // Check if this is an operand modifier or an opcode modifier
2210 // which may look like an expression but it is not. We should
2211 // avoid parsing these modifiers as expressions. Currently
2212 // recognized sequences are:
2213 //   |...|
2214 //   abs(...)
2215 //   neg(...)
2216 //   sext(...)
2217 //   -reg
2218 //   -|...|
2219 //   -abs(...)
2220 //   name:...
2221 // Note that simple opcode modifiers like 'gds' may be parsed as
2222 // expressions; this is a special case. See getExpressionAsToken.
2223 //
2224 bool
2225 AMDGPUAsmParser::isModifier() {
2226 
2227   AsmToken Tok = getToken();
2228   AsmToken NextToken[2];
2229   peekTokens(NextToken);
2230 
2231   return isOperandModifier(Tok, NextToken[0]) ||
2232          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2233          isOpcodeModifierWithVal(Tok, NextToken[0]);
2234 }
2235 
2236 // Check if the current token is an SP3 'neg' modifier.
2237 // Currently this modifier is allowed in the following context:
2238 //
2239 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2240 // 2. Before an 'abs' modifier: -abs(...)
2241 // 3. Before an SP3 'abs' modifier: -|...|
2242 //
2243 // In all other cases "-" is handled as a part
2244 // of an expression that follows the sign.
2245 //
2246 // Note: When "-" is followed by an integer literal,
2247 // this is interpreted as integer negation rather
2248 // than a floating-point NEG modifier applied to N.
2249 // Beside being contr-intuitive, such use of floating-point
2250 // NEG modifier would have resulted in different meaning
2251 // of integer literals used with VOP1/2/C and VOP3,
2252 // for example:
2253 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2254 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2255 // Negative fp literals with preceding "-" are
2256 // handled likewise for unifomtity
2257 //
2258 bool
2259 AMDGPUAsmParser::parseSP3NegModifier() {
2260 
2261   AsmToken NextToken[2];
2262   peekTokens(NextToken);
2263 
2264   if (isToken(AsmToken::Minus) &&
2265       (isRegister(NextToken[0], NextToken[1]) ||
2266        NextToken[0].is(AsmToken::Pipe) ||
2267        isId(NextToken[0], "abs"))) {
2268     lex();
2269     return true;
2270   }
2271 
2272   return false;
2273 }
2274 
2275 OperandMatchResultTy
2276 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2277                                               bool AllowImm) {
2278   bool Neg, SP3Neg;
2279   bool Abs, SP3Abs;
2280   SMLoc Loc;
2281 
2282   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2283   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2284     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2285     return MatchOperand_ParseFail;
2286   }
2287 
2288   SP3Neg = parseSP3NegModifier();
2289 
2290   Loc = getLoc();
2291   Neg = trySkipId("neg");
2292   if (Neg && SP3Neg) {
2293     Error(Loc, "expected register or immediate");
2294     return MatchOperand_ParseFail;
2295   }
2296   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2297     return MatchOperand_ParseFail;
2298 
2299   Abs = trySkipId("abs");
2300   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2301     return MatchOperand_ParseFail;
2302 
2303   Loc = getLoc();
2304   SP3Abs = trySkipToken(AsmToken::Pipe);
2305   if (Abs && SP3Abs) {
2306     Error(Loc, "expected register or immediate");
2307     return MatchOperand_ParseFail;
2308   }
2309 
2310   OperandMatchResultTy Res;
2311   if (AllowImm) {
2312     Res = parseRegOrImm(Operands, SP3Abs);
2313   } else {
2314     Res = parseReg(Operands);
2315   }
2316   if (Res != MatchOperand_Success) {
2317     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2318   }
2319 
2320   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2321     return MatchOperand_ParseFail;
2322   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2323     return MatchOperand_ParseFail;
2324   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2325     return MatchOperand_ParseFail;
2326 
2327   AMDGPUOperand::Modifiers Mods;
2328   Mods.Abs = Abs || SP3Abs;
2329   Mods.Neg = Neg || SP3Neg;
2330 
2331   if (Mods.hasFPModifiers()) {
2332     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2333     if (Op.isExpr()) {
2334       Error(Op.getStartLoc(), "expected an absolute expression");
2335       return MatchOperand_ParseFail;
2336     }
2337     Op.setModifiers(Mods);
2338   }
2339   return MatchOperand_Success;
2340 }
2341 
2342 OperandMatchResultTy
2343 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2344                                                bool AllowImm) {
2345   bool Sext = trySkipId("sext");
2346   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2347     return MatchOperand_ParseFail;
2348 
2349   OperandMatchResultTy Res;
2350   if (AllowImm) {
2351     Res = parseRegOrImm(Operands);
2352   } else {
2353     Res = parseReg(Operands);
2354   }
2355   if (Res != MatchOperand_Success) {
2356     return Sext? MatchOperand_ParseFail : Res;
2357   }
2358 
2359   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2360     return MatchOperand_ParseFail;
2361 
2362   AMDGPUOperand::Modifiers Mods;
2363   Mods.Sext = Sext;
2364 
2365   if (Mods.hasIntModifiers()) {
2366     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2367     if (Op.isExpr()) {
2368       Error(Op.getStartLoc(), "expected an absolute expression");
2369       return MatchOperand_ParseFail;
2370     }
2371     Op.setModifiers(Mods);
2372   }
2373 
2374   return MatchOperand_Success;
2375 }
2376 
2377 OperandMatchResultTy
2378 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2379   return parseRegOrImmWithFPInputMods(Operands, false);
2380 }
2381 
2382 OperandMatchResultTy
2383 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2384   return parseRegOrImmWithIntInputMods(Operands, false);
2385 }
2386 
2387 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2388   auto Loc = getLoc();
2389   if (trySkipId("off")) {
2390     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2391                                                 AMDGPUOperand::ImmTyOff, false));
2392     return MatchOperand_Success;
2393   }
2394 
2395   if (!isRegister())
2396     return MatchOperand_NoMatch;
2397 
2398   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2399   if (Reg) {
2400     Operands.push_back(std::move(Reg));
2401     return MatchOperand_Success;
2402   }
2403 
2404   return MatchOperand_ParseFail;
2405 
2406 }
2407 
2408 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2409   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2410 
2411   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2412       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2413       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2414       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2415     return Match_InvalidOperand;
2416 
2417   if ((TSFlags & SIInstrFlags::VOP3) &&
2418       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2419       getForcedEncodingSize() != 64)
2420     return Match_PreferE32;
2421 
2422   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2423       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2424     // v_mac_f32/16 allow only dst_sel == DWORD;
2425     auto OpNum =
2426         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2427     const auto &Op = Inst.getOperand(OpNum);
2428     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2429       return Match_InvalidOperand;
2430     }
2431   }
2432 
2433   if (TSFlags & SIInstrFlags::FLAT) {
2434     // FIXME: Produces error without correct column reported.
2435     auto Opcode = Inst.getOpcode();
2436     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2437 
2438     const auto &Op = Inst.getOperand(OpNum);
2439     if (!hasFlatOffsets() && Op.getImm() != 0)
2440       return Match_InvalidOperand;
2441 
2442     // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2443     // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2444     if (isGFX10()) {
2445       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2446         if (!isInt<12>(Op.getImm()))
2447           return Match_InvalidOperand;
2448       } else {
2449         if (!isUInt<11>(Op.getImm()))
2450           return Match_InvalidOperand;
2451       }
2452     }
2453   }
2454 
2455   return Match_Success;
2456 }
2457 
2458 // What asm variants we should check
2459 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2460   if (getForcedEncodingSize() == 32) {
2461     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2462     return makeArrayRef(Variants);
2463   }
2464 
2465   if (isForcedVOP3()) {
2466     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2467     return makeArrayRef(Variants);
2468   }
2469 
2470   if (isForcedSDWA()) {
2471     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2472                                         AMDGPUAsmVariants::SDWA9};
2473     return makeArrayRef(Variants);
2474   }
2475 
2476   if (isForcedDPP()) {
2477     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2478     return makeArrayRef(Variants);
2479   }
2480 
2481   static const unsigned Variants[] = {
2482     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2483     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2484   };
2485 
2486   return makeArrayRef(Variants);
2487 }
2488 
2489 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2490   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2491   const unsigned Num = Desc.getNumImplicitUses();
2492   for (unsigned i = 0; i < Num; ++i) {
2493     unsigned Reg = Desc.ImplicitUses[i];
2494     switch (Reg) {
2495     case AMDGPU::FLAT_SCR:
2496     case AMDGPU::VCC:
2497     case AMDGPU::VCC_LO:
2498     case AMDGPU::VCC_HI:
2499     case AMDGPU::M0:
2500     case AMDGPU::SGPR_NULL:
2501       return Reg;
2502     default:
2503       break;
2504     }
2505   }
2506   return AMDGPU::NoRegister;
2507 }
2508 
2509 // NB: This code is correct only when used to check constant
2510 // bus limitations because GFX7 support no f16 inline constants.
2511 // Note that there are no cases when a GFX7 opcode violates
2512 // constant bus limitations due to the use of an f16 constant.
2513 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2514                                        unsigned OpIdx) const {
2515   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2516 
2517   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2518     return false;
2519   }
2520 
2521   const MCOperand &MO = Inst.getOperand(OpIdx);
2522 
2523   int64_t Val = MO.getImm();
2524   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2525 
2526   switch (OpSize) { // expected operand size
2527   case 8:
2528     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2529   case 4:
2530     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2531   case 2: {
2532     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2533     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2534         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2535         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2536         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2537       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2538     } else {
2539       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2540     }
2541   }
2542   default:
2543     llvm_unreachable("invalid operand size");
2544   }
2545 }
2546 
2547 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2548   const MCOperand &MO = Inst.getOperand(OpIdx);
2549   if (MO.isImm()) {
2550     return !isInlineConstant(Inst, OpIdx);
2551   }
2552   return !MO.isReg() ||
2553          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2554 }
2555 
2556 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2557   const unsigned Opcode = Inst.getOpcode();
2558   const MCInstrDesc &Desc = MII.get(Opcode);
2559   unsigned ConstantBusUseCount = 0;
2560   unsigned NumLiterals = 0;
2561   unsigned LiteralSize;
2562 
2563   if (Desc.TSFlags &
2564       (SIInstrFlags::VOPC |
2565        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2566        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2567        SIInstrFlags::SDWA)) {
2568     // Check special imm operands (used by madmk, etc)
2569     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2570       ++ConstantBusUseCount;
2571     }
2572 
2573     SmallDenseSet<unsigned> SGPRsUsed;
2574     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2575     if (SGPRUsed != AMDGPU::NoRegister) {
2576       SGPRsUsed.insert(SGPRUsed);
2577       ++ConstantBusUseCount;
2578     }
2579 
2580     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2581     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2582     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2583 
2584     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2585 
2586     for (int OpIdx : OpIndices) {
2587       if (OpIdx == -1) break;
2588 
2589       const MCOperand &MO = Inst.getOperand(OpIdx);
2590       if (usesConstantBus(Inst, OpIdx)) {
2591         if (MO.isReg()) {
2592           const unsigned Reg = mc2PseudoReg(MO.getReg());
2593           // Pairs of registers with a partial intersections like these
2594           //   s0, s[0:1]
2595           //   flat_scratch_lo, flat_scratch
2596           //   flat_scratch_lo, flat_scratch_hi
2597           // are theoretically valid but they are disabled anyway.
2598           // Note that this code mimics SIInstrInfo::verifyInstruction
2599           if (!SGPRsUsed.count(Reg)) {
2600             SGPRsUsed.insert(Reg);
2601             ++ConstantBusUseCount;
2602           }
2603           SGPRUsed = Reg;
2604         } else { // Expression or a literal
2605 
2606           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2607             continue; // special operand like VINTERP attr_chan
2608 
2609           // An instruction may use only one literal.
2610           // This has been validated on the previous step.
2611           // See validateVOP3Literal.
2612           // This literal may be used as more than one operand.
2613           // If all these operands are of the same size,
2614           // this literal counts as one scalar value.
2615           // Otherwise it counts as 2 scalar values.
2616           // See "GFX10 Shader Programming", section 3.6.2.3.
2617 
2618           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2619           if (Size < 4) Size = 4;
2620 
2621           if (NumLiterals == 0) {
2622             NumLiterals = 1;
2623             LiteralSize = Size;
2624           } else if (LiteralSize != Size) {
2625             NumLiterals = 2;
2626           }
2627         }
2628       }
2629     }
2630   }
2631   ConstantBusUseCount += NumLiterals;
2632 
2633   if (isGFX10())
2634     return ConstantBusUseCount <= 2;
2635 
2636   return ConstantBusUseCount <= 1;
2637 }
2638 
2639 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2640   const unsigned Opcode = Inst.getOpcode();
2641   const MCInstrDesc &Desc = MII.get(Opcode);
2642 
2643   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2644   if (DstIdx == -1 ||
2645       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2646     return true;
2647   }
2648 
2649   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2650 
2651   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2652   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2653   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2654 
2655   assert(DstIdx != -1);
2656   const MCOperand &Dst = Inst.getOperand(DstIdx);
2657   assert(Dst.isReg());
2658   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2659 
2660   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2661 
2662   for (int SrcIdx : SrcIndices) {
2663     if (SrcIdx == -1) break;
2664     const MCOperand &Src = Inst.getOperand(SrcIdx);
2665     if (Src.isReg()) {
2666       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2667       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2668         return false;
2669       }
2670     }
2671   }
2672 
2673   return true;
2674 }
2675 
2676 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2677 
2678   const unsigned Opc = Inst.getOpcode();
2679   const MCInstrDesc &Desc = MII.get(Opc);
2680 
2681   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2682     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2683     assert(ClampIdx != -1);
2684     return Inst.getOperand(ClampIdx).getImm() == 0;
2685   }
2686 
2687   return true;
2688 }
2689 
2690 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2691 
2692   const unsigned Opc = Inst.getOpcode();
2693   const MCInstrDesc &Desc = MII.get(Opc);
2694 
2695   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2696     return true;
2697 
2698   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2699   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2700   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2701 
2702   assert(VDataIdx != -1);
2703   assert(DMaskIdx != -1);
2704   assert(TFEIdx != -1);
2705 
2706   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2707   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2708   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2709   if (DMask == 0)
2710     DMask = 1;
2711 
2712   unsigned DataSize =
2713     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2714   if (hasPackedD16()) {
2715     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2716     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2717       DataSize = (DataSize + 1) / 2;
2718   }
2719 
2720   return (VDataSize / 4) == DataSize + TFESize;
2721 }
2722 
2723 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2724   const unsigned Opc = Inst.getOpcode();
2725   const MCInstrDesc &Desc = MII.get(Opc);
2726 
2727   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2728     return true;
2729 
2730   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2731   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2732       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2733   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2734   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2735   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2736 
2737   assert(VAddr0Idx != -1);
2738   assert(SrsrcIdx != -1);
2739   assert(DimIdx != -1);
2740   assert(SrsrcIdx > VAddr0Idx);
2741 
2742   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2743   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2744   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2745   unsigned VAddrSize =
2746       IsNSA ? SrsrcIdx - VAddr0Idx
2747             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2748 
2749   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2750                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2751                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2752                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2753   if (!IsNSA) {
2754     if (AddrSize > 8)
2755       AddrSize = 16;
2756     else if (AddrSize > 4)
2757       AddrSize = 8;
2758   }
2759 
2760   return VAddrSize == AddrSize;
2761 }
2762 
2763 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2764 
2765   const unsigned Opc = Inst.getOpcode();
2766   const MCInstrDesc &Desc = MII.get(Opc);
2767 
2768   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2769     return true;
2770   if (!Desc.mayLoad() || !Desc.mayStore())
2771     return true; // Not atomic
2772 
2773   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2774   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2775 
2776   // This is an incomplete check because image_atomic_cmpswap
2777   // may only use 0x3 and 0xf while other atomic operations
2778   // may use 0x1 and 0x3. However these limitations are
2779   // verified when we check that dmask matches dst size.
2780   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2781 }
2782 
2783 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2784 
2785   const unsigned Opc = Inst.getOpcode();
2786   const MCInstrDesc &Desc = MII.get(Opc);
2787 
2788   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2789     return true;
2790 
2791   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2792   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2793 
2794   // GATHER4 instructions use dmask in a different fashion compared to
2795   // other MIMG instructions. The only useful DMASK values are
2796   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2797   // (red,red,red,red) etc.) The ISA document doesn't mention
2798   // this.
2799   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2800 }
2801 
2802 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2803 
2804   const unsigned Opc = Inst.getOpcode();
2805   const MCInstrDesc &Desc = MII.get(Opc);
2806 
2807   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2808     return true;
2809 
2810   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2811   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2812     if (isCI() || isSI())
2813       return false;
2814   }
2815 
2816   return true;
2817 }
2818 
2819 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2820   const unsigned Opc = Inst.getOpcode();
2821   const MCInstrDesc &Desc = MII.get(Opc);
2822 
2823   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2824     return true;
2825 
2826   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2827   if (DimIdx < 0)
2828     return true;
2829 
2830   long Imm = Inst.getOperand(DimIdx).getImm();
2831   if (Imm < 0 || Imm >= 8)
2832     return false;
2833 
2834   return true;
2835 }
2836 
2837 static bool IsRevOpcode(const unsigned Opcode)
2838 {
2839   switch (Opcode) {
2840   case AMDGPU::V_SUBREV_F32_e32:
2841   case AMDGPU::V_SUBREV_F32_e64:
2842   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2843   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2844   case AMDGPU::V_SUBREV_F32_e32_vi:
2845   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2846   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2847   case AMDGPU::V_SUBREV_F32_e64_vi:
2848 
2849   case AMDGPU::V_SUBREV_I32_e32:
2850   case AMDGPU::V_SUBREV_I32_e64:
2851   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2852   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2853 
2854   case AMDGPU::V_SUBBREV_U32_e32:
2855   case AMDGPU::V_SUBBREV_U32_e64:
2856   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2857   case AMDGPU::V_SUBBREV_U32_e32_vi:
2858   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2859   case AMDGPU::V_SUBBREV_U32_e64_vi:
2860 
2861   case AMDGPU::V_SUBREV_U32_e32:
2862   case AMDGPU::V_SUBREV_U32_e64:
2863   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2864   case AMDGPU::V_SUBREV_U32_e32_vi:
2865   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2866   case AMDGPU::V_SUBREV_U32_e64_vi:
2867 
2868   case AMDGPU::V_SUBREV_F16_e32:
2869   case AMDGPU::V_SUBREV_F16_e64:
2870   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2871   case AMDGPU::V_SUBREV_F16_e32_vi:
2872   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2873   case AMDGPU::V_SUBREV_F16_e64_vi:
2874 
2875   case AMDGPU::V_SUBREV_U16_e32:
2876   case AMDGPU::V_SUBREV_U16_e64:
2877   case AMDGPU::V_SUBREV_U16_e32_vi:
2878   case AMDGPU::V_SUBREV_U16_e64_vi:
2879 
2880   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2881   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2882   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2883 
2884   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2885   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2886 
2887   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2888   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2889 
2890   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2891   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2892 
2893   case AMDGPU::V_LSHRREV_B32_e32:
2894   case AMDGPU::V_LSHRREV_B32_e64:
2895   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2896   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2897   case AMDGPU::V_LSHRREV_B32_e32_vi:
2898   case AMDGPU::V_LSHRREV_B32_e64_vi:
2899   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2900   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2901 
2902   case AMDGPU::V_ASHRREV_I32_e32:
2903   case AMDGPU::V_ASHRREV_I32_e64:
2904   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2905   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2906   case AMDGPU::V_ASHRREV_I32_e32_vi:
2907   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2908   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2909   case AMDGPU::V_ASHRREV_I32_e64_vi:
2910 
2911   case AMDGPU::V_LSHLREV_B32_e32:
2912   case AMDGPU::V_LSHLREV_B32_e64:
2913   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2914   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2915   case AMDGPU::V_LSHLREV_B32_e32_vi:
2916   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2917   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2918   case AMDGPU::V_LSHLREV_B32_e64_vi:
2919 
2920   case AMDGPU::V_LSHLREV_B16_e32:
2921   case AMDGPU::V_LSHLREV_B16_e64:
2922   case AMDGPU::V_LSHLREV_B16_e32_vi:
2923   case AMDGPU::V_LSHLREV_B16_e64_vi:
2924   case AMDGPU::V_LSHLREV_B16_gfx10:
2925 
2926   case AMDGPU::V_LSHRREV_B16_e32:
2927   case AMDGPU::V_LSHRREV_B16_e64:
2928   case AMDGPU::V_LSHRREV_B16_e32_vi:
2929   case AMDGPU::V_LSHRREV_B16_e64_vi:
2930   case AMDGPU::V_LSHRREV_B16_gfx10:
2931 
2932   case AMDGPU::V_ASHRREV_I16_e32:
2933   case AMDGPU::V_ASHRREV_I16_e64:
2934   case AMDGPU::V_ASHRREV_I16_e32_vi:
2935   case AMDGPU::V_ASHRREV_I16_e64_vi:
2936   case AMDGPU::V_ASHRREV_I16_gfx10:
2937 
2938   case AMDGPU::V_LSHLREV_B64:
2939   case AMDGPU::V_LSHLREV_B64_gfx10:
2940   case AMDGPU::V_LSHLREV_B64_vi:
2941 
2942   case AMDGPU::V_LSHRREV_B64:
2943   case AMDGPU::V_LSHRREV_B64_gfx10:
2944   case AMDGPU::V_LSHRREV_B64_vi:
2945 
2946   case AMDGPU::V_ASHRREV_I64:
2947   case AMDGPU::V_ASHRREV_I64_gfx10:
2948   case AMDGPU::V_ASHRREV_I64_vi:
2949 
2950   case AMDGPU::V_PK_LSHLREV_B16:
2951   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2952   case AMDGPU::V_PK_LSHLREV_B16_vi:
2953 
2954   case AMDGPU::V_PK_LSHRREV_B16:
2955   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2956   case AMDGPU::V_PK_LSHRREV_B16_vi:
2957   case AMDGPU::V_PK_ASHRREV_I16:
2958   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2959   case AMDGPU::V_PK_ASHRREV_I16_vi:
2960     return true;
2961   default:
2962     return false;
2963   }
2964 }
2965 
2966 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2967 
2968   using namespace SIInstrFlags;
2969   const unsigned Opcode = Inst.getOpcode();
2970   const MCInstrDesc &Desc = MII.get(Opcode);
2971 
2972   // lds_direct register is defined so that it can be used
2973   // with 9-bit operands only. Ignore encodings which do not accept these.
2974   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2975     return true;
2976 
2977   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2978   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2979   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2980 
2981   const int SrcIndices[] = { Src1Idx, Src2Idx };
2982 
2983   // lds_direct cannot be specified as either src1 or src2.
2984   for (int SrcIdx : SrcIndices) {
2985     if (SrcIdx == -1) break;
2986     const MCOperand &Src = Inst.getOperand(SrcIdx);
2987     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2988       return false;
2989     }
2990   }
2991 
2992   if (Src0Idx == -1)
2993     return true;
2994 
2995   const MCOperand &Src = Inst.getOperand(Src0Idx);
2996   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2997     return true;
2998 
2999   // lds_direct is specified as src0. Check additional limitations.
3000   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3001 }
3002 
3003 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3004   unsigned Opcode = Inst.getOpcode();
3005   const MCInstrDesc &Desc = MII.get(Opcode);
3006   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3007     return true;
3008 
3009   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3010   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3011 
3012   const int OpIndices[] = { Src0Idx, Src1Idx };
3013 
3014   unsigned NumLiterals = 0;
3015   uint32_t LiteralValue;
3016 
3017   for (int OpIdx : OpIndices) {
3018     if (OpIdx == -1) break;
3019 
3020     const MCOperand &MO = Inst.getOperand(OpIdx);
3021     if (MO.isImm() &&
3022         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3023         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3024         !isInlineConstant(Inst, OpIdx)) {
3025       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3026       if (NumLiterals == 0 || LiteralValue != Value) {
3027         LiteralValue = Value;
3028         ++NumLiterals;
3029       }
3030     }
3031   }
3032 
3033   return NumLiterals <= 1;
3034 }
3035 
3036 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3037   const unsigned Opc = Inst.getOpcode();
3038   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3039       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3040     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3041     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3042 
3043     if (OpSel & ~3)
3044       return false;
3045   }
3046   return true;
3047 }
3048 
3049 // Check if VCC register matches wavefront size
3050 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3051   auto FB = getFeatureBits();
3052   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3053     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3054 }
3055 
3056 // VOP3 literal is only allowed in GFX10+ and only one can be used
3057 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3058   unsigned Opcode = Inst.getOpcode();
3059   const MCInstrDesc &Desc = MII.get(Opcode);
3060   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3061     return true;
3062 
3063   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3064   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3065   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3066 
3067   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3068 
3069   unsigned NumLiterals = 0;
3070   uint32_t LiteralValue;
3071 
3072   for (int OpIdx : OpIndices) {
3073     if (OpIdx == -1) break;
3074 
3075     const MCOperand &MO = Inst.getOperand(OpIdx);
3076     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3077       continue;
3078 
3079     if (!isInlineConstant(Inst, OpIdx)) {
3080       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3081       if (NumLiterals == 0 || LiteralValue != Value) {
3082         LiteralValue = Value;
3083         ++NumLiterals;
3084       }
3085     }
3086   }
3087 
3088   return !NumLiterals ||
3089          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3090 }
3091 
3092 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3093                                           const SMLoc &IDLoc) {
3094   if (!validateLdsDirect(Inst)) {
3095     Error(IDLoc,
3096       "invalid use of lds_direct");
3097     return false;
3098   }
3099   if (!validateSOPLiteral(Inst)) {
3100     Error(IDLoc,
3101       "only one literal operand is allowed");
3102     return false;
3103   }
3104   if (!validateVOP3Literal(Inst)) {
3105     Error(IDLoc,
3106       "invalid literal operand");
3107     return false;
3108   }
3109   if (!validateConstantBusLimitations(Inst)) {
3110     Error(IDLoc,
3111       "invalid operand (violates constant bus restrictions)");
3112     return false;
3113   }
3114   if (!validateEarlyClobberLimitations(Inst)) {
3115     Error(IDLoc,
3116       "destination must be different than all sources");
3117     return false;
3118   }
3119   if (!validateIntClampSupported(Inst)) {
3120     Error(IDLoc,
3121       "integer clamping is not supported on this GPU");
3122     return false;
3123   }
3124   if (!validateOpSel(Inst)) {
3125     Error(IDLoc,
3126       "invalid op_sel operand");
3127     return false;
3128   }
3129   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3130   if (!validateMIMGD16(Inst)) {
3131     Error(IDLoc,
3132       "d16 modifier is not supported on this GPU");
3133     return false;
3134   }
3135   if (!validateMIMGDim(Inst)) {
3136     Error(IDLoc, "dim modifier is required on this GPU");
3137     return false;
3138   }
3139   if (!validateMIMGDataSize(Inst)) {
3140     Error(IDLoc,
3141       "image data size does not match dmask and tfe");
3142     return false;
3143   }
3144   if (!validateMIMGAddrSize(Inst)) {
3145     Error(IDLoc,
3146       "image address size does not match dim and a16");
3147     return false;
3148   }
3149   if (!validateMIMGAtomicDMask(Inst)) {
3150     Error(IDLoc,
3151       "invalid atomic image dmask");
3152     return false;
3153   }
3154   if (!validateMIMGGatherDMask(Inst)) {
3155     Error(IDLoc,
3156       "invalid image_gather dmask: only one bit must be set");
3157     return false;
3158   }
3159 
3160   return true;
3161 }
3162 
3163 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3164                                             const FeatureBitset &FBS,
3165                                             unsigned VariantID = 0);
3166 
3167 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3168                                               OperandVector &Operands,
3169                                               MCStreamer &Out,
3170                                               uint64_t &ErrorInfo,
3171                                               bool MatchingInlineAsm) {
3172   MCInst Inst;
3173   unsigned Result = Match_Success;
3174   for (auto Variant : getMatchedVariants()) {
3175     uint64_t EI;
3176     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3177                                   Variant);
3178     // We order match statuses from least to most specific. We use most specific
3179     // status as resulting
3180     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3181     if ((R == Match_Success) ||
3182         (R == Match_PreferE32) ||
3183         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3184         (R == Match_InvalidOperand && Result != Match_MissingFeature
3185                                    && Result != Match_PreferE32) ||
3186         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3187                                    && Result != Match_MissingFeature
3188                                    && Result != Match_PreferE32)) {
3189       Result = R;
3190       ErrorInfo = EI;
3191     }
3192     if (R == Match_Success)
3193       break;
3194   }
3195 
3196   switch (Result) {
3197   default: break;
3198   case Match_Success:
3199     if (!validateInstruction(Inst, IDLoc)) {
3200       return true;
3201     }
3202     Inst.setLoc(IDLoc);
3203     Out.EmitInstruction(Inst, getSTI());
3204     return false;
3205 
3206   case Match_MissingFeature:
3207     return Error(IDLoc, "instruction not supported on this GPU");
3208 
3209   case Match_MnemonicFail: {
3210     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3211     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3212         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3213     return Error(IDLoc, "invalid instruction" + Suggestion,
3214                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3215   }
3216 
3217   case Match_InvalidOperand: {
3218     SMLoc ErrorLoc = IDLoc;
3219     if (ErrorInfo != ~0ULL) {
3220       if (ErrorInfo >= Operands.size()) {
3221         return Error(IDLoc, "too few operands for instruction");
3222       }
3223       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3224       if (ErrorLoc == SMLoc())
3225         ErrorLoc = IDLoc;
3226     }
3227     return Error(ErrorLoc, "invalid operand for instruction");
3228   }
3229 
3230   case Match_PreferE32:
3231     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3232                         "should be encoded as e32");
3233   }
3234   llvm_unreachable("Implement any new match types added!");
3235 }
3236 
3237 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3238   int64_t Tmp = -1;
3239   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3240     return true;
3241   }
3242   if (getParser().parseAbsoluteExpression(Tmp)) {
3243     return true;
3244   }
3245   Ret = static_cast<uint32_t>(Tmp);
3246   return false;
3247 }
3248 
3249 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3250                                                uint32_t &Minor) {
3251   if (ParseAsAbsoluteExpression(Major))
3252     return TokError("invalid major version");
3253 
3254   if (getLexer().isNot(AsmToken::Comma))
3255     return TokError("minor version number required, comma expected");
3256   Lex();
3257 
3258   if (ParseAsAbsoluteExpression(Minor))
3259     return TokError("invalid minor version");
3260 
3261   return false;
3262 }
3263 
3264 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3265   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3266     return TokError("directive only supported for amdgcn architecture");
3267 
3268   std::string Target;
3269 
3270   SMLoc TargetStart = getTok().getLoc();
3271   if (getParser().parseEscapedString(Target))
3272     return true;
3273   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3274 
3275   std::string ExpectedTarget;
3276   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3277   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3278 
3279   if (Target != ExpectedTargetOS.str())
3280     return getParser().Error(TargetRange.Start, "target must match options",
3281                              TargetRange);
3282 
3283   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3284   return false;
3285 }
3286 
3287 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3288   return getParser().Error(Range.Start, "value out of range", Range);
3289 }
3290 
3291 bool AMDGPUAsmParser::calculateGPRBlocks(
3292     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3293     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3294     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3295     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3296   // TODO(scott.linder): These calculations are duplicated from
3297   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3298   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3299 
3300   unsigned NumVGPRs = NextFreeVGPR;
3301   unsigned NumSGPRs = NextFreeSGPR;
3302 
3303   if (Version.Major >= 10)
3304     NumSGPRs = 0;
3305   else {
3306     unsigned MaxAddressableNumSGPRs =
3307         IsaInfo::getAddressableNumSGPRs(&getSTI());
3308 
3309     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3310         NumSGPRs > MaxAddressableNumSGPRs)
3311       return OutOfRangeError(SGPRRange);
3312 
3313     NumSGPRs +=
3314         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3315 
3316     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3317         NumSGPRs > MaxAddressableNumSGPRs)
3318       return OutOfRangeError(SGPRRange);
3319 
3320     if (Features.test(FeatureSGPRInitBug))
3321       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3322   }
3323 
3324   VGPRBlocks =
3325       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3326   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3327 
3328   return false;
3329 }
3330 
3331 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3332   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3333     return TokError("directive only supported for amdgcn architecture");
3334 
3335   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3336     return TokError("directive only supported for amdhsa OS");
3337 
3338   StringRef KernelName;
3339   if (getParser().parseIdentifier(KernelName))
3340     return true;
3341 
3342   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3343 
3344   StringSet<> Seen;
3345 
3346   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3347 
3348   SMRange VGPRRange;
3349   uint64_t NextFreeVGPR = 0;
3350   SMRange SGPRRange;
3351   uint64_t NextFreeSGPR = 0;
3352   unsigned UserSGPRCount = 0;
3353   bool ReserveVCC = true;
3354   bool ReserveFlatScr = true;
3355   bool ReserveXNACK = hasXNACK();
3356   Optional<bool> EnableWavefrontSize32;
3357 
3358   while (true) {
3359     while (getLexer().is(AsmToken::EndOfStatement))
3360       Lex();
3361 
3362     if (getLexer().isNot(AsmToken::Identifier))
3363       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3364 
3365     StringRef ID = getTok().getIdentifier();
3366     SMRange IDRange = getTok().getLocRange();
3367     Lex();
3368 
3369     if (ID == ".end_amdhsa_kernel")
3370       break;
3371 
3372     if (Seen.find(ID) != Seen.end())
3373       return TokError(".amdhsa_ directives cannot be repeated");
3374     Seen.insert(ID);
3375 
3376     SMLoc ValStart = getTok().getLoc();
3377     int64_t IVal;
3378     if (getParser().parseAbsoluteExpression(IVal))
3379       return true;
3380     SMLoc ValEnd = getTok().getLoc();
3381     SMRange ValRange = SMRange(ValStart, ValEnd);
3382 
3383     if (IVal < 0)
3384       return OutOfRangeError(ValRange);
3385 
3386     uint64_t Val = IVal;
3387 
3388 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3389   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3390     return OutOfRangeError(RANGE);                                             \
3391   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3392 
3393     if (ID == ".amdhsa_group_segment_fixed_size") {
3394       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3395         return OutOfRangeError(ValRange);
3396       KD.group_segment_fixed_size = Val;
3397     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3398       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3399         return OutOfRangeError(ValRange);
3400       KD.private_segment_fixed_size = Val;
3401     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3402       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3403                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3404                        Val, ValRange);
3405       UserSGPRCount += 4;
3406     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3407       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3408                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3409                        ValRange);
3410       UserSGPRCount += 2;
3411     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3412       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3413                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3414                        ValRange);
3415       UserSGPRCount += 2;
3416     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3417       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3418                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3419                        Val, ValRange);
3420       UserSGPRCount += 2;
3421     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3422       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3423                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3424                        ValRange);
3425       UserSGPRCount += 2;
3426     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3427       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3428                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3429                        ValRange);
3430       UserSGPRCount += 2;
3431     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3432       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3433                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3434                        Val, ValRange);
3435       UserSGPRCount += 1;
3436     } else if (ID == ".amdhsa_wavefront_size32") {
3437       if (IVersion.Major < 10)
3438         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3439                                  IDRange);
3440       EnableWavefrontSize32 = Val;
3441       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3442                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3443                        Val, ValRange);
3444     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3445       PARSE_BITS_ENTRY(
3446           KD.compute_pgm_rsrc2,
3447           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3448           ValRange);
3449     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3450       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3451                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3452                        ValRange);
3453     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3454       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3455                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3456                        ValRange);
3457     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3458       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3459                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3460                        ValRange);
3461     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3462       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3463                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3464                        ValRange);
3465     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3466       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3467                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3468                        ValRange);
3469     } else if (ID == ".amdhsa_next_free_vgpr") {
3470       VGPRRange = ValRange;
3471       NextFreeVGPR = Val;
3472     } else if (ID == ".amdhsa_next_free_sgpr") {
3473       SGPRRange = ValRange;
3474       NextFreeSGPR = Val;
3475     } else if (ID == ".amdhsa_reserve_vcc") {
3476       if (!isUInt<1>(Val))
3477         return OutOfRangeError(ValRange);
3478       ReserveVCC = Val;
3479     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3480       if (IVersion.Major < 7)
3481         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3482                                  IDRange);
3483       if (!isUInt<1>(Val))
3484         return OutOfRangeError(ValRange);
3485       ReserveFlatScr = Val;
3486     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3487       if (IVersion.Major < 8)
3488         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3489                                  IDRange);
3490       if (!isUInt<1>(Val))
3491         return OutOfRangeError(ValRange);
3492       ReserveXNACK = Val;
3493     } else if (ID == ".amdhsa_float_round_mode_32") {
3494       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3495                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3496     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3497       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3498                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3499     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3500       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3501                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3502     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3503       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3504                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3505                        ValRange);
3506     } else if (ID == ".amdhsa_dx10_clamp") {
3507       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3508                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3509     } else if (ID == ".amdhsa_ieee_mode") {
3510       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3511                        Val, ValRange);
3512     } else if (ID == ".amdhsa_fp16_overflow") {
3513       if (IVersion.Major < 9)
3514         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3515                                  IDRange);
3516       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3517                        ValRange);
3518     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3519       if (IVersion.Major < 10)
3520         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3521                                  IDRange);
3522       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3523                        ValRange);
3524     } else if (ID == ".amdhsa_memory_ordered") {
3525       if (IVersion.Major < 10)
3526         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3527                                  IDRange);
3528       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3529                        ValRange);
3530     } else if (ID == ".amdhsa_forward_progress") {
3531       if (IVersion.Major < 10)
3532         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3533                                  IDRange);
3534       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3535                        ValRange);
3536     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3537       PARSE_BITS_ENTRY(
3538           KD.compute_pgm_rsrc2,
3539           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3540           ValRange);
3541     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3542       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3543                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3544                        Val, ValRange);
3545     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3546       PARSE_BITS_ENTRY(
3547           KD.compute_pgm_rsrc2,
3548           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3549           ValRange);
3550     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3551       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3552                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3553                        Val, ValRange);
3554     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3555       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3556                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3557                        Val, ValRange);
3558     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3559       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3560                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3561                        Val, ValRange);
3562     } else if (ID == ".amdhsa_exception_int_div_zero") {
3563       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3564                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3565                        Val, ValRange);
3566     } else {
3567       return getParser().Error(IDRange.Start,
3568                                "unknown .amdhsa_kernel directive", IDRange);
3569     }
3570 
3571 #undef PARSE_BITS_ENTRY
3572   }
3573 
3574   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3575     return TokError(".amdhsa_next_free_vgpr directive is required");
3576 
3577   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3578     return TokError(".amdhsa_next_free_sgpr directive is required");
3579 
3580   unsigned VGPRBlocks;
3581   unsigned SGPRBlocks;
3582   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3583                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3584                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3585                          SGPRBlocks))
3586     return true;
3587 
3588   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3589           VGPRBlocks))
3590     return OutOfRangeError(VGPRRange);
3591   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3592                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3593 
3594   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3595           SGPRBlocks))
3596     return OutOfRangeError(SGPRRange);
3597   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3598                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3599                   SGPRBlocks);
3600 
3601   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3602     return TokError("too many user SGPRs enabled");
3603   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3604                   UserSGPRCount);
3605 
3606   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3607       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3608       ReserveFlatScr, ReserveXNACK);
3609   return false;
3610 }
3611 
3612 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3613   uint32_t Major;
3614   uint32_t Minor;
3615 
3616   if (ParseDirectiveMajorMinor(Major, Minor))
3617     return true;
3618 
3619   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3620   return false;
3621 }
3622 
3623 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3624   uint32_t Major;
3625   uint32_t Minor;
3626   uint32_t Stepping;
3627   StringRef VendorName;
3628   StringRef ArchName;
3629 
3630   // If this directive has no arguments, then use the ISA version for the
3631   // targeted GPU.
3632   if (getLexer().is(AsmToken::EndOfStatement)) {
3633     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3634     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3635                                                       ISA.Stepping,
3636                                                       "AMD", "AMDGPU");
3637     return false;
3638   }
3639 
3640   if (ParseDirectiveMajorMinor(Major, Minor))
3641     return true;
3642 
3643   if (getLexer().isNot(AsmToken::Comma))
3644     return TokError("stepping version number required, comma expected");
3645   Lex();
3646 
3647   if (ParseAsAbsoluteExpression(Stepping))
3648     return TokError("invalid stepping version");
3649 
3650   if (getLexer().isNot(AsmToken::Comma))
3651     return TokError("vendor name required, comma expected");
3652   Lex();
3653 
3654   if (getLexer().isNot(AsmToken::String))
3655     return TokError("invalid vendor name");
3656 
3657   VendorName = getLexer().getTok().getStringContents();
3658   Lex();
3659 
3660   if (getLexer().isNot(AsmToken::Comma))
3661     return TokError("arch name required, comma expected");
3662   Lex();
3663 
3664   if (getLexer().isNot(AsmToken::String))
3665     return TokError("invalid arch name");
3666 
3667   ArchName = getLexer().getTok().getStringContents();
3668   Lex();
3669 
3670   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3671                                                     VendorName, ArchName);
3672   return false;
3673 }
3674 
3675 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3676                                                amd_kernel_code_t &Header) {
3677   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3678   // assembly for backwards compatibility.
3679   if (ID == "max_scratch_backing_memory_byte_size") {
3680     Parser.eatToEndOfStatement();
3681     return false;
3682   }
3683 
3684   SmallString<40> ErrStr;
3685   raw_svector_ostream Err(ErrStr);
3686   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3687     return TokError(Err.str());
3688   }
3689   Lex();
3690 
3691   if (ID == "enable_wavefront_size32") {
3692     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3693       if (!isGFX10())
3694         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3695       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3696         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3697     } else {
3698       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3699         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3700     }
3701   }
3702 
3703   if (ID == "wavefront_size") {
3704     if (Header.wavefront_size == 5) {
3705       if (!isGFX10())
3706         return TokError("wavefront_size=5 is only allowed on GFX10+");
3707       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3708         return TokError("wavefront_size=5 requires +WavefrontSize32");
3709     } else if (Header.wavefront_size == 6) {
3710       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3711         return TokError("wavefront_size=6 requires +WavefrontSize64");
3712     }
3713   }
3714 
3715   if (ID == "enable_wgp_mode") {
3716     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3717       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3718   }
3719 
3720   if (ID == "enable_mem_ordered") {
3721     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3722       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3723   }
3724 
3725   if (ID == "enable_fwd_progress") {
3726     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3727       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3728   }
3729 
3730   return false;
3731 }
3732 
3733 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3734   amd_kernel_code_t Header;
3735   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3736 
3737   while (true) {
3738     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3739     // will set the current token to EndOfStatement.
3740     while(getLexer().is(AsmToken::EndOfStatement))
3741       Lex();
3742 
3743     if (getLexer().isNot(AsmToken::Identifier))
3744       return TokError("expected value identifier or .end_amd_kernel_code_t");
3745 
3746     StringRef ID = getLexer().getTok().getIdentifier();
3747     Lex();
3748 
3749     if (ID == ".end_amd_kernel_code_t")
3750       break;
3751 
3752     if (ParseAMDKernelCodeTValue(ID, Header))
3753       return true;
3754   }
3755 
3756   getTargetStreamer().EmitAMDKernelCodeT(Header);
3757 
3758   return false;
3759 }
3760 
3761 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3762   if (getLexer().isNot(AsmToken::Identifier))
3763     return TokError("expected symbol name");
3764 
3765   StringRef KernelName = Parser.getTok().getString();
3766 
3767   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3768                                            ELF::STT_AMDGPU_HSA_KERNEL);
3769   Lex();
3770   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3771     KernelScope.initialize(getContext());
3772   return false;
3773 }
3774 
3775 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3776   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3777     return Error(getParser().getTok().getLoc(),
3778                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3779                  "architectures");
3780   }
3781 
3782   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3783 
3784   std::string ISAVersionStringFromSTI;
3785   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3786   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3787 
3788   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3789     return Error(getParser().getTok().getLoc(),
3790                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3791                  "arguments specified through the command line");
3792   }
3793 
3794   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3795   Lex();
3796 
3797   return false;
3798 }
3799 
3800 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3801   const char *AssemblerDirectiveBegin;
3802   const char *AssemblerDirectiveEnd;
3803   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3804       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3805           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3806                             HSAMD::V3::AssemblerDirectiveEnd)
3807           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3808                             HSAMD::AssemblerDirectiveEnd);
3809 
3810   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3811     return Error(getParser().getTok().getLoc(),
3812                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3813                  "not available on non-amdhsa OSes")).str());
3814   }
3815 
3816   std::string HSAMetadataString;
3817   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3818                           HSAMetadataString))
3819     return true;
3820 
3821   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3822     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3823       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3824   } else {
3825     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3826       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3827   }
3828 
3829   return false;
3830 }
3831 
3832 /// Common code to parse out a block of text (typically YAML) between start and
3833 /// end directives.
3834 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3835                                           const char *AssemblerDirectiveEnd,
3836                                           std::string &CollectString) {
3837 
3838   raw_string_ostream CollectStream(CollectString);
3839 
3840   getLexer().setSkipSpace(false);
3841 
3842   bool FoundEnd = false;
3843   while (!getLexer().is(AsmToken::Eof)) {
3844     while (getLexer().is(AsmToken::Space)) {
3845       CollectStream << getLexer().getTok().getString();
3846       Lex();
3847     }
3848 
3849     if (getLexer().is(AsmToken::Identifier)) {
3850       StringRef ID = getLexer().getTok().getIdentifier();
3851       if (ID == AssemblerDirectiveEnd) {
3852         Lex();
3853         FoundEnd = true;
3854         break;
3855       }
3856     }
3857 
3858     CollectStream << Parser.parseStringToEndOfStatement()
3859                   << getContext().getAsmInfo()->getSeparatorString();
3860 
3861     Parser.eatToEndOfStatement();
3862   }
3863 
3864   getLexer().setSkipSpace(true);
3865 
3866   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3867     return TokError(Twine("expected directive ") +
3868                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3869   }
3870 
3871   CollectStream.flush();
3872   return false;
3873 }
3874 
3875 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3876 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3877   std::string String;
3878   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3879                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3880     return true;
3881 
3882   auto PALMetadata = getTargetStreamer().getPALMetadata();
3883   if (!PALMetadata->setFromString(String))
3884     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3885   return false;
3886 }
3887 
3888 /// Parse the assembler directive for old linear-format PAL metadata.
3889 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3890   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3891     return Error(getParser().getTok().getLoc(),
3892                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3893                  "not available on non-amdpal OSes")).str());
3894   }
3895 
3896   auto PALMetadata = getTargetStreamer().getPALMetadata();
3897   PALMetadata->setLegacy();
3898   for (;;) {
3899     uint32_t Key, Value;
3900     if (ParseAsAbsoluteExpression(Key)) {
3901       return TokError(Twine("invalid value in ") +
3902                       Twine(PALMD::AssemblerDirective));
3903     }
3904     if (getLexer().isNot(AsmToken::Comma)) {
3905       return TokError(Twine("expected an even number of values in ") +
3906                       Twine(PALMD::AssemblerDirective));
3907     }
3908     Lex();
3909     if (ParseAsAbsoluteExpression(Value)) {
3910       return TokError(Twine("invalid value in ") +
3911                       Twine(PALMD::AssemblerDirective));
3912     }
3913     PALMetadata->setRegister(Key, Value);
3914     if (getLexer().isNot(AsmToken::Comma))
3915       break;
3916     Lex();
3917   }
3918   return false;
3919 }
3920 
3921 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3922   StringRef IDVal = DirectiveID.getString();
3923 
3924   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3925     if (IDVal == ".amdgcn_target")
3926       return ParseDirectiveAMDGCNTarget();
3927 
3928     if (IDVal == ".amdhsa_kernel")
3929       return ParseDirectiveAMDHSAKernel();
3930 
3931     // TODO: Restructure/combine with PAL metadata directive.
3932     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3933       return ParseDirectiveHSAMetadata();
3934   } else {
3935     if (IDVal == ".hsa_code_object_version")
3936       return ParseDirectiveHSACodeObjectVersion();
3937 
3938     if (IDVal == ".hsa_code_object_isa")
3939       return ParseDirectiveHSACodeObjectISA();
3940 
3941     if (IDVal == ".amd_kernel_code_t")
3942       return ParseDirectiveAMDKernelCodeT();
3943 
3944     if (IDVal == ".amdgpu_hsa_kernel")
3945       return ParseDirectiveAMDGPUHsaKernel();
3946 
3947     if (IDVal == ".amd_amdgpu_isa")
3948       return ParseDirectiveISAVersion();
3949 
3950     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3951       return ParseDirectiveHSAMetadata();
3952   }
3953 
3954   if (IDVal == PALMD::AssemblerDirectiveBegin)
3955     return ParseDirectivePALMetadataBegin();
3956 
3957   if (IDVal == PALMD::AssemblerDirective)
3958     return ParseDirectivePALMetadata();
3959 
3960   return true;
3961 }
3962 
3963 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3964                                            unsigned RegNo) const {
3965 
3966   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3967        R.isValid(); ++R) {
3968     if (*R == RegNo)
3969       return isGFX9() || isGFX10();
3970   }
3971 
3972   // GFX10 has 2 more SGPRs 104 and 105.
3973   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3974        R.isValid(); ++R) {
3975     if (*R == RegNo)
3976       return hasSGPR104_SGPR105();
3977   }
3978 
3979   switch (RegNo) {
3980   case AMDGPU::SRC_SHARED_BASE:
3981   case AMDGPU::SRC_SHARED_LIMIT:
3982   case AMDGPU::SRC_PRIVATE_BASE:
3983   case AMDGPU::SRC_PRIVATE_LIMIT:
3984   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
3985     return !isCI() && !isSI() && !isVI();
3986   case AMDGPU::TBA:
3987   case AMDGPU::TBA_LO:
3988   case AMDGPU::TBA_HI:
3989   case AMDGPU::TMA:
3990   case AMDGPU::TMA_LO:
3991   case AMDGPU::TMA_HI:
3992     return !isGFX9() && !isGFX10();
3993   case AMDGPU::XNACK_MASK:
3994   case AMDGPU::XNACK_MASK_LO:
3995   case AMDGPU::XNACK_MASK_HI:
3996     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3997   case AMDGPU::SGPR_NULL:
3998     return isGFX10();
3999   default:
4000     break;
4001   }
4002 
4003   if (isCI())
4004     return true;
4005 
4006   if (isSI() || isGFX10()) {
4007     // No flat_scr on SI.
4008     // On GFX10 flat scratch is not a valid register operand and can only be
4009     // accessed with s_setreg/s_getreg.
4010     switch (RegNo) {
4011     case AMDGPU::FLAT_SCR:
4012     case AMDGPU::FLAT_SCR_LO:
4013     case AMDGPU::FLAT_SCR_HI:
4014       return false;
4015     default:
4016       return true;
4017     }
4018   }
4019 
4020   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4021   // SI/CI have.
4022   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4023        R.isValid(); ++R) {
4024     if (*R == RegNo)
4025       return hasSGPR102_SGPR103();
4026   }
4027 
4028   return true;
4029 }
4030 
4031 OperandMatchResultTy
4032 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4033                               OperandMode Mode) {
4034   // Try to parse with a custom parser
4035   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4036 
4037   // If we successfully parsed the operand or if there as an error parsing,
4038   // we are done.
4039   //
4040   // If we are parsing after we reach EndOfStatement then this means we
4041   // are appending default values to the Operands list.  This is only done
4042   // by custom parser, so we shouldn't continue on to the generic parsing.
4043   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4044       getLexer().is(AsmToken::EndOfStatement))
4045     return ResTy;
4046 
4047   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4048     unsigned Prefix = Operands.size();
4049     SMLoc LBraceLoc = getTok().getLoc();
4050     Parser.Lex(); // eat the '['
4051 
4052     for (;;) {
4053       ResTy = parseReg(Operands);
4054       if (ResTy != MatchOperand_Success)
4055         return ResTy;
4056 
4057       if (getLexer().is(AsmToken::RBrac))
4058         break;
4059 
4060       if (getLexer().isNot(AsmToken::Comma))
4061         return MatchOperand_ParseFail;
4062       Parser.Lex();
4063     }
4064 
4065     if (Operands.size() - Prefix > 1) {
4066       Operands.insert(Operands.begin() + Prefix,
4067                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4068       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4069                                                     getTok().getLoc()));
4070     }
4071 
4072     Parser.Lex(); // eat the ']'
4073     return MatchOperand_Success;
4074   }
4075 
4076   return parseRegOrImm(Operands);
4077 }
4078 
4079 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4080   // Clear any forced encodings from the previous instruction.
4081   setForcedEncodingSize(0);
4082   setForcedDPP(false);
4083   setForcedSDWA(false);
4084 
4085   if (Name.endswith("_e64")) {
4086     setForcedEncodingSize(64);
4087     return Name.substr(0, Name.size() - 4);
4088   } else if (Name.endswith("_e32")) {
4089     setForcedEncodingSize(32);
4090     return Name.substr(0, Name.size() - 4);
4091   } else if (Name.endswith("_dpp")) {
4092     setForcedDPP(true);
4093     return Name.substr(0, Name.size() - 4);
4094   } else if (Name.endswith("_sdwa")) {
4095     setForcedSDWA(true);
4096     return Name.substr(0, Name.size() - 5);
4097   }
4098   return Name;
4099 }
4100 
4101 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4102                                        StringRef Name,
4103                                        SMLoc NameLoc, OperandVector &Operands) {
4104   // Add the instruction mnemonic
4105   Name = parseMnemonicSuffix(Name);
4106   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4107 
4108   bool IsMIMG = Name.startswith("image_");
4109 
4110   while (!getLexer().is(AsmToken::EndOfStatement)) {
4111     OperandMode Mode = OperandMode_Default;
4112     if (IsMIMG && isGFX10() && Operands.size() == 2)
4113       Mode = OperandMode_NSA;
4114     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4115 
4116     // Eat the comma or space if there is one.
4117     if (getLexer().is(AsmToken::Comma))
4118       Parser.Lex();
4119 
4120     switch (Res) {
4121       case MatchOperand_Success: break;
4122       case MatchOperand_ParseFail:
4123         // FIXME: use real operand location rather than the current location.
4124         Error(getLexer().getLoc(), "failed parsing operand.");
4125         while (!getLexer().is(AsmToken::EndOfStatement)) {
4126           Parser.Lex();
4127         }
4128         return true;
4129       case MatchOperand_NoMatch:
4130         // FIXME: use real operand location rather than the current location.
4131         Error(getLexer().getLoc(), "not a valid operand.");
4132         while (!getLexer().is(AsmToken::EndOfStatement)) {
4133           Parser.Lex();
4134         }
4135         return true;
4136     }
4137   }
4138 
4139   return false;
4140 }
4141 
4142 //===----------------------------------------------------------------------===//
4143 // Utility functions
4144 //===----------------------------------------------------------------------===//
4145 
4146 OperandMatchResultTy
4147 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4148 
4149   if (!trySkipId(Prefix, AsmToken::Colon))
4150     return MatchOperand_NoMatch;
4151 
4152   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4153 }
4154 
4155 OperandMatchResultTy
4156 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4157                                     AMDGPUOperand::ImmTy ImmTy,
4158                                     bool (*ConvertResult)(int64_t&)) {
4159   SMLoc S = getLoc();
4160   int64_t Value = 0;
4161 
4162   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4163   if (Res != MatchOperand_Success)
4164     return Res;
4165 
4166   if (ConvertResult && !ConvertResult(Value)) {
4167     Error(S, "invalid " + StringRef(Prefix) + " value.");
4168   }
4169 
4170   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4171   return MatchOperand_Success;
4172 }
4173 
4174 OperandMatchResultTy
4175 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4176                                              OperandVector &Operands,
4177                                              AMDGPUOperand::ImmTy ImmTy,
4178                                              bool (*ConvertResult)(int64_t&)) {
4179   SMLoc S = getLoc();
4180   if (!trySkipId(Prefix, AsmToken::Colon))
4181     return MatchOperand_NoMatch;
4182 
4183   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4184     return MatchOperand_ParseFail;
4185 
4186   unsigned Val = 0;
4187   const unsigned MaxSize = 4;
4188 
4189   // FIXME: How to verify the number of elements matches the number of src
4190   // operands?
4191   for (int I = 0; ; ++I) {
4192     int64_t Op;
4193     SMLoc Loc = getLoc();
4194     if (!parseExpr(Op))
4195       return MatchOperand_ParseFail;
4196 
4197     if (Op != 0 && Op != 1) {
4198       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4199       return MatchOperand_ParseFail;
4200     }
4201 
4202     Val |= (Op << I);
4203 
4204     if (trySkipToken(AsmToken::RBrac))
4205       break;
4206 
4207     if (I + 1 == MaxSize) {
4208       Error(getLoc(), "expected a closing square bracket");
4209       return MatchOperand_ParseFail;
4210     }
4211 
4212     if (!skipToken(AsmToken::Comma, "expected a comma"))
4213       return MatchOperand_ParseFail;
4214   }
4215 
4216   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4217   return MatchOperand_Success;
4218 }
4219 
4220 OperandMatchResultTy
4221 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4222                                AMDGPUOperand::ImmTy ImmTy) {
4223   int64_t Bit = 0;
4224   SMLoc S = Parser.getTok().getLoc();
4225 
4226   // We are at the end of the statement, and this is a default argument, so
4227   // use a default value.
4228   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4229     switch(getLexer().getKind()) {
4230       case AsmToken::Identifier: {
4231         StringRef Tok = Parser.getTok().getString();
4232         if (Tok == Name) {
4233           if (Tok == "r128" && isGFX9())
4234             Error(S, "r128 modifier is not supported on this GPU");
4235           if (Tok == "a16" && !isGFX9())
4236             Error(S, "a16 modifier is not supported on this GPU");
4237           Bit = 1;
4238           Parser.Lex();
4239         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4240           Bit = 0;
4241           Parser.Lex();
4242         } else {
4243           return MatchOperand_NoMatch;
4244         }
4245         break;
4246       }
4247       default:
4248         return MatchOperand_NoMatch;
4249     }
4250   }
4251 
4252   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4253     return MatchOperand_ParseFail;
4254 
4255   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4256   return MatchOperand_Success;
4257 }
4258 
4259 static void addOptionalImmOperand(
4260   MCInst& Inst, const OperandVector& Operands,
4261   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4262   AMDGPUOperand::ImmTy ImmT,
4263   int64_t Default = 0) {
4264   auto i = OptionalIdx.find(ImmT);
4265   if (i != OptionalIdx.end()) {
4266     unsigned Idx = i->second;
4267     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4268   } else {
4269     Inst.addOperand(MCOperand::createImm(Default));
4270   }
4271 }
4272 
4273 OperandMatchResultTy
4274 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4275   if (getLexer().isNot(AsmToken::Identifier)) {
4276     return MatchOperand_NoMatch;
4277   }
4278   StringRef Tok = Parser.getTok().getString();
4279   if (Tok != Prefix) {
4280     return MatchOperand_NoMatch;
4281   }
4282 
4283   Parser.Lex();
4284   if (getLexer().isNot(AsmToken::Colon)) {
4285     return MatchOperand_ParseFail;
4286   }
4287 
4288   Parser.Lex();
4289   if (getLexer().isNot(AsmToken::Identifier)) {
4290     return MatchOperand_ParseFail;
4291   }
4292 
4293   Value = Parser.getTok().getString();
4294   return MatchOperand_Success;
4295 }
4296 
4297 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4298 // values to live in a joint format operand in the MCInst encoding.
4299 OperandMatchResultTy
4300 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4301   SMLoc S = Parser.getTok().getLoc();
4302   int64_t Dfmt = 0, Nfmt = 0;
4303   // dfmt and nfmt can appear in either order, and each is optional.
4304   bool GotDfmt = false, GotNfmt = false;
4305   while (!GotDfmt || !GotNfmt) {
4306     if (!GotDfmt) {
4307       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4308       if (Res != MatchOperand_NoMatch) {
4309         if (Res != MatchOperand_Success)
4310           return Res;
4311         if (Dfmt >= 16) {
4312           Error(Parser.getTok().getLoc(), "out of range dfmt");
4313           return MatchOperand_ParseFail;
4314         }
4315         GotDfmt = true;
4316         Parser.Lex();
4317         continue;
4318       }
4319     }
4320     if (!GotNfmt) {
4321       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4322       if (Res != MatchOperand_NoMatch) {
4323         if (Res != MatchOperand_Success)
4324           return Res;
4325         if (Nfmt >= 8) {
4326           Error(Parser.getTok().getLoc(), "out of range nfmt");
4327           return MatchOperand_ParseFail;
4328         }
4329         GotNfmt = true;
4330         Parser.Lex();
4331         continue;
4332       }
4333     }
4334     break;
4335   }
4336   if (!GotDfmt && !GotNfmt)
4337     return MatchOperand_NoMatch;
4338   auto Format = Dfmt | Nfmt << 4;
4339   Operands.push_back(
4340       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4341   return MatchOperand_Success;
4342 }
4343 
4344 //===----------------------------------------------------------------------===//
4345 // ds
4346 //===----------------------------------------------------------------------===//
4347 
4348 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4349                                     const OperandVector &Operands) {
4350   OptionalImmIndexMap OptionalIdx;
4351 
4352   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4353     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4354 
4355     // Add the register arguments
4356     if (Op.isReg()) {
4357       Op.addRegOperands(Inst, 1);
4358       continue;
4359     }
4360 
4361     // Handle optional arguments
4362     OptionalIdx[Op.getImmTy()] = i;
4363   }
4364 
4365   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4366   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4367   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4368 
4369   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4370 }
4371 
4372 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4373                                 bool IsGdsHardcoded) {
4374   OptionalImmIndexMap OptionalIdx;
4375 
4376   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4377     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4378 
4379     // Add the register arguments
4380     if (Op.isReg()) {
4381       Op.addRegOperands(Inst, 1);
4382       continue;
4383     }
4384 
4385     if (Op.isToken() && Op.getToken() == "gds") {
4386       IsGdsHardcoded = true;
4387       continue;
4388     }
4389 
4390     // Handle optional arguments
4391     OptionalIdx[Op.getImmTy()] = i;
4392   }
4393 
4394   AMDGPUOperand::ImmTy OffsetType =
4395     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4396      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4397      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4398                                                       AMDGPUOperand::ImmTyOffset;
4399 
4400   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4401 
4402   if (!IsGdsHardcoded) {
4403     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4404   }
4405   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4406 }
4407 
4408 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4409   OptionalImmIndexMap OptionalIdx;
4410 
4411   unsigned OperandIdx[4];
4412   unsigned EnMask = 0;
4413   int SrcIdx = 0;
4414 
4415   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4416     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4417 
4418     // Add the register arguments
4419     if (Op.isReg()) {
4420       assert(SrcIdx < 4);
4421       OperandIdx[SrcIdx] = Inst.size();
4422       Op.addRegOperands(Inst, 1);
4423       ++SrcIdx;
4424       continue;
4425     }
4426 
4427     if (Op.isOff()) {
4428       assert(SrcIdx < 4);
4429       OperandIdx[SrcIdx] = Inst.size();
4430       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4431       ++SrcIdx;
4432       continue;
4433     }
4434 
4435     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4436       Op.addImmOperands(Inst, 1);
4437       continue;
4438     }
4439 
4440     if (Op.isToken() && Op.getToken() == "done")
4441       continue;
4442 
4443     // Handle optional arguments
4444     OptionalIdx[Op.getImmTy()] = i;
4445   }
4446 
4447   assert(SrcIdx == 4);
4448 
4449   bool Compr = false;
4450   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4451     Compr = true;
4452     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4453     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4454     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4455   }
4456 
4457   for (auto i = 0; i < SrcIdx; ++i) {
4458     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4459       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4460     }
4461   }
4462 
4463   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4464   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4465 
4466   Inst.addOperand(MCOperand::createImm(EnMask));
4467 }
4468 
4469 //===----------------------------------------------------------------------===//
4470 // s_waitcnt
4471 //===----------------------------------------------------------------------===//
4472 
4473 static bool
4474 encodeCnt(
4475   const AMDGPU::IsaVersion ISA,
4476   int64_t &IntVal,
4477   int64_t CntVal,
4478   bool Saturate,
4479   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4480   unsigned (*decode)(const IsaVersion &Version, unsigned))
4481 {
4482   bool Failed = false;
4483 
4484   IntVal = encode(ISA, IntVal, CntVal);
4485   if (CntVal != decode(ISA, IntVal)) {
4486     if (Saturate) {
4487       IntVal = encode(ISA, IntVal, -1);
4488     } else {
4489       Failed = true;
4490     }
4491   }
4492   return Failed;
4493 }
4494 
4495 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4496 
4497   SMLoc CntLoc = getLoc();
4498   StringRef CntName = getTokenStr();
4499 
4500   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4501       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4502     return false;
4503 
4504   int64_t CntVal;
4505   SMLoc ValLoc = getLoc();
4506   if (!parseExpr(CntVal))
4507     return false;
4508 
4509   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4510 
4511   bool Failed = true;
4512   bool Sat = CntName.endswith("_sat");
4513 
4514   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4515     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4516   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4517     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4518   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4519     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4520   } else {
4521     Error(CntLoc, "invalid counter name " + CntName);
4522     return false;
4523   }
4524 
4525   if (Failed) {
4526     Error(ValLoc, "too large value for " + CntName);
4527     return false;
4528   }
4529 
4530   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4531     return false;
4532 
4533   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4534     if (isToken(AsmToken::EndOfStatement)) {
4535       Error(getLoc(), "expected a counter name");
4536       return false;
4537     }
4538   }
4539 
4540   return true;
4541 }
4542 
4543 OperandMatchResultTy
4544 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4545   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4546   int64_t Waitcnt = getWaitcntBitMask(ISA);
4547   SMLoc S = getLoc();
4548 
4549   // If parse failed, do not return error code
4550   // to avoid excessive error messages.
4551   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4552     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4553   } else {
4554     parseExpr(Waitcnt);
4555   }
4556 
4557   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4558   return MatchOperand_Success;
4559 }
4560 
4561 bool
4562 AMDGPUOperand::isSWaitCnt() const {
4563   return isImm();
4564 }
4565 
4566 //===----------------------------------------------------------------------===//
4567 // hwreg
4568 //===----------------------------------------------------------------------===//
4569 
4570 bool
4571 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4572                                 int64_t &Offset,
4573                                 int64_t &Width) {
4574   using namespace llvm::AMDGPU::Hwreg;
4575 
4576   // The register may be specified by name or using a numeric code
4577   if (isToken(AsmToken::Identifier) &&
4578       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4579     HwReg.IsSymbolic = true;
4580     lex(); // skip message name
4581   } else if (!parseExpr(HwReg.Id)) {
4582     return false;
4583   }
4584 
4585   if (trySkipToken(AsmToken::RParen))
4586     return true;
4587 
4588   // parse optional params
4589   return
4590     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4591     parseExpr(Offset) &&
4592     skipToken(AsmToken::Comma, "expected a comma") &&
4593     parseExpr(Width) &&
4594     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4595 }
4596 
4597 void
4598 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4599                                const int64_t Offset,
4600                                const int64_t Width,
4601                                const SMLoc Loc) {
4602 
4603   using namespace llvm::AMDGPU::Hwreg;
4604 
4605   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4606     Error(Loc, "specified hardware register is not supported on this GPU");
4607   } else if (!isValidHwreg(HwReg.Id)) {
4608     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4609   } else if (!isValidHwregOffset(Offset)) {
4610     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4611   } else if (!isValidHwregWidth(Width)) {
4612     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4613   }
4614 }
4615 
4616 OperandMatchResultTy
4617 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4618   using namespace llvm::AMDGPU::Hwreg;
4619 
4620   int64_t ImmVal = 0;
4621   SMLoc Loc = getLoc();
4622 
4623   // If parse failed, do not return error code
4624   // to avoid excessive error messages.
4625   if (trySkipId("hwreg", AsmToken::LParen)) {
4626     OperandInfoTy HwReg(ID_UNKNOWN_);
4627     int64_t Offset = OFFSET_DEFAULT_;
4628     int64_t Width = WIDTH_DEFAULT_;
4629     if (parseHwregBody(HwReg, Offset, Width)) {
4630       validateHwreg(HwReg, Offset, Width, Loc);
4631       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4632     }
4633   } else if (parseExpr(ImmVal)) {
4634     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4635       Error(Loc, "invalid immediate: only 16-bit values are legal");
4636   }
4637 
4638   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4639   return MatchOperand_Success;
4640 }
4641 
4642 bool AMDGPUOperand::isHwreg() const {
4643   return isImmTy(ImmTyHwreg);
4644 }
4645 
4646 //===----------------------------------------------------------------------===//
4647 // sendmsg
4648 //===----------------------------------------------------------------------===//
4649 
4650 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4651   using namespace llvm::AMDGPU::SendMsg;
4652 
4653   if (Parser.getTok().getString() != "sendmsg")
4654     return true;
4655   Parser.Lex();
4656 
4657   if (getLexer().isNot(AsmToken::LParen))
4658     return true;
4659   Parser.Lex();
4660 
4661   if (getLexer().is(AsmToken::Identifier)) {
4662     Msg.IsSymbolic = true;
4663     Msg.Id = ID_UNKNOWN_;
4664     const std::string tok = Parser.getTok().getString();
4665     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4666       switch(i) {
4667         default: continue; // Omit gaps.
4668         case ID_GS_ALLOC_REQ:
4669           if (isSI() || isCI() || isVI())
4670             continue;
4671           break;
4672         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4673         case ID_SYSMSG: break;
4674       }
4675       if (tok == IdSymbolic[i]) {
4676         Msg.Id = i;
4677         break;
4678       }
4679     }
4680     Parser.Lex();
4681   } else {
4682     Msg.IsSymbolic = false;
4683     if (getLexer().isNot(AsmToken::Integer))
4684       return true;
4685     if (getParser().parseAbsoluteExpression(Msg.Id))
4686       return true;
4687     if (getLexer().is(AsmToken::Integer))
4688       if (getParser().parseAbsoluteExpression(Msg.Id))
4689         Msg.Id = ID_UNKNOWN_;
4690   }
4691   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4692     return false;
4693 
4694   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4695     if (getLexer().isNot(AsmToken::RParen))
4696       return true;
4697     Parser.Lex();
4698     return false;
4699   }
4700 
4701   if (getLexer().isNot(AsmToken::Comma))
4702     return true;
4703   Parser.Lex();
4704 
4705   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4706   Operation.Id = ID_UNKNOWN_;
4707   if (getLexer().is(AsmToken::Identifier)) {
4708     Operation.IsSymbolic = true;
4709     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4710     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4711     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4712     const StringRef Tok = Parser.getTok().getString();
4713     for (int i = F; i < L; ++i) {
4714       if (Tok == S[i]) {
4715         Operation.Id = i;
4716         break;
4717       }
4718     }
4719     Parser.Lex();
4720   } else {
4721     Operation.IsSymbolic = false;
4722     if (getLexer().isNot(AsmToken::Integer))
4723       return true;
4724     if (getParser().parseAbsoluteExpression(Operation.Id))
4725       return true;
4726   }
4727 
4728   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4729     // Stream id is optional.
4730     if (getLexer().is(AsmToken::RParen)) {
4731       Parser.Lex();
4732       return false;
4733     }
4734 
4735     if (getLexer().isNot(AsmToken::Comma))
4736       return true;
4737     Parser.Lex();
4738 
4739     if (getLexer().isNot(AsmToken::Integer))
4740       return true;
4741     if (getParser().parseAbsoluteExpression(StreamId))
4742       return true;
4743   }
4744 
4745   if (getLexer().isNot(AsmToken::RParen))
4746     return true;
4747   Parser.Lex();
4748   return false;
4749 }
4750 
4751 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4752   if (getLexer().getKind() != AsmToken::Identifier)
4753     return MatchOperand_NoMatch;
4754 
4755   StringRef Str = Parser.getTok().getString();
4756   int Slot = StringSwitch<int>(Str)
4757     .Case("p10", 0)
4758     .Case("p20", 1)
4759     .Case("p0", 2)
4760     .Default(-1);
4761 
4762   SMLoc S = Parser.getTok().getLoc();
4763   if (Slot == -1)
4764     return MatchOperand_ParseFail;
4765 
4766   Parser.Lex();
4767   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4768                                               AMDGPUOperand::ImmTyInterpSlot));
4769   return MatchOperand_Success;
4770 }
4771 
4772 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4773   if (getLexer().getKind() != AsmToken::Identifier)
4774     return MatchOperand_NoMatch;
4775 
4776   StringRef Str = Parser.getTok().getString();
4777   if (!Str.startswith("attr"))
4778     return MatchOperand_NoMatch;
4779 
4780   StringRef Chan = Str.take_back(2);
4781   int AttrChan = StringSwitch<int>(Chan)
4782     .Case(".x", 0)
4783     .Case(".y", 1)
4784     .Case(".z", 2)
4785     .Case(".w", 3)
4786     .Default(-1);
4787   if (AttrChan == -1)
4788     return MatchOperand_ParseFail;
4789 
4790   Str = Str.drop_back(2).drop_front(4);
4791 
4792   uint8_t Attr;
4793   if (Str.getAsInteger(10, Attr))
4794     return MatchOperand_ParseFail;
4795 
4796   SMLoc S = Parser.getTok().getLoc();
4797   Parser.Lex();
4798   if (Attr > 63) {
4799     Error(S, "out of bounds attr");
4800     return MatchOperand_Success;
4801   }
4802 
4803   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4804 
4805   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4806                                               AMDGPUOperand::ImmTyInterpAttr));
4807   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4808                                               AMDGPUOperand::ImmTyAttrChan));
4809   return MatchOperand_Success;
4810 }
4811 
4812 void AMDGPUAsmParser::errorExpTgt() {
4813   Error(Parser.getTok().getLoc(), "invalid exp target");
4814 }
4815 
4816 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4817                                                       uint8_t &Val) {
4818   if (Str == "null") {
4819     Val = 9;
4820     return MatchOperand_Success;
4821   }
4822 
4823   if (Str.startswith("mrt")) {
4824     Str = Str.drop_front(3);
4825     if (Str == "z") { // == mrtz
4826       Val = 8;
4827       return MatchOperand_Success;
4828     }
4829 
4830     if (Str.getAsInteger(10, Val))
4831       return MatchOperand_ParseFail;
4832 
4833     if (Val > 7)
4834       errorExpTgt();
4835 
4836     return MatchOperand_Success;
4837   }
4838 
4839   if (Str.startswith("pos")) {
4840     Str = Str.drop_front(3);
4841     if (Str.getAsInteger(10, Val))
4842       return MatchOperand_ParseFail;
4843 
4844     if (Val > 4 || (Val == 4 && !isGFX10()))
4845       errorExpTgt();
4846 
4847     Val += 12;
4848     return MatchOperand_Success;
4849   }
4850 
4851   if (isGFX10() && Str == "prim") {
4852     Val = 20;
4853     return MatchOperand_Success;
4854   }
4855 
4856   if (Str.startswith("param")) {
4857     Str = Str.drop_front(5);
4858     if (Str.getAsInteger(10, Val))
4859       return MatchOperand_ParseFail;
4860 
4861     if (Val >= 32)
4862       errorExpTgt();
4863 
4864     Val += 32;
4865     return MatchOperand_Success;
4866   }
4867 
4868   if (Str.startswith("invalid_target_")) {
4869     Str = Str.drop_front(15);
4870     if (Str.getAsInteger(10, Val))
4871       return MatchOperand_ParseFail;
4872 
4873     errorExpTgt();
4874     return MatchOperand_Success;
4875   }
4876 
4877   return MatchOperand_NoMatch;
4878 }
4879 
4880 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4881   uint8_t Val;
4882   StringRef Str = Parser.getTok().getString();
4883 
4884   auto Res = parseExpTgtImpl(Str, Val);
4885   if (Res != MatchOperand_Success)
4886     return Res;
4887 
4888   SMLoc S = Parser.getTok().getLoc();
4889   Parser.Lex();
4890 
4891   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4892                                               AMDGPUOperand::ImmTyExpTgt));
4893   return MatchOperand_Success;
4894 }
4895 
4896 OperandMatchResultTy
4897 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4898   using namespace llvm::AMDGPU::SendMsg;
4899 
4900   int64_t Imm16Val = 0;
4901   SMLoc S = Parser.getTok().getLoc();
4902 
4903   switch(getLexer().getKind()) {
4904   default:
4905     return MatchOperand_NoMatch;
4906   case AsmToken::Integer:
4907     // The operand can be an integer value.
4908     if (getParser().parseAbsoluteExpression(Imm16Val))
4909       return MatchOperand_NoMatch;
4910     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4911       Error(S, "invalid immediate: only 16-bit values are legal");
4912       // Do not return error code, but create an imm operand anyway and proceed
4913       // to the next operand, if any. That avoids unneccessary error messages.
4914     }
4915     break;
4916   case AsmToken::Identifier: {
4917       OperandInfoTy Msg(ID_UNKNOWN_);
4918       OperandInfoTy Operation(OP_UNKNOWN_);
4919       int64_t StreamId = STREAM_ID_DEFAULT_;
4920       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4921         return MatchOperand_ParseFail;
4922       do {
4923         // Validate and encode message ID.
4924         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4925                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4926                 || Msg.Id == ID_SYSMSG)) {
4927           if (Msg.IsSymbolic)
4928             Error(S, "invalid/unsupported symbolic name of message");
4929           else
4930             Error(S, "invalid/unsupported code of message");
4931           break;
4932         }
4933         Imm16Val = (Msg.Id << ID_SHIFT_);
4934         // Validate and encode operation ID.
4935         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4936           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4937             if (Operation.IsSymbolic)
4938               Error(S, "invalid symbolic name of GS_OP");
4939             else
4940               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4941             break;
4942           }
4943           if (Operation.Id == OP_GS_NOP
4944               && Msg.Id != ID_GS_DONE) {
4945             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4946             break;
4947           }
4948           Imm16Val |= (Operation.Id << OP_SHIFT_);
4949         }
4950         if (Msg.Id == ID_SYSMSG) {
4951           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4952             if (Operation.IsSymbolic)
4953               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4954             else
4955               Error(S, "invalid/unsupported code of SYSMSG_OP");
4956             break;
4957           }
4958           Imm16Val |= (Operation.Id << OP_SHIFT_);
4959         }
4960         // Validate and encode stream ID.
4961         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4962           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4963             Error(S, "invalid stream id: only 2-bit values are legal");
4964             break;
4965           }
4966           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4967         }
4968       } while (false);
4969     }
4970     break;
4971   }
4972   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4973   return MatchOperand_Success;
4974 }
4975 
4976 bool AMDGPUOperand::isSendMsg() const {
4977   return isImmTy(ImmTySendMsg);
4978 }
4979 
4980 //===----------------------------------------------------------------------===//
4981 // parser helpers
4982 //===----------------------------------------------------------------------===//
4983 
4984 bool
4985 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4986   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4987 }
4988 
4989 bool
4990 AMDGPUAsmParser::isId(const StringRef Id) const {
4991   return isId(getToken(), Id);
4992 }
4993 
4994 bool
4995 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4996   return getTokenKind() == Kind;
4997 }
4998 
4999 bool
5000 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5001   if (isId(Id)) {
5002     lex();
5003     return true;
5004   }
5005   return false;
5006 }
5007 
5008 bool
5009 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5010   if (isId(Id) && peekToken().is(Kind)) {
5011     lex();
5012     lex();
5013     return true;
5014   }
5015   return false;
5016 }
5017 
5018 bool
5019 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5020   if (isToken(Kind)) {
5021     lex();
5022     return true;
5023   }
5024   return false;
5025 }
5026 
5027 bool
5028 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5029                            const StringRef ErrMsg) {
5030   if (!trySkipToken(Kind)) {
5031     Error(getLoc(), ErrMsg);
5032     return false;
5033   }
5034   return true;
5035 }
5036 
5037 bool
5038 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5039   return !getParser().parseAbsoluteExpression(Imm);
5040 }
5041 
5042 bool
5043 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5044   if (isToken(AsmToken::String)) {
5045     Val = getToken().getStringContents();
5046     lex();
5047     return true;
5048   } else {
5049     Error(getLoc(), ErrMsg);
5050     return false;
5051   }
5052 }
5053 
5054 AsmToken
5055 AMDGPUAsmParser::getToken() const {
5056   return Parser.getTok();
5057 }
5058 
5059 AsmToken
5060 AMDGPUAsmParser::peekToken() {
5061   return getLexer().peekTok();
5062 }
5063 
5064 void
5065 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5066   auto TokCount = getLexer().peekTokens(Tokens);
5067 
5068   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5069     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5070 }
5071 
5072 AsmToken::TokenKind
5073 AMDGPUAsmParser::getTokenKind() const {
5074   return getLexer().getKind();
5075 }
5076 
5077 SMLoc
5078 AMDGPUAsmParser::getLoc() const {
5079   return getToken().getLoc();
5080 }
5081 
5082 StringRef
5083 AMDGPUAsmParser::getTokenStr() const {
5084   return getToken().getString();
5085 }
5086 
5087 void
5088 AMDGPUAsmParser::lex() {
5089   Parser.Lex();
5090 }
5091 
5092 //===----------------------------------------------------------------------===//
5093 // swizzle
5094 //===----------------------------------------------------------------------===//
5095 
5096 LLVM_READNONE
5097 static unsigned
5098 encodeBitmaskPerm(const unsigned AndMask,
5099                   const unsigned OrMask,
5100                   const unsigned XorMask) {
5101   using namespace llvm::AMDGPU::Swizzle;
5102 
5103   return BITMASK_PERM_ENC |
5104          (AndMask << BITMASK_AND_SHIFT) |
5105          (OrMask  << BITMASK_OR_SHIFT)  |
5106          (XorMask << BITMASK_XOR_SHIFT);
5107 }
5108 
5109 bool
5110 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5111                                       const unsigned MinVal,
5112                                       const unsigned MaxVal,
5113                                       const StringRef ErrMsg) {
5114   for (unsigned i = 0; i < OpNum; ++i) {
5115     if (!skipToken(AsmToken::Comma, "expected a comma")){
5116       return false;
5117     }
5118     SMLoc ExprLoc = Parser.getTok().getLoc();
5119     if (!parseExpr(Op[i])) {
5120       return false;
5121     }
5122     if (Op[i] < MinVal || Op[i] > MaxVal) {
5123       Error(ExprLoc, ErrMsg);
5124       return false;
5125     }
5126   }
5127 
5128   return true;
5129 }
5130 
5131 bool
5132 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5133   using namespace llvm::AMDGPU::Swizzle;
5134 
5135   int64_t Lane[LANE_NUM];
5136   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5137                            "expected a 2-bit lane id")) {
5138     Imm = QUAD_PERM_ENC;
5139     for (unsigned I = 0; I < LANE_NUM; ++I) {
5140       Imm |= Lane[I] << (LANE_SHIFT * I);
5141     }
5142     return true;
5143   }
5144   return false;
5145 }
5146 
5147 bool
5148 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5149   using namespace llvm::AMDGPU::Swizzle;
5150 
5151   SMLoc S = Parser.getTok().getLoc();
5152   int64_t GroupSize;
5153   int64_t LaneIdx;
5154 
5155   if (!parseSwizzleOperands(1, &GroupSize,
5156                             2, 32,
5157                             "group size must be in the interval [2,32]")) {
5158     return false;
5159   }
5160   if (!isPowerOf2_64(GroupSize)) {
5161     Error(S, "group size must be a power of two");
5162     return false;
5163   }
5164   if (parseSwizzleOperands(1, &LaneIdx,
5165                            0, GroupSize - 1,
5166                            "lane id must be in the interval [0,group size - 1]")) {
5167     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5168     return true;
5169   }
5170   return false;
5171 }
5172 
5173 bool
5174 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5175   using namespace llvm::AMDGPU::Swizzle;
5176 
5177   SMLoc S = Parser.getTok().getLoc();
5178   int64_t GroupSize;
5179 
5180   if (!parseSwizzleOperands(1, &GroupSize,
5181       2, 32, "group size must be in the interval [2,32]")) {
5182     return false;
5183   }
5184   if (!isPowerOf2_64(GroupSize)) {
5185     Error(S, "group size must be a power of two");
5186     return false;
5187   }
5188 
5189   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5190   return true;
5191 }
5192 
5193 bool
5194 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5195   using namespace llvm::AMDGPU::Swizzle;
5196 
5197   SMLoc S = Parser.getTok().getLoc();
5198   int64_t GroupSize;
5199 
5200   if (!parseSwizzleOperands(1, &GroupSize,
5201       1, 16, "group size must be in the interval [1,16]")) {
5202     return false;
5203   }
5204   if (!isPowerOf2_64(GroupSize)) {
5205     Error(S, "group size must be a power of two");
5206     return false;
5207   }
5208 
5209   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5210   return true;
5211 }
5212 
5213 bool
5214 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5215   using namespace llvm::AMDGPU::Swizzle;
5216 
5217   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5218     return false;
5219   }
5220 
5221   StringRef Ctl;
5222   SMLoc StrLoc = Parser.getTok().getLoc();
5223   if (!parseString(Ctl)) {
5224     return false;
5225   }
5226   if (Ctl.size() != BITMASK_WIDTH) {
5227     Error(StrLoc, "expected a 5-character mask");
5228     return false;
5229   }
5230 
5231   unsigned AndMask = 0;
5232   unsigned OrMask = 0;
5233   unsigned XorMask = 0;
5234 
5235   for (size_t i = 0; i < Ctl.size(); ++i) {
5236     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5237     switch(Ctl[i]) {
5238     default:
5239       Error(StrLoc, "invalid mask");
5240       return false;
5241     case '0':
5242       break;
5243     case '1':
5244       OrMask |= Mask;
5245       break;
5246     case 'p':
5247       AndMask |= Mask;
5248       break;
5249     case 'i':
5250       AndMask |= Mask;
5251       XorMask |= Mask;
5252       break;
5253     }
5254   }
5255 
5256   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5257   return true;
5258 }
5259 
5260 bool
5261 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5262 
5263   SMLoc OffsetLoc = Parser.getTok().getLoc();
5264 
5265   if (!parseExpr(Imm)) {
5266     return false;
5267   }
5268   if (!isUInt<16>(Imm)) {
5269     Error(OffsetLoc, "expected a 16-bit offset");
5270     return false;
5271   }
5272   return true;
5273 }
5274 
5275 bool
5276 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5277   using namespace llvm::AMDGPU::Swizzle;
5278 
5279   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5280 
5281     SMLoc ModeLoc = Parser.getTok().getLoc();
5282     bool Ok = false;
5283 
5284     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5285       Ok = parseSwizzleQuadPerm(Imm);
5286     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5287       Ok = parseSwizzleBitmaskPerm(Imm);
5288     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5289       Ok = parseSwizzleBroadcast(Imm);
5290     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5291       Ok = parseSwizzleSwap(Imm);
5292     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5293       Ok = parseSwizzleReverse(Imm);
5294     } else {
5295       Error(ModeLoc, "expected a swizzle mode");
5296     }
5297 
5298     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5299   }
5300 
5301   return false;
5302 }
5303 
5304 OperandMatchResultTy
5305 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5306   SMLoc S = Parser.getTok().getLoc();
5307   int64_t Imm = 0;
5308 
5309   if (trySkipId("offset")) {
5310 
5311     bool Ok = false;
5312     if (skipToken(AsmToken::Colon, "expected a colon")) {
5313       if (trySkipId("swizzle")) {
5314         Ok = parseSwizzleMacro(Imm);
5315       } else {
5316         Ok = parseSwizzleOffset(Imm);
5317       }
5318     }
5319 
5320     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5321 
5322     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5323   } else {
5324     // Swizzle "offset" operand is optional.
5325     // If it is omitted, try parsing other optional operands.
5326     return parseOptionalOpr(Operands);
5327   }
5328 }
5329 
5330 bool
5331 AMDGPUOperand::isSwizzle() const {
5332   return isImmTy(ImmTySwizzle);
5333 }
5334 
5335 //===----------------------------------------------------------------------===//
5336 // VGPR Index Mode
5337 //===----------------------------------------------------------------------===//
5338 
5339 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5340 
5341   using namespace llvm::AMDGPU::VGPRIndexMode;
5342 
5343   if (trySkipToken(AsmToken::RParen)) {
5344     return OFF;
5345   }
5346 
5347   int64_t Imm = 0;
5348 
5349   while (true) {
5350     unsigned Mode = 0;
5351     SMLoc S = Parser.getTok().getLoc();
5352 
5353     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5354       if (trySkipId(IdSymbolic[ModeId])) {
5355         Mode = 1 << ModeId;
5356         break;
5357       }
5358     }
5359 
5360     if (Mode == 0) {
5361       Error(S, (Imm == 0)?
5362                "expected a VGPR index mode or a closing parenthesis" :
5363                "expected a VGPR index mode");
5364       break;
5365     }
5366 
5367     if (Imm & Mode) {
5368       Error(S, "duplicate VGPR index mode");
5369       break;
5370     }
5371     Imm |= Mode;
5372 
5373     if (trySkipToken(AsmToken::RParen))
5374       break;
5375     if (!skipToken(AsmToken::Comma,
5376                    "expected a comma or a closing parenthesis"))
5377       break;
5378   }
5379 
5380   return Imm;
5381 }
5382 
5383 OperandMatchResultTy
5384 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5385 
5386   int64_t Imm = 0;
5387   SMLoc S = Parser.getTok().getLoc();
5388 
5389   if (getLexer().getKind() == AsmToken::Identifier &&
5390       Parser.getTok().getString() == "gpr_idx" &&
5391       getLexer().peekTok().is(AsmToken::LParen)) {
5392 
5393     Parser.Lex();
5394     Parser.Lex();
5395 
5396     // If parse failed, trigger an error but do not return error code
5397     // to avoid excessive error messages.
5398     Imm = parseGPRIdxMacro();
5399 
5400   } else {
5401     if (getParser().parseAbsoluteExpression(Imm))
5402       return MatchOperand_NoMatch;
5403     if (Imm < 0 || !isUInt<4>(Imm)) {
5404       Error(S, "invalid immediate: only 4-bit values are legal");
5405     }
5406   }
5407 
5408   Operands.push_back(
5409       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5410   return MatchOperand_Success;
5411 }
5412 
5413 bool AMDGPUOperand::isGPRIdxMode() const {
5414   return isImmTy(ImmTyGprIdxMode);
5415 }
5416 
5417 //===----------------------------------------------------------------------===//
5418 // sopp branch targets
5419 //===----------------------------------------------------------------------===//
5420 
5421 OperandMatchResultTy
5422 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5423   SMLoc S = Parser.getTok().getLoc();
5424 
5425   switch (getLexer().getKind()) {
5426     default: return MatchOperand_ParseFail;
5427     case AsmToken::Integer: {
5428       int64_t Imm;
5429       if (getParser().parseAbsoluteExpression(Imm))
5430         return MatchOperand_ParseFail;
5431       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5432       return MatchOperand_Success;
5433     }
5434 
5435     case AsmToken::Identifier:
5436       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5437           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5438                                   Parser.getTok().getString()), getContext()), S));
5439       Parser.Lex();
5440       return MatchOperand_Success;
5441   }
5442 }
5443 
5444 //===----------------------------------------------------------------------===//
5445 // Boolean holding registers
5446 //===----------------------------------------------------------------------===//
5447 
5448 OperandMatchResultTy
5449 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5450   return parseReg(Operands);
5451 }
5452 
5453 //===----------------------------------------------------------------------===//
5454 // mubuf
5455 //===----------------------------------------------------------------------===//
5456 
5457 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5458   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5459 }
5460 
5461 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5462   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5463 }
5464 
5465 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5466   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5467 }
5468 
5469 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5470                                const OperandVector &Operands,
5471                                bool IsAtomic,
5472                                bool IsAtomicReturn,
5473                                bool IsLds) {
5474   bool IsLdsOpcode = IsLds;
5475   bool HasLdsModifier = false;
5476   OptionalImmIndexMap OptionalIdx;
5477   assert(IsAtomicReturn ? IsAtomic : true);
5478   unsigned FirstOperandIdx = 1;
5479 
5480   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5481     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5482 
5483     // Add the register arguments
5484     if (Op.isReg()) {
5485       Op.addRegOperands(Inst, 1);
5486       // Insert a tied src for atomic return dst.
5487       // This cannot be postponed as subsequent calls to
5488       // addImmOperands rely on correct number of MC operands.
5489       if (IsAtomicReturn && i == FirstOperandIdx)
5490         Op.addRegOperands(Inst, 1);
5491       continue;
5492     }
5493 
5494     // Handle the case where soffset is an immediate
5495     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5496       Op.addImmOperands(Inst, 1);
5497       continue;
5498     }
5499 
5500     HasLdsModifier |= Op.isLDS();
5501 
5502     // Handle tokens like 'offen' which are sometimes hard-coded into the
5503     // asm string.  There are no MCInst operands for these.
5504     if (Op.isToken()) {
5505       continue;
5506     }
5507     assert(Op.isImm());
5508 
5509     // Handle optional arguments
5510     OptionalIdx[Op.getImmTy()] = i;
5511   }
5512 
5513   // This is a workaround for an llvm quirk which may result in an
5514   // incorrect instruction selection. Lds and non-lds versions of
5515   // MUBUF instructions are identical except that lds versions
5516   // have mandatory 'lds' modifier. However this modifier follows
5517   // optional modifiers and llvm asm matcher regards this 'lds'
5518   // modifier as an optional one. As a result, an lds version
5519   // of opcode may be selected even if it has no 'lds' modifier.
5520   if (IsLdsOpcode && !HasLdsModifier) {
5521     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5522     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5523       Inst.setOpcode(NoLdsOpcode);
5524       IsLdsOpcode = false;
5525     }
5526   }
5527 
5528   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5529   if (!IsAtomic) { // glc is hard-coded.
5530     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5531   }
5532   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5533 
5534   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5535     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5536   }
5537 
5538   if (isGFX10())
5539     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5540 }
5541 
5542 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5543   OptionalImmIndexMap OptionalIdx;
5544 
5545   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5546     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5547 
5548     // Add the register arguments
5549     if (Op.isReg()) {
5550       Op.addRegOperands(Inst, 1);
5551       continue;
5552     }
5553 
5554     // Handle the case where soffset is an immediate
5555     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5556       Op.addImmOperands(Inst, 1);
5557       continue;
5558     }
5559 
5560     // Handle tokens like 'offen' which are sometimes hard-coded into the
5561     // asm string.  There are no MCInst operands for these.
5562     if (Op.isToken()) {
5563       continue;
5564     }
5565     assert(Op.isImm());
5566 
5567     // Handle optional arguments
5568     OptionalIdx[Op.getImmTy()] = i;
5569   }
5570 
5571   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5572                         AMDGPUOperand::ImmTyOffset);
5573   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5574   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5575   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5576   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5577 
5578   if (isGFX10())
5579     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5580 }
5581 
5582 //===----------------------------------------------------------------------===//
5583 // mimg
5584 //===----------------------------------------------------------------------===//
5585 
5586 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5587                               bool IsAtomic) {
5588   unsigned I = 1;
5589   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5590   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5591     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5592   }
5593 
5594   if (IsAtomic) {
5595     // Add src, same as dst
5596     assert(Desc.getNumDefs() == 1);
5597     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5598   }
5599 
5600   OptionalImmIndexMap OptionalIdx;
5601 
5602   for (unsigned E = Operands.size(); I != E; ++I) {
5603     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5604 
5605     // Add the register arguments
5606     if (Op.isReg()) {
5607       Op.addRegOperands(Inst, 1);
5608     } else if (Op.isImmModifier()) {
5609       OptionalIdx[Op.getImmTy()] = I;
5610     } else if (!Op.isToken()) {
5611       llvm_unreachable("unexpected operand type");
5612     }
5613   }
5614 
5615   bool IsGFX10 = isGFX10();
5616 
5617   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5618   if (IsGFX10)
5619     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5620   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5621   if (IsGFX10)
5622     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5623   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5624   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5625   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5626   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5627   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5628   if (!IsGFX10)
5629     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5630   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5631 }
5632 
5633 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5634   cvtMIMG(Inst, Operands, true);
5635 }
5636 
5637 //===----------------------------------------------------------------------===//
5638 // smrd
5639 //===----------------------------------------------------------------------===//
5640 
5641 bool AMDGPUOperand::isSMRDOffset8() const {
5642   return isImm() && isUInt<8>(getImm());
5643 }
5644 
5645 bool AMDGPUOperand::isSMRDOffset20() const {
5646   return isImm() && isUInt<20>(getImm());
5647 }
5648 
5649 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5650   // 32-bit literals are only supported on CI and we only want to use them
5651   // when the offset is > 8-bits.
5652   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5653 }
5654 
5655 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5656   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5657 }
5658 
5659 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5660   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5661 }
5662 
5663 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5664   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5665 }
5666 
5667 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5668   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5669 }
5670 
5671 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5672   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5673 }
5674 
5675 //===----------------------------------------------------------------------===//
5676 // vop3
5677 //===----------------------------------------------------------------------===//
5678 
5679 static bool ConvertOmodMul(int64_t &Mul) {
5680   if (Mul != 1 && Mul != 2 && Mul != 4)
5681     return false;
5682 
5683   Mul >>= 1;
5684   return true;
5685 }
5686 
5687 static bool ConvertOmodDiv(int64_t &Div) {
5688   if (Div == 1) {
5689     Div = 0;
5690     return true;
5691   }
5692 
5693   if (Div == 2) {
5694     Div = 3;
5695     return true;
5696   }
5697 
5698   return false;
5699 }
5700 
5701 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5702   if (BoundCtrl == 0) {
5703     BoundCtrl = 1;
5704     return true;
5705   }
5706 
5707   if (BoundCtrl == -1) {
5708     BoundCtrl = 0;
5709     return true;
5710   }
5711 
5712   return false;
5713 }
5714 
5715 // Note: the order in this table matches the order of operands in AsmString.
5716 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5717   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5718   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5719   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5720   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5721   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5722   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5723   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5724   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5725   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5726   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5727   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5728   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5729   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5730   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5731   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5732   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5733   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5734   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5735   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5736   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5737   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5738   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5739   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5740   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5741   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5742   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5743   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5744   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5745   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5746   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5747   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5748   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5749   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5750   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5751   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5752   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5753   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5754   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5755   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5756   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5757 };
5758 
5759 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5760   unsigned size = Operands.size();
5761   assert(size > 0);
5762 
5763   OperandMatchResultTy res = parseOptionalOpr(Operands);
5764 
5765   // This is a hack to enable hardcoded mandatory operands which follow
5766   // optional operands.
5767   //
5768   // Current design assumes that all operands after the first optional operand
5769   // are also optional. However implementation of some instructions violates
5770   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5771   //
5772   // To alleviate this problem, we have to (implicitly) parse extra operands
5773   // to make sure autogenerated parser of custom operands never hit hardcoded
5774   // mandatory operands.
5775 
5776   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5777 
5778     // We have parsed the first optional operand.
5779     // Parse as many operands as necessary to skip all mandatory operands.
5780 
5781     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5782       if (res != MatchOperand_Success ||
5783           getLexer().is(AsmToken::EndOfStatement)) break;
5784       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5785       res = parseOptionalOpr(Operands);
5786     }
5787   }
5788 
5789   return res;
5790 }
5791 
5792 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5793   OperandMatchResultTy res;
5794   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5795     // try to parse any optional operand here
5796     if (Op.IsBit) {
5797       res = parseNamedBit(Op.Name, Operands, Op.Type);
5798     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5799       res = parseOModOperand(Operands);
5800     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5801                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5802                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5803       res = parseSDWASel(Operands, Op.Name, Op.Type);
5804     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5805       res = parseSDWADstUnused(Operands);
5806     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5807                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5808                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5809                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5810       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5811                                         Op.ConvertResult);
5812     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5813       res = parseDim(Operands);
5814     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5815       res = parseDfmtNfmt(Operands);
5816     } else {
5817       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5818     }
5819     if (res != MatchOperand_NoMatch) {
5820       return res;
5821     }
5822   }
5823   return MatchOperand_NoMatch;
5824 }
5825 
5826 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5827   StringRef Name = Parser.getTok().getString();
5828   if (Name == "mul") {
5829     return parseIntWithPrefix("mul", Operands,
5830                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5831   }
5832 
5833   if (Name == "div") {
5834     return parseIntWithPrefix("div", Operands,
5835                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5836   }
5837 
5838   return MatchOperand_NoMatch;
5839 }
5840 
5841 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5842   cvtVOP3P(Inst, Operands);
5843 
5844   int Opc = Inst.getOpcode();
5845 
5846   int SrcNum;
5847   const int Ops[] = { AMDGPU::OpName::src0,
5848                       AMDGPU::OpName::src1,
5849                       AMDGPU::OpName::src2 };
5850   for (SrcNum = 0;
5851        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5852        ++SrcNum);
5853   assert(SrcNum > 0);
5854 
5855   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5856   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5857 
5858   if ((OpSel & (1 << SrcNum)) != 0) {
5859     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5860     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5861     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5862   }
5863 }
5864 
5865 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5866       // 1. This operand is input modifiers
5867   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5868       // 2. This is not last operand
5869       && Desc.NumOperands > (OpNum + 1)
5870       // 3. Next operand is register class
5871       && Desc.OpInfo[OpNum + 1].RegClass != -1
5872       // 4. Next register is not tied to any other operand
5873       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5874 }
5875 
5876 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5877 {
5878   OptionalImmIndexMap OptionalIdx;
5879   unsigned Opc = Inst.getOpcode();
5880 
5881   unsigned I = 1;
5882   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5883   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5884     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5885   }
5886 
5887   for (unsigned E = Operands.size(); I != E; ++I) {
5888     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5889     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5890       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5891     } else if (Op.isInterpSlot() ||
5892                Op.isInterpAttr() ||
5893                Op.isAttrChan()) {
5894       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5895     } else if (Op.isImmModifier()) {
5896       OptionalIdx[Op.getImmTy()] = I;
5897     } else {
5898       llvm_unreachable("unhandled operand type");
5899     }
5900   }
5901 
5902   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5903     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5904   }
5905 
5906   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5907     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5908   }
5909 
5910   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5911     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5912   }
5913 }
5914 
5915 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5916                               OptionalImmIndexMap &OptionalIdx) {
5917   unsigned Opc = Inst.getOpcode();
5918 
5919   unsigned I = 1;
5920   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5921   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5922     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5923   }
5924 
5925   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5926     // This instruction has src modifiers
5927     for (unsigned E = Operands.size(); I != E; ++I) {
5928       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5929       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5930         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5931       } else if (Op.isImmModifier()) {
5932         OptionalIdx[Op.getImmTy()] = I;
5933       } else if (Op.isRegOrImm()) {
5934         Op.addRegOrImmOperands(Inst, 1);
5935       } else {
5936         llvm_unreachable("unhandled operand type");
5937       }
5938     }
5939   } else {
5940     // No src modifiers
5941     for (unsigned E = Operands.size(); I != E; ++I) {
5942       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5943       if (Op.isMod()) {
5944         OptionalIdx[Op.getImmTy()] = I;
5945       } else {
5946         Op.addRegOrImmOperands(Inst, 1);
5947       }
5948     }
5949   }
5950 
5951   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5952     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5953   }
5954 
5955   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5956     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5957   }
5958 
5959   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
5960   // it has src2 register operand that is tied to dst operand
5961   // we don't allow modifiers for this operand in assembler so src2_modifiers
5962   // should be 0.
5963   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5964       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5965       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5966       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5967       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5968       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5969       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5970     auto it = Inst.begin();
5971     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5972     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5973     ++it;
5974     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5975   }
5976 }
5977 
5978 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5979   OptionalImmIndexMap OptionalIdx;
5980   cvtVOP3(Inst, Operands, OptionalIdx);
5981 }
5982 
5983 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5984                                const OperandVector &Operands) {
5985   OptionalImmIndexMap OptIdx;
5986   const int Opc = Inst.getOpcode();
5987   const MCInstrDesc &Desc = MII.get(Opc);
5988 
5989   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5990 
5991   cvtVOP3(Inst, Operands, OptIdx);
5992 
5993   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5994     assert(!IsPacked);
5995     Inst.addOperand(Inst.getOperand(0));
5996   }
5997 
5998   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5999   // instruction, and then figure out where to actually put the modifiers
6000 
6001   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6002 
6003   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6004   if (OpSelHiIdx != -1) {
6005     int DefaultVal = IsPacked ? -1 : 0;
6006     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6007                           DefaultVal);
6008   }
6009 
6010   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6011   if (NegLoIdx != -1) {
6012     assert(IsPacked);
6013     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6014     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6015   }
6016 
6017   const int Ops[] = { AMDGPU::OpName::src0,
6018                       AMDGPU::OpName::src1,
6019                       AMDGPU::OpName::src2 };
6020   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6021                          AMDGPU::OpName::src1_modifiers,
6022                          AMDGPU::OpName::src2_modifiers };
6023 
6024   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6025 
6026   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6027   unsigned OpSelHi = 0;
6028   unsigned NegLo = 0;
6029   unsigned NegHi = 0;
6030 
6031   if (OpSelHiIdx != -1) {
6032     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6033   }
6034 
6035   if (NegLoIdx != -1) {
6036     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6037     NegLo = Inst.getOperand(NegLoIdx).getImm();
6038     NegHi = Inst.getOperand(NegHiIdx).getImm();
6039   }
6040 
6041   for (int J = 0; J < 3; ++J) {
6042     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6043     if (OpIdx == -1)
6044       break;
6045 
6046     uint32_t ModVal = 0;
6047 
6048     if ((OpSel & (1 << J)) != 0)
6049       ModVal |= SISrcMods::OP_SEL_0;
6050 
6051     if ((OpSelHi & (1 << J)) != 0)
6052       ModVal |= SISrcMods::OP_SEL_1;
6053 
6054     if ((NegLo & (1 << J)) != 0)
6055       ModVal |= SISrcMods::NEG;
6056 
6057     if ((NegHi & (1 << J)) != 0)
6058       ModVal |= SISrcMods::NEG_HI;
6059 
6060     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6061 
6062     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6063   }
6064 }
6065 
6066 //===----------------------------------------------------------------------===//
6067 // dpp
6068 //===----------------------------------------------------------------------===//
6069 
6070 bool AMDGPUOperand::isDPP8() const {
6071   return isImmTy(ImmTyDPP8);
6072 }
6073 
6074 bool AMDGPUOperand::isDPPCtrl() const {
6075   using namespace AMDGPU::DPP;
6076 
6077   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6078   if (result) {
6079     int64_t Imm = getImm();
6080     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6081            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6082            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6083            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6084            (Imm == DppCtrl::WAVE_SHL1) ||
6085            (Imm == DppCtrl::WAVE_ROL1) ||
6086            (Imm == DppCtrl::WAVE_SHR1) ||
6087            (Imm == DppCtrl::WAVE_ROR1) ||
6088            (Imm == DppCtrl::ROW_MIRROR) ||
6089            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6090            (Imm == DppCtrl::BCAST15) ||
6091            (Imm == DppCtrl::BCAST31) ||
6092            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6093            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6094   }
6095   return false;
6096 }
6097 
6098 bool AMDGPUOperand::isS16Imm() const {
6099   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6100 }
6101 
6102 bool AMDGPUOperand::isU16Imm() const {
6103   return isImm() && isUInt<16>(getImm());
6104 }
6105 
6106 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6107   if (!isGFX10())
6108     return MatchOperand_NoMatch;
6109 
6110   SMLoc S = Parser.getTok().getLoc();
6111 
6112   if (getLexer().isNot(AsmToken::Identifier))
6113     return MatchOperand_NoMatch;
6114   if (getLexer().getTok().getString() != "dim")
6115     return MatchOperand_NoMatch;
6116 
6117   Parser.Lex();
6118   if (getLexer().isNot(AsmToken::Colon))
6119     return MatchOperand_ParseFail;
6120 
6121   Parser.Lex();
6122 
6123   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6124   // integer.
6125   std::string Token;
6126   if (getLexer().is(AsmToken::Integer)) {
6127     SMLoc Loc = getLexer().getTok().getEndLoc();
6128     Token = getLexer().getTok().getString();
6129     Parser.Lex();
6130     if (getLexer().getTok().getLoc() != Loc)
6131       return MatchOperand_ParseFail;
6132   }
6133   if (getLexer().isNot(AsmToken::Identifier))
6134     return MatchOperand_ParseFail;
6135   Token += getLexer().getTok().getString();
6136 
6137   StringRef DimId = Token;
6138   if (DimId.startswith("SQ_RSRC_IMG_"))
6139     DimId = DimId.substr(12);
6140 
6141   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6142   if (!DimInfo)
6143     return MatchOperand_ParseFail;
6144 
6145   Parser.Lex();
6146 
6147   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6148                                               AMDGPUOperand::ImmTyDim));
6149   return MatchOperand_Success;
6150 }
6151 
6152 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6153   SMLoc S = Parser.getTok().getLoc();
6154   StringRef Prefix;
6155 
6156   if (getLexer().getKind() == AsmToken::Identifier) {
6157     Prefix = Parser.getTok().getString();
6158   } else {
6159     return MatchOperand_NoMatch;
6160   }
6161 
6162   if (Prefix != "dpp8")
6163     return parseDPPCtrl(Operands);
6164   if (!isGFX10())
6165     return MatchOperand_NoMatch;
6166 
6167   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6168 
6169   int64_t Sels[8];
6170 
6171   Parser.Lex();
6172   if (getLexer().isNot(AsmToken::Colon))
6173     return MatchOperand_ParseFail;
6174 
6175   Parser.Lex();
6176   if (getLexer().isNot(AsmToken::LBrac))
6177     return MatchOperand_ParseFail;
6178 
6179   Parser.Lex();
6180   if (getParser().parseAbsoluteExpression(Sels[0]))
6181     return MatchOperand_ParseFail;
6182   if (0 > Sels[0] || 7 < Sels[0])
6183     return MatchOperand_ParseFail;
6184 
6185   for (size_t i = 1; i < 8; ++i) {
6186     if (getLexer().isNot(AsmToken::Comma))
6187       return MatchOperand_ParseFail;
6188 
6189     Parser.Lex();
6190     if (getParser().parseAbsoluteExpression(Sels[i]))
6191       return MatchOperand_ParseFail;
6192     if (0 > Sels[i] || 7 < Sels[i])
6193       return MatchOperand_ParseFail;
6194   }
6195 
6196   if (getLexer().isNot(AsmToken::RBrac))
6197     return MatchOperand_ParseFail;
6198   Parser.Lex();
6199 
6200   unsigned DPP8 = 0;
6201   for (size_t i = 0; i < 8; ++i)
6202     DPP8 |= (Sels[i] << (i * 3));
6203 
6204   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6205   return MatchOperand_Success;
6206 }
6207 
6208 OperandMatchResultTy
6209 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6210   using namespace AMDGPU::DPP;
6211 
6212   SMLoc S = Parser.getTok().getLoc();
6213   StringRef Prefix;
6214   int64_t Int;
6215 
6216   if (getLexer().getKind() == AsmToken::Identifier) {
6217     Prefix = Parser.getTok().getString();
6218   } else {
6219     return MatchOperand_NoMatch;
6220   }
6221 
6222   if (Prefix == "row_mirror") {
6223     Int = DppCtrl::ROW_MIRROR;
6224     Parser.Lex();
6225   } else if (Prefix == "row_half_mirror") {
6226     Int = DppCtrl::ROW_HALF_MIRROR;
6227     Parser.Lex();
6228   } else {
6229     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6230     if (Prefix != "quad_perm"
6231         && Prefix != "row_shl"
6232         && Prefix != "row_shr"
6233         && Prefix != "row_ror"
6234         && Prefix != "wave_shl"
6235         && Prefix != "wave_rol"
6236         && Prefix != "wave_shr"
6237         && Prefix != "wave_ror"
6238         && Prefix != "row_bcast"
6239         && Prefix != "row_share"
6240         && Prefix != "row_xmask") {
6241       return MatchOperand_NoMatch;
6242     }
6243 
6244     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6245       return MatchOperand_NoMatch;
6246 
6247     if (!isVI() && !isGFX9() &&
6248         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6249          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6250          Prefix == "row_bcast"))
6251       return MatchOperand_NoMatch;
6252 
6253     Parser.Lex();
6254     if (getLexer().isNot(AsmToken::Colon))
6255       return MatchOperand_ParseFail;
6256 
6257     if (Prefix == "quad_perm") {
6258       // quad_perm:[%d,%d,%d,%d]
6259       Parser.Lex();
6260       if (getLexer().isNot(AsmToken::LBrac))
6261         return MatchOperand_ParseFail;
6262       Parser.Lex();
6263 
6264       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6265         return MatchOperand_ParseFail;
6266 
6267       for (int i = 0; i < 3; ++i) {
6268         if (getLexer().isNot(AsmToken::Comma))
6269           return MatchOperand_ParseFail;
6270         Parser.Lex();
6271 
6272         int64_t Temp;
6273         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6274           return MatchOperand_ParseFail;
6275         const int shift = i*2 + 2;
6276         Int += (Temp << shift);
6277       }
6278 
6279       if (getLexer().isNot(AsmToken::RBrac))
6280         return MatchOperand_ParseFail;
6281       Parser.Lex();
6282     } else {
6283       // sel:%d
6284       Parser.Lex();
6285       if (getParser().parseAbsoluteExpression(Int))
6286         return MatchOperand_ParseFail;
6287 
6288       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6289         Int |= DppCtrl::ROW_SHL0;
6290       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6291         Int |= DppCtrl::ROW_SHR0;
6292       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6293         Int |= DppCtrl::ROW_ROR0;
6294       } else if (Prefix == "wave_shl" && 1 == Int) {
6295         Int = DppCtrl::WAVE_SHL1;
6296       } else if (Prefix == "wave_rol" && 1 == Int) {
6297         Int = DppCtrl::WAVE_ROL1;
6298       } else if (Prefix == "wave_shr" && 1 == Int) {
6299         Int = DppCtrl::WAVE_SHR1;
6300       } else if (Prefix == "wave_ror" && 1 == Int) {
6301         Int = DppCtrl::WAVE_ROR1;
6302       } else if (Prefix == "row_bcast") {
6303         if (Int == 15) {
6304           Int = DppCtrl::BCAST15;
6305         } else if (Int == 31) {
6306           Int = DppCtrl::BCAST31;
6307         } else {
6308           return MatchOperand_ParseFail;
6309         }
6310       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6311         Int |= DppCtrl::ROW_SHARE_FIRST;
6312       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6313         Int |= DppCtrl::ROW_XMASK_FIRST;
6314       } else {
6315         return MatchOperand_ParseFail;
6316       }
6317     }
6318   }
6319 
6320   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6321   return MatchOperand_Success;
6322 }
6323 
6324 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6325   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6326 }
6327 
6328 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6329   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6330 }
6331 
6332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6333   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6334 }
6335 
6336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6337   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6338 }
6339 
6340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6341   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6342 }
6343 
6344 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6345   OptionalImmIndexMap OptionalIdx;
6346 
6347   unsigned I = 1;
6348   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6349   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6350     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6351   }
6352 
6353   int Fi = 0;
6354   for (unsigned E = Operands.size(); I != E; ++I) {
6355     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6356                                             MCOI::TIED_TO);
6357     if (TiedTo != -1) {
6358       assert((unsigned)TiedTo < Inst.getNumOperands());
6359       // handle tied old or src2 for MAC instructions
6360       Inst.addOperand(Inst.getOperand(TiedTo));
6361     }
6362     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6363     // Add the register arguments
6364     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6365       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6366       // Skip it.
6367       continue;
6368     }
6369 
6370     if (IsDPP8) {
6371       if (Op.isDPP8()) {
6372         Op.addImmOperands(Inst, 1);
6373       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6374         Op.addRegWithFPInputModsOperands(Inst, 2);
6375       } else if (Op.isFI()) {
6376         Fi = Op.getImm();
6377       } else if (Op.isReg()) {
6378         Op.addRegOperands(Inst, 1);
6379       } else {
6380         llvm_unreachable("Invalid operand type");
6381       }
6382     } else {
6383       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6384         Op.addRegWithFPInputModsOperands(Inst, 2);
6385       } else if (Op.isDPPCtrl()) {
6386         Op.addImmOperands(Inst, 1);
6387       } else if (Op.isImm()) {
6388         // Handle optional arguments
6389         OptionalIdx[Op.getImmTy()] = I;
6390       } else {
6391         llvm_unreachable("Invalid operand type");
6392       }
6393     }
6394   }
6395 
6396   if (IsDPP8) {
6397     using namespace llvm::AMDGPU::DPP;
6398     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6399   } else {
6400     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6401     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6402     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6403     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6404       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6405     }
6406   }
6407 }
6408 
6409 //===----------------------------------------------------------------------===//
6410 // sdwa
6411 //===----------------------------------------------------------------------===//
6412 
6413 OperandMatchResultTy
6414 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6415                               AMDGPUOperand::ImmTy Type) {
6416   using namespace llvm::AMDGPU::SDWA;
6417 
6418   SMLoc S = Parser.getTok().getLoc();
6419   StringRef Value;
6420   OperandMatchResultTy res;
6421 
6422   res = parseStringWithPrefix(Prefix, Value);
6423   if (res != MatchOperand_Success) {
6424     return res;
6425   }
6426 
6427   int64_t Int;
6428   Int = StringSwitch<int64_t>(Value)
6429         .Case("BYTE_0", SdwaSel::BYTE_0)
6430         .Case("BYTE_1", SdwaSel::BYTE_1)
6431         .Case("BYTE_2", SdwaSel::BYTE_2)
6432         .Case("BYTE_3", SdwaSel::BYTE_3)
6433         .Case("WORD_0", SdwaSel::WORD_0)
6434         .Case("WORD_1", SdwaSel::WORD_1)
6435         .Case("DWORD", SdwaSel::DWORD)
6436         .Default(0xffffffff);
6437   Parser.Lex(); // eat last token
6438 
6439   if (Int == 0xffffffff) {
6440     return MatchOperand_ParseFail;
6441   }
6442 
6443   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6444   return MatchOperand_Success;
6445 }
6446 
6447 OperandMatchResultTy
6448 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6449   using namespace llvm::AMDGPU::SDWA;
6450 
6451   SMLoc S = Parser.getTok().getLoc();
6452   StringRef Value;
6453   OperandMatchResultTy res;
6454 
6455   res = parseStringWithPrefix("dst_unused", Value);
6456   if (res != MatchOperand_Success) {
6457     return res;
6458   }
6459 
6460   int64_t Int;
6461   Int = StringSwitch<int64_t>(Value)
6462         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6463         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6464         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6465         .Default(0xffffffff);
6466   Parser.Lex(); // eat last token
6467 
6468   if (Int == 0xffffffff) {
6469     return MatchOperand_ParseFail;
6470   }
6471 
6472   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6473   return MatchOperand_Success;
6474 }
6475 
6476 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6477   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6478 }
6479 
6480 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6481   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6482 }
6483 
6484 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6485   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6486 }
6487 
6488 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6489   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6490 }
6491 
6492 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6493                               uint64_t BasicInstType, bool skipVcc) {
6494   using namespace llvm::AMDGPU::SDWA;
6495 
6496   OptionalImmIndexMap OptionalIdx;
6497   bool skippedVcc = false;
6498 
6499   unsigned I = 1;
6500   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6501   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6502     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6503   }
6504 
6505   for (unsigned E = Operands.size(); I != E; ++I) {
6506     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6507     if (skipVcc && !skippedVcc && Op.isReg() &&
6508         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6509       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
6510       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6511       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6512       // Skip VCC only if we didn't skip it on previous iteration.
6513       if (BasicInstType == SIInstrFlags::VOP2 &&
6514           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6515         skippedVcc = true;
6516         continue;
6517       } else if (BasicInstType == SIInstrFlags::VOPC &&
6518                  Inst.getNumOperands() == 0) {
6519         skippedVcc = true;
6520         continue;
6521       }
6522     }
6523     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6524       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6525     } else if (Op.isImm()) {
6526       // Handle optional arguments
6527       OptionalIdx[Op.getImmTy()] = I;
6528     } else {
6529       llvm_unreachable("Invalid operand type");
6530     }
6531     skippedVcc = false;
6532   }
6533 
6534   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6535       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6536       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6537     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
6538     switch (BasicInstType) {
6539     case SIInstrFlags::VOP1:
6540       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6541       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6542         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6543       }
6544       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6545       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6546       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6547       break;
6548 
6549     case SIInstrFlags::VOP2:
6550       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6551       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6552         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6553       }
6554       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6555       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6556       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6557       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6558       break;
6559 
6560     case SIInstrFlags::VOPC:
6561       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6562         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6563       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6564       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6565       break;
6566 
6567     default:
6568       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6569     }
6570   }
6571 
6572   // special case v_mac_{f16, f32}:
6573   // it has src2 register operand that is tied to dst operand
6574   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6575       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6576     auto it = Inst.begin();
6577     std::advance(
6578       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6579     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6580   }
6581 }
6582 
6583 /// Force static initialization.
6584 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6585   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6586   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6587 }
6588 
6589 #define GET_REGISTER_MATCHER
6590 #define GET_MATCHER_IMPLEMENTATION
6591 #define GET_MNEMONIC_SPELL_CHECKER
6592 #include "AMDGPUGenAsmMatcher.inc"
6593 
6594 // This fuction should be defined after auto-generated include so that we have
6595 // MatchClassKind enum defined
6596 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6597                                                      unsigned Kind) {
6598   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6599   // But MatchInstructionImpl() expects to meet token and fails to validate
6600   // operand. This method checks if we are given immediate operand but expect to
6601   // get corresponding token.
6602   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6603   switch (Kind) {
6604   case MCK_addr64:
6605     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6606   case MCK_gds:
6607     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6608   case MCK_lds:
6609     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6610   case MCK_glc:
6611     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6612   case MCK_idxen:
6613     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6614   case MCK_offen:
6615     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6616   case MCK_SSrcB32:
6617     // When operands have expression values, they will return true for isToken,
6618     // because it is not possible to distinguish between a token and an
6619     // expression at parse time. MatchInstructionImpl() will always try to
6620     // match an operand as a token, when isToken returns true, and when the
6621     // name of the expression is not a valid token, the match will fail,
6622     // so we need to handle it here.
6623     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6624   case MCK_SSrcF32:
6625     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6626   case MCK_SoppBrTarget:
6627     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6628   case MCK_VReg32OrOff:
6629     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6630   case MCK_InterpSlot:
6631     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6632   case MCK_Attr:
6633     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6634   case MCK_AttrChan:
6635     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6636   default:
6637     return Match_InvalidOperand;
6638   }
6639 }
6640 
6641 //===----------------------------------------------------------------------===//
6642 // endpgm
6643 //===----------------------------------------------------------------------===//
6644 
6645 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6646   SMLoc S = Parser.getTok().getLoc();
6647   int64_t Imm = 0;
6648 
6649   if (!parseExpr(Imm)) {
6650     // The operand is optional, if not present default to 0
6651     Imm = 0;
6652   }
6653 
6654   if (!isUInt<16>(Imm)) {
6655     Error(S, "expected a 16-bit value");
6656     return MatchOperand_ParseFail;
6657   }
6658 
6659   Operands.push_back(
6660       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6661   return MatchOperand_Success;
6662 }
6663 
6664 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6665