1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
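  // Source operand modifiers parsed from the assembly text. The FP modifiers
  // (neg/abs, written e.g. "-|v0|") and the integer sext modifier (written
  // "sext(v0)") are mutually exclusive; getModifiersOperand() encodes the
  // active set as SISrcMods bits for the corresponding MCInst operand.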
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
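  // Classification of immediate operands. ImmTyNone is a plain integer or
  // floating-point literal; all other values identify named modifiers and
  // bits (offsets, cache policy flags, DPP/SDWA controls, etc.) so they can
  // be matched to the correct instruction operand.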
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
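  // Per-kind payloads; exactly one member of the union below is active,
  // as selected by Kind.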
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
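  // Factory helpers used by the parser to build the operand list, e.g.:
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
  //                                                AMDGPUOperand::ImmTyOffset));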
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. the count of SGPRs used.
960 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
961 // next .amdgpu_hsa_kernel directive or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
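  // Record a use of SGPR index 'i' and advance the ".kernel.sgpr_count"
  // symbol to one past the highest index used so far.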
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
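// Target assembly parser for AMDGPU. Parses registers, immediates and the
// various named operand modifiers, validates the resulting MCInst, and
// handles the AMDGPU/AMDHSA assembler directives.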
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Maximum number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth,
1092                          SMLoc Loc);
1093 
1094   bool isRegister();
1095   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1096   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1097   void initializeGprCountSymbol(RegisterKind RegKind);
1098   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1099                              unsigned RegWidth);
1100   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1101                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1102   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1103                  bool IsGdsHardcoded);
1104 
1105 public:
1106   enum AMDGPUMatchResultTy {
1107     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1108   };
1109   enum OperandMode {
1110     OperandMode_Default,
1111     OperandMode_NSA,
1112   };
1113 
1114   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1115 
1116   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1117                const MCInstrInfo &MII,
1118                const MCTargetOptions &Options)
1119       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1120     MCAsmParserExtension::Initialize(Parser);
1121 
1122     if (getFeatureBits().none()) {
1123       // Set default features.
1124       copySTI().ToggleFeature("southern-islands");
1125     }
1126 
1127     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1128 
1129     {
1130       // TODO: make these pre-defined variables read-only.
1131       // Currently there is no suitable machinery in the core llvm-mc for this.
1132       // MCSymbol::isRedefinable is intended for another purpose, and
1133       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1134       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1135       MCContext &Ctx = getContext();
1136       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1137         MCSymbol *Sym =
1138             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1144       } else {
1145         MCSymbol *Sym =
1146             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1147         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1148         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1149         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1150         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1151         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1152       }
1153       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1154         initializeGprCountSymbol(IS_VGPR);
1155         initializeGprCountSymbol(IS_SGPR);
1156       } else
1157         KernelScope.initialize(getContext());
1158     }
1159   }
1160 
1161   bool hasXNACK() const {
1162     return AMDGPU::hasXNACK(getSTI());
1163   }
1164 
1165   bool hasMIMG_R128() const {
1166     return AMDGPU::hasMIMG_R128(getSTI());
1167   }
1168 
1169   bool hasPackedD16() const {
1170     return AMDGPU::hasPackedD16(getSTI());
1171   }
1172 
1173   bool hasGFX10A16() const {
1174     return AMDGPU::hasGFX10A16(getSTI());
1175   }
1176 
1177   bool isSI() const {
1178     return AMDGPU::isSI(getSTI());
1179   }
1180 
1181   bool isCI() const {
1182     return AMDGPU::isCI(getSTI());
1183   }
1184 
1185   bool isVI() const {
1186     return AMDGPU::isVI(getSTI());
1187   }
1188 
1189   bool isGFX9() const {
1190     return AMDGPU::isGFX9(getSTI());
1191   }
1192 
1193   bool isGFX9Plus() const {
1194     return AMDGPU::isGFX9Plus(getSTI());
1195   }
1196 
1197   bool isGFX10() const {
1198     return AMDGPU::isGFX10(getSTI());
1199   }
1200 
1201   bool isGFX10_BEncoding() const {
1202     return AMDGPU::isGFX10_BEncoding(getSTI());
1203   }
1204 
1205   bool hasInv2PiInlineImm() const {
1206     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1207   }
1208 
1209   bool hasFlatOffsets() const {
1210     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1211   }
1212 
1213   bool hasSGPR102_SGPR103() const {
1214     return !isVI() && !isGFX9();
1215   }
1216 
1217   bool hasSGPR104_SGPR105() const {
1218     return isGFX10();
1219   }
1220 
1221   bool hasIntClamp() const {
1222     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1223   }
1224 
1225   AMDGPUTargetStreamer &getTargetStreamer() {
1226     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1227     return static_cast<AMDGPUTargetStreamer &>(TS);
1228   }
1229 
1230   const MCRegisterInfo *getMRI() const {
1231     // We need this const_cast because for some reason getContext() is not const
1232     // in MCAsmParser.
1233     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1234   }
1235 
1236   const MCInstrInfo *getMII() const {
1237     return &MII;
1238   }
1239 
1240   const FeatureBitset &getFeatureBits() const {
1241     return getSTI().getFeatureBits();
1242   }
1243 
1244   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1245   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1246   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1247 
1248   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1249   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1250   bool isForcedDPP() const { return ForcedDPP; }
1251   bool isForcedSDWA() const { return ForcedSDWA; }
1252   ArrayRef<unsigned> getMatchedVariants() const;
1253   StringRef getMatchedVariantName() const;
1254 
1255   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1256   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1257                      bool RestoreOnFailure);
1258   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1259   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1260                                         SMLoc &EndLoc) override;
1261   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1262   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1263                                       unsigned Kind) override;
1264   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1265                                OperandVector &Operands, MCStreamer &Out,
1266                                uint64_t &ErrorInfo,
1267                                bool MatchingInlineAsm) override;
1268   bool ParseDirective(AsmToken DirectiveID) override;
1269   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1270                                     OperandMode Mode = OperandMode_Default);
1271   StringRef parseMnemonicSuffix(StringRef Name);
1272   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1273                         SMLoc NameLoc, OperandVector &Operands) override;
1274   //bool ProcessInstruction(MCInst &Inst);
1275 
1276   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1277 
1278   OperandMatchResultTy
1279   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1280                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1281                      bool (*ConvertResult)(int64_t &) = nullptr);
1282 
1283   OperandMatchResultTy
1284   parseOperandArrayWithPrefix(const char *Prefix,
1285                               OperandVector &Operands,
1286                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1287                               bool (*ConvertResult)(int64_t&) = nullptr);
1288 
1289   OperandMatchResultTy
1290   parseNamedBit(const char *Name, OperandVector &Operands,
1291                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1292   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1293                                              StringRef &Value);
1294 
1295   bool isModifier();
1296   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1297   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1298   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1299   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1300   bool parseSP3NegModifier();
1301   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1302   OperandMatchResultTy parseReg(OperandVector &Operands);
1303   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1304   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1305   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1306   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1307   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1308   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1309   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1310   OperandMatchResultTy parseUfmt(int64_t &Format);
1311   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1312   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1313   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1314   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1315   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1316   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1317   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1318 
1319   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1320   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1321   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1322   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1323 
1324   bool parseCnt(int64_t &IntVal);
1325   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1326   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1327 
1328 private:
1329   struct OperandInfoTy {
1330     int64_t Id;
1331     bool IsSymbolic = false;
1332     bool IsDefined = false;
1333 
1334     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1335   };
1336 
1337   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1338   bool validateSendMsg(const OperandInfoTy &Msg,
1339                        const OperandInfoTy &Op,
1340                        const OperandInfoTy &Stream,
1341                        const SMLoc Loc);
1342 
1343   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1344   bool validateHwreg(const OperandInfoTy &HwReg,
1345                      const int64_t Offset,
1346                      const int64_t Width,
1347                      const SMLoc Loc);
1348 
1349   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1350   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1351   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1352 
1353   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1354   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1355   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1356   bool validateSOPLiteral(const MCInst &Inst) const;
1357   bool validateConstantBusLimitations(const MCInst &Inst);
1358   bool validateEarlyClobberLimitations(const MCInst &Inst);
1359   bool validateIntClampSupported(const MCInst &Inst);
1360   bool validateMIMGAtomicDMask(const MCInst &Inst);
1361   bool validateMIMGGatherDMask(const MCInst &Inst);
1362   bool validateMovrels(const MCInst &Inst);
1363   bool validateMIMGDataSize(const MCInst &Inst);
1364   bool validateMIMGAddrSize(const MCInst &Inst);
1365   bool validateMIMGD16(const MCInst &Inst);
1366   bool validateMIMGDim(const MCInst &Inst);
1367   bool validateLdsDirect(const MCInst &Inst);
1368   bool validateOpSel(const MCInst &Inst);
1369   bool validateVccOperand(unsigned Reg) const;
1370   bool validateVOP3Literal(const MCInst &Inst) const;
1371   bool validateMAIAccWrite(const MCInst &Inst);
1372   bool validateDivScale(const MCInst &Inst);
1373   unsigned getConstantBusLimit(unsigned Opcode) const;
1374   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1375   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1376   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1377 
1378   bool isSupportedMnemo(StringRef Mnemo,
1379                         const FeatureBitset &FBS);
1380   bool isSupportedMnemo(StringRef Mnemo,
1381                         const FeatureBitset &FBS,
1382                         ArrayRef<unsigned> Variants);
1383   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1384 
1385   bool isId(const StringRef Id) const;
1386   bool isId(const AsmToken &Token, const StringRef Id) const;
1387   bool isToken(const AsmToken::TokenKind Kind) const;
1388   bool trySkipId(const StringRef Id);
1389   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1390   bool trySkipToken(const AsmToken::TokenKind Kind);
1391   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1392   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1393   bool parseId(StringRef &Val, const StringRef ErrMsg);
1394 
1395   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1396   AsmToken::TokenKind getTokenKind() const;
1397   bool parseExpr(int64_t &Imm);
1398   bool parseExpr(OperandVector &Operands);
1399   StringRef getTokenStr() const;
1400   AsmToken peekToken();
1401   AsmToken getToken() const;
1402   SMLoc getLoc() const;
1403   void lex();
1404 
1405 public:
1406   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1407   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1408 
1409   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1410   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1411   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1412   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1413   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1414   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1415 
1416   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1417                             const unsigned MinVal,
1418                             const unsigned MaxVal,
1419                             const StringRef ErrMsg);
1420   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1421   bool parseSwizzleOffset(int64_t &Imm);
1422   bool parseSwizzleMacro(int64_t &Imm);
1423   bool parseSwizzleQuadPerm(int64_t &Imm);
1424   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1425   bool parseSwizzleBroadcast(int64_t &Imm);
1426   bool parseSwizzleSwap(int64_t &Imm);
1427   bool parseSwizzleReverse(int64_t &Imm);
1428 
1429   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1430   int64_t parseGPRIdxMacro();
1431 
1432   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1433   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1434   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1435   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1436   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1437 
1438   AMDGPUOperand::Ptr defaultDLC() const;
1439   AMDGPUOperand::Ptr defaultGLC() const;
1440   AMDGPUOperand::Ptr defaultSLC() const;
1441 
1442   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1443   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1444   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1445   AMDGPUOperand::Ptr defaultFlatOffset() const;
1446 
1447   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1448 
1449   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1450                OptionalImmIndexMap &OptionalIdx);
1451   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1452   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1453   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1454 
1455   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1456 
1457   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1458                bool IsAtomic = false);
1459   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1460   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1461 
1462   OperandMatchResultTy parseDim(OperandVector &Operands);
1463   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1464   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1465   AMDGPUOperand::Ptr defaultRowMask() const;
1466   AMDGPUOperand::Ptr defaultBankMask() const;
1467   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1468   AMDGPUOperand::Ptr defaultFI() const;
1469   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1470   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1471 
1472   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1473                                     AMDGPUOperand::ImmTy Type);
1474   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1475   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1476   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1477   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1478   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1479   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1480   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1481                uint64_t BasicInstType,
1482                bool SkipDstVcc = false,
1483                bool SkipSrcVcc = false);
1484 
1485   AMDGPUOperand::Ptr defaultBLGP() const;
1486   AMDGPUOperand::Ptr defaultCBSZ() const;
1487   AMDGPUOperand::Ptr defaultABID() const;
1488 
1489   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1490   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1491 };
1492 
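// Describes one optional (named) instruction operand: its textual name, the
// immediate type it maps to, whether it is a single-bit flag, and an optional
// callback that adjusts and validates the parsed value.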
1493 struct OptionalOperand {
1494   const char *Name;
1495   AMDGPUOperand::ImmTy Type;
1496   bool IsBit;
1497   bool (*ConvertResult)(int64_t&);
1498 };
1499 
1500 } // end anonymous namespace
1501 
1502 // May be called with an integer type of equivalent bitwidth.
1503 static const fltSemantics *getFltSemantics(unsigned Size) {
1504   switch (Size) {
1505   case 4:
1506     return &APFloat::IEEEsingle();
1507   case 8:
1508     return &APFloat::IEEEdouble();
1509   case 2:
1510     return &APFloat::IEEEhalf();
1511   default:
1512     llvm_unreachable("unsupported fp type");
1513   }
1514 }
1515 
1516 static const fltSemantics *getFltSemantics(MVT VT) {
1517   return getFltSemantics(VT.getSizeInBits() / 8);
1518 }
1519 
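// Map an AMDGPU operand type to the floating-point semantics used to encode
// its literal value.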
1520 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1521   switch (OperandType) {
1522   case AMDGPU::OPERAND_REG_IMM_INT32:
1523   case AMDGPU::OPERAND_REG_IMM_FP32:
1524   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1525   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1527   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1528     return &APFloat::IEEEsingle();
1529   case AMDGPU::OPERAND_REG_IMM_INT64:
1530   case AMDGPU::OPERAND_REG_IMM_FP64:
1531   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1532   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1533     return &APFloat::IEEEdouble();
1534   case AMDGPU::OPERAND_REG_IMM_INT16:
1535   case AMDGPU::OPERAND_REG_IMM_FP16:
1536   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1537   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1538   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1539   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1540   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1541   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1542   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1543   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1544   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1545   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1546     return &APFloat::IEEEhalf();
1547   default:
1548     llvm_unreachable("unsupported fp type");
1549   }
1550 }
1551 
1552 //===----------------------------------------------------------------------===//
1553 // Operand
1554 //===----------------------------------------------------------------------===//
1555 
1556 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1557   bool Lost;
1558 
  // Convert literal to the floating-point semantics of the given type.
1560   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1561                                                APFloat::rmNearestTiesToEven,
1562                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1564   if (Status != APFloat::opOK &&
1565       Lost &&
1566       ((Status & APFloat::opOverflow)  != 0 ||
1567        (Status & APFloat::opUnderflow) != 0)) {
1568     return false;
1569   }
1570 
1571   return true;
1572 }
1573 
1574 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1575   return isUIntN(Size, Val) || isIntN(Size, Val);
1576 }
1577 
1578 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1579   if (VT.getScalarType() == MVT::i16) {
1580     // FP immediate values are broken.
1581     return isInlinableIntLiteral(Val);
1582   }
1583 
1584   // f16/v2f16 operands work correctly for all values.
1585   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1586 }
1587 
1588 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1589 
1590   // This is a hack to enable named inline values like
1591   // shared_base with both 32-bit and 64-bit operands.
1592   // Note that these values are defined as
1593   // 32-bit operands only.
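  // For example (illustrative), an instruction such as
  // "s_mov_b64 s[0:1], src_shared_base" is accepted even though
  // src_shared_base is defined as a 32-bit operand.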
1594   if (isInlineValue()) {
1595     return true;
1596   }
1597 
1598   if (!isImmTy(ImmTyNone)) {
1599     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1600     return false;
1601   }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.
1605 
1606   APInt Literal(64, Imm.Val);
1607 
1608   if (Imm.IsFPImm) { // We got fp literal token
1609     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1610       return AMDGPU::isInlinableLiteral64(Imm.Val,
1611                                           AsmParser->hasInv2PiInlineImm());
1612     }
1613 
1614     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1615     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1616       return false;
1617 
1618     if (type.getScalarSizeInBits() == 16) {
1619       return isInlineableLiteralOp16(
1620         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1621         type, AsmParser->hasInv2PiInlineImm());
1622     }
1623 
1624     // Check if single precision literal is inlinable
1625     return AMDGPU::isInlinableLiteral32(
1626       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1627       AsmParser->hasInv2PiInlineImm());
1628   }
1629 
1630   // We got int literal token.
1631   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1632     return AMDGPU::isInlinableLiteral64(Imm.Val,
1633                                         AsmParser->hasInv2PiInlineImm());
1634   }
1635 
1636   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1637     return false;
1638   }
1639 
1640   if (type.getScalarSizeInBits() == 16) {
1641     return isInlineableLiteralOp16(
1642       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1643       type, AsmParser->hasInv2PiInlineImm());
1644   }
1645 
1646   return AMDGPU::isInlinableLiteral32(
1647     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1648     AsmParser->hasInv2PiInlineImm());
1649 }
1650 
1651 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
1653   if (!isImmTy(ImmTyNone)) {
1654     return false;
1655   }
1656 
1657   if (!Imm.IsFPImm) {
1658     // We got int literal token.
1659 
1660     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, these cases are disabled.
1664       return false;
1665     }
1666 
1667     unsigned Size = type.getSizeInBits();
1668     if (Size == 64)
1669       Size = 32;
1670 
1671     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1672     // types.
1673     return isSafeTruncation(Imm.Val, Size);
1674   }
1675 
1676   // We got fp literal token
1677   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zero, but we accept
    // such literals.
1679     return true;
1680   }
1681 
1682   if (type == MVT::i64) { // Expected 64-bit int operand
1683     // We don't allow fp literals in 64-bit integer instructions. It is
1684     // unclear how we should encode them.
1685     return false;
1686   }
1687 
  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
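  // For example (illustrative), a v2f16 operand given the fp literal 2.5
  // would be encoded with the f16 value 0x4100 in the low half and zero in
  // the high half.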
1691   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1692                      (type == MVT::v2i16)? MVT::i16 : type;
1693 
1694   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1695   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1696 }
1697 
1698 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1699   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1700 }
1701 
1702 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1703   if (AsmParser->isVI())
1704     return isVReg32();
1705   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1706     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1707   else
1708     return false;
1709 }
1710 
1711 bool AMDGPUOperand::isSDWAFP16Operand() const {
1712   return isSDWAOperand(MVT::f16);
1713 }
1714 
1715 bool AMDGPUOperand::isSDWAFP32Operand() const {
1716   return isSDWAOperand(MVT::f32);
1717 }
1718 
1719 bool AMDGPUOperand::isSDWAInt16Operand() const {
1720   return isSDWAOperand(MVT::i16);
1721 }
1722 
1723 bool AMDGPUOperand::isSDWAInt32Operand() const {
1724   return isSDWAOperand(MVT::i32);
1725 }
1726 
1727 bool AMDGPUOperand::isBoolReg() const {
1728   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1729          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1730 }
1731 
1732 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1733 {
1734   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1735   assert(Size == 2 || Size == 4 || Size == 8);
1736 
1737   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1738 
1739   if (Imm.Mods.Abs) {
1740     Val &= ~FpSignMask;
1741   }
1742   if (Imm.Mods.Neg) {
1743     Val ^= FpSignMask;
1744   }
1745 
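  // For example (illustrative), with Size == 4, negating 0x40400000 (3.0f)
  // flips the sign bit to produce 0xC0400000 (-3.0f).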
1746   return Val;
1747 }
1748 
1749 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1750   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1751                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1755   } else {
1756     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1757     Inst.addOperand(MCOperand::createImm(Imm.Val));
1758   }
1759 }
1760 
1761 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1762   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1763   auto OpNum = Inst.getNumOperands();
1764   // Check that this operand accepts literals
1765   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1766 
1767   if (ApplyModifiers) {
1768     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1769     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1770     Val = applyInputFPModifiers(Val, Size);
1771   }
1772 
1773   APInt Literal(64, Val);
1774   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1775 
1776   if (Imm.IsFPImm) { // We got fp literal token
1777     switch (OpTy) {
1778     case AMDGPU::OPERAND_REG_IMM_INT64:
1779     case AMDGPU::OPERAND_REG_IMM_FP64:
1780     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1781     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1782       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1783                                        AsmParser->hasInv2PiInlineImm())) {
1784         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1785         return;
1786       }
1787 
1788       // Non-inlineable
1789       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands, check whether the low 32 bits are zero.
1791         if (Literal.getLoBits(32) != 0) {
1792           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1793           "Can't encode literal as exact 64-bit floating-point operand. "
1794           "Low 32-bits will be set to zero");
1795         }
1796 
1797         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1798         return;
1799       }
1800 
1801       // We don't allow fp literals in 64-bit integer instructions. It is
1802       // unclear how we should encode them. This case should be checked earlier
1803       // in predicate methods (isLiteralImm())
1804       llvm_unreachable("fp literal in 64-bit integer instruction.");
1805 
1806     case AMDGPU::OPERAND_REG_IMM_INT32:
1807     case AMDGPU::OPERAND_REG_IMM_FP32:
1808     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1809     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1810     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1811     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1812     case AMDGPU::OPERAND_REG_IMM_INT16:
1813     case AMDGPU::OPERAND_REG_IMM_FP16:
1814     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1815     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1816     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1817     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1818     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1819     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1820     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1821     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1822     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1823     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1824       bool lost;
1825       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point semantics.
1827       FPLiteral.convert(*getOpFltSemantics(OpTy),
1828                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm().
1831 
1832       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1833       Inst.addOperand(MCOperand::createImm(ImmVal));
1834       return;
1835     }
1836     default:
1837       llvm_unreachable("invalid operand size");
1838     }
1839 
1840     return;
1841   }
1842 
1843   // We got int literal token.
1844   // Only sign extend inline immediates.
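  // For example (illustrative), for a 32-bit operand the inlinable value -2
  // is emitted as is, while a non-inlinable value such as -100 is emitted as
  // the 32-bit literal 0xFFFFFF9C.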
1845   switch (OpTy) {
1846   case AMDGPU::OPERAND_REG_IMM_INT32:
1847   case AMDGPU::OPERAND_REG_IMM_FP32:
1848   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1849   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1850   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1851   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1852   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1853   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1854     if (isSafeTruncation(Val, 32) &&
1855         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1856                                      AsmParser->hasInv2PiInlineImm())) {
1857       Inst.addOperand(MCOperand::createImm(Val));
1858       return;
1859     }
1860 
1861     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1862     return;
1863 
1864   case AMDGPU::OPERAND_REG_IMM_INT64:
1865   case AMDGPU::OPERAND_REG_IMM_FP64:
1866   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1867   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1868     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1869       Inst.addOperand(MCOperand::createImm(Val));
1870       return;
1871     }
1872 
1873     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1874     return;
1875 
1876   case AMDGPU::OPERAND_REG_IMM_INT16:
1877   case AMDGPU::OPERAND_REG_IMM_FP16:
1878   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1879   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1882     if (isSafeTruncation(Val, 16) &&
1883         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1884                                      AsmParser->hasInv2PiInlineImm())) {
1885       Inst.addOperand(MCOperand::createImm(Val));
1886       return;
1887     }
1888 
1889     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1890     return;
1891 
1892   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1893   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1894   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1895   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1896     assert(isSafeTruncation(Val, 16));
1897     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1898                                         AsmParser->hasInv2PiInlineImm()));
1899 
1900     Inst.addOperand(MCOperand::createImm(Val));
1901     return;
1902   }
1903   default:
1904     llvm_unreachable("invalid operand size");
1905   }
1906 }
1907 
1908 template <unsigned Bitwidth>
1909 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1910   APInt Literal(64, Imm.Val);
1911 
1912   if (!Imm.IsFPImm) {
1913     // We got int literal token.
1914     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1915     return;
1916   }
1917 
1918   bool Lost;
1919   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1920   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1921                     APFloat::rmNearestTiesToEven, &Lost);
1922   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1923 }
1924 
1925 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1926   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1927 }
1928 
1929 static bool isInlineValue(unsigned Reg) {
1930   switch (Reg) {
1931   case AMDGPU::SRC_SHARED_BASE:
1932   case AMDGPU::SRC_SHARED_LIMIT:
1933   case AMDGPU::SRC_PRIVATE_BASE:
1934   case AMDGPU::SRC_PRIVATE_LIMIT:
1935   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1936     return true;
1937   case AMDGPU::SRC_VCCZ:
1938   case AMDGPU::SRC_EXECZ:
1939   case AMDGPU::SRC_SCC:
1940     return true;
1941   case AMDGPU::SGPR_NULL:
1942     return true;
1943   default:
1944     return false;
1945   }
1946 }
1947 
1948 bool AMDGPUOperand::isInlineValue() const {
1949   return isRegKind() && ::isInlineValue(getReg());
1950 }
1951 
1952 //===----------------------------------------------------------------------===//
1953 // AsmParser
1954 //===----------------------------------------------------------------------===//
1955 
1956 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1957   if (Is == IS_VGPR) {
1958     switch (RegWidth) {
1959       default: return -1;
1960       case 1: return AMDGPU::VGPR_32RegClassID;
1961       case 2: return AMDGPU::VReg_64RegClassID;
1962       case 3: return AMDGPU::VReg_96RegClassID;
1963       case 4: return AMDGPU::VReg_128RegClassID;
1964       case 5: return AMDGPU::VReg_160RegClassID;
1965       case 6: return AMDGPU::VReg_192RegClassID;
1966       case 8: return AMDGPU::VReg_256RegClassID;
1967       case 16: return AMDGPU::VReg_512RegClassID;
1968       case 32: return AMDGPU::VReg_1024RegClassID;
1969     }
1970   } else if (Is == IS_TTMP) {
1971     switch (RegWidth) {
1972       default: return -1;
1973       case 1: return AMDGPU::TTMP_32RegClassID;
1974       case 2: return AMDGPU::TTMP_64RegClassID;
1975       case 4: return AMDGPU::TTMP_128RegClassID;
1976       case 8: return AMDGPU::TTMP_256RegClassID;
1977       case 16: return AMDGPU::TTMP_512RegClassID;
1978     }
1979   } else if (Is == IS_SGPR) {
1980     switch (RegWidth) {
1981       default: return -1;
1982       case 1: return AMDGPU::SGPR_32RegClassID;
1983       case 2: return AMDGPU::SGPR_64RegClassID;
1984       case 3: return AMDGPU::SGPR_96RegClassID;
1985       case 4: return AMDGPU::SGPR_128RegClassID;
1986       case 5: return AMDGPU::SGPR_160RegClassID;
1987       case 6: return AMDGPU::SGPR_192RegClassID;
1988       case 8: return AMDGPU::SGPR_256RegClassID;
1989       case 16: return AMDGPU::SGPR_512RegClassID;
1990     }
1991   } else if (Is == IS_AGPR) {
1992     switch (RegWidth) {
1993       default: return -1;
1994       case 1: return AMDGPU::AGPR_32RegClassID;
1995       case 2: return AMDGPU::AReg_64RegClassID;
1996       case 3: return AMDGPU::AReg_96RegClassID;
1997       case 4: return AMDGPU::AReg_128RegClassID;
1998       case 5: return AMDGPU::AReg_160RegClassID;
1999       case 6: return AMDGPU::AReg_192RegClassID;
2000       case 8: return AMDGPU::AReg_256RegClassID;
2001       case 16: return AMDGPU::AReg_512RegClassID;
2002       case 32: return AMDGPU::AReg_1024RegClassID;
2003     }
2004   }
2005   return -1;
2006 }
2007 
2008 static unsigned getSpecialRegForName(StringRef RegName) {
2009   return StringSwitch<unsigned>(RegName)
2010     .Case("exec", AMDGPU::EXEC)
2011     .Case("vcc", AMDGPU::VCC)
2012     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2013     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2014     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2015     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2016     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2017     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2018     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2019     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2020     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2021     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2022     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2023     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2024     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2025     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2026     .Case("m0", AMDGPU::M0)
2027     .Case("vccz", AMDGPU::SRC_VCCZ)
2028     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2029     .Case("execz", AMDGPU::SRC_EXECZ)
2030     .Case("src_execz", AMDGPU::SRC_EXECZ)
2031     .Case("scc", AMDGPU::SRC_SCC)
2032     .Case("src_scc", AMDGPU::SRC_SCC)
2033     .Case("tba", AMDGPU::TBA)
2034     .Case("tma", AMDGPU::TMA)
2035     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2036     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2037     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2038     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2039     .Case("vcc_lo", AMDGPU::VCC_LO)
2040     .Case("vcc_hi", AMDGPU::VCC_HI)
2041     .Case("exec_lo", AMDGPU::EXEC_LO)
2042     .Case("exec_hi", AMDGPU::EXEC_HI)
2043     .Case("tma_lo", AMDGPU::TMA_LO)
2044     .Case("tma_hi", AMDGPU::TMA_HI)
2045     .Case("tba_lo", AMDGPU::TBA_LO)
2046     .Case("tba_hi", AMDGPU::TBA_HI)
2047     .Case("pc", AMDGPU::PC_REG)
2048     .Case("null", AMDGPU::SGPR_NULL)
2049     .Default(AMDGPU::NoRegister);
2050 }
2051 
2052 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2053                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2054   auto R = parseRegister();
2055   if (!R) return true;
2056   assert(R->isReg());
2057   RegNo = R->getReg();
2058   StartLoc = R->getStartLoc();
2059   EndLoc = R->getEndLoc();
2060   return false;
2061 }
2062 
2063 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2064                                     SMLoc &EndLoc) {
2065   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2066 }
2067 
2068 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2069                                                        SMLoc &StartLoc,
2070                                                        SMLoc &EndLoc) {
2071   bool Result =
2072       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2073   bool PendingErrors = getParser().hasPendingError();
2074   getParser().clearPendingErrors();
2075   if (PendingErrors)
2076     return MatchOperand_ParseFail;
2077   if (Result)
2078     return MatchOperand_NoMatch;
2079   return MatchOperand_Success;
2080 }
2081 
2082 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2083                                             RegisterKind RegKind, unsigned Reg1,
2084                                             SMLoc Loc) {
2085   switch (RegKind) {
2086   case IS_SPECIAL:
2087     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2088       Reg = AMDGPU::EXEC;
2089       RegWidth = 2;
2090       return true;
2091     }
2092     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2093       Reg = AMDGPU::FLAT_SCR;
2094       RegWidth = 2;
2095       return true;
2096     }
2097     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2098       Reg = AMDGPU::XNACK_MASK;
2099       RegWidth = 2;
2100       return true;
2101     }
2102     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2103       Reg = AMDGPU::VCC;
2104       RegWidth = 2;
2105       return true;
2106     }
2107     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2108       Reg = AMDGPU::TBA;
2109       RegWidth = 2;
2110       return true;
2111     }
2112     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2113       Reg = AMDGPU::TMA;
2114       RegWidth = 2;
2115       return true;
2116     }
2117     Error(Loc, "register does not fit in the list");
2118     return false;
2119   case IS_VGPR:
2120   case IS_SGPR:
2121   case IS_AGPR:
2122   case IS_TTMP:
2123     if (Reg1 != Reg + RegWidth) {
2124       Error(Loc, "registers in a list must have consecutive indices");
2125       return false;
2126     }
2127     RegWidth++;
2128     return true;
2129   default:
2130     llvm_unreachable("unexpected register kind");
2131   }
2132 }
2133 
2134 struct RegInfo {
2135   StringLiteral Name;
2136   RegisterKind Kind;
2137 };
2138 
2139 static constexpr RegInfo RegularRegisters[] = {
2140   {{"v"},    IS_VGPR},
2141   {{"s"},    IS_SGPR},
2142   {{"ttmp"}, IS_TTMP},
2143   {{"acc"},  IS_AGPR},
2144   {{"a"},    IS_AGPR},
2145 };
2146 
2147 static bool isRegularReg(RegisterKind Kind) {
2148   return Kind == IS_VGPR ||
2149          Kind == IS_SGPR ||
2150          Kind == IS_TTMP ||
2151          Kind == IS_AGPR;
2152 }
2153 
2154 static const RegInfo* getRegularRegInfo(StringRef Str) {
2155   for (const RegInfo &Reg : RegularRegisters)
2156     if (Str.startswith(Reg.Name))
2157       return &Reg;
2158   return nullptr;
2159 }
2160 
2161 static bool getRegNum(StringRef Str, unsigned& Num) {
2162   return !Str.getAsInteger(10, Num);
2163 }
2164 
2165 bool
2166 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2167                             const AsmToken &NextToken) const {
2168 
2169   // A list of consecutive registers: [s0,s1,s2,s3]
2170   if (Token.is(AsmToken::LBrac))
2171     return true;
2172 
2173   if (!Token.is(AsmToken::Identifier))
2174     return false;
2175 
2176   // A single register like s0 or a range of registers like s[0:1]
2177 
2178   StringRef Str = Token.getString();
2179   const RegInfo *Reg = getRegularRegInfo(Str);
2180   if (Reg) {
2181     StringRef RegName = Reg->Name;
2182     StringRef RegSuffix = Str.substr(RegName.size());
2183     if (!RegSuffix.empty()) {
2184       unsigned Num;
2185       // A single register with an index: rXX
2186       if (getRegNum(RegSuffix, Num))
2187         return true;
2188     } else {
2189       // A range of registers: r[XX:YY].
2190       if (NextToken.is(AsmToken::LBrac))
2191         return true;
2192     }
2193   }
2194 
2195   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2196 }
2197 
2198 bool
2199 AMDGPUAsmParser::isRegister()
2200 {
2201   return isRegister(getToken(), peekToken());
2202 }
2203 
2204 unsigned
2205 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2206                                unsigned RegNum,
2207                                unsigned RegWidth,
2208                                SMLoc Loc) {
2209 
2210   assert(isRegularReg(RegKind));
2211 
2212   unsigned AlignSize = 1;
2213   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2214     // SGPR and TTMP registers must be aligned.
2215     // Max required alignment is 4 dwords.
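    // For example (illustrative), s[2:3] is 2-dword aligned and accepted,
    // while s[1:2] is rejected; wider ranges such as s[4:7] must be aligned
    // to 4 dwords.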
2216     AlignSize = std::min(RegWidth, 4u);
2217   }
2218 
2219   if (RegNum % AlignSize != 0) {
2220     Error(Loc, "invalid register alignment");
2221     return AMDGPU::NoRegister;
2222   }
2223 
2224   unsigned RegIdx = RegNum / AlignSize;
2225   int RCID = getRegClass(RegKind, RegWidth);
2226   if (RCID == -1) {
2227     Error(Loc, "invalid or unsupported register size");
2228     return AMDGPU::NoRegister;
2229   }
2230 
2231   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2232   const MCRegisterClass RC = TRI->getRegClass(RCID);
2233   if (RegIdx >= RC.getNumRegs()) {
2234     Error(Loc, "register index is out of range");
2235     return AMDGPU::NoRegister;
2236   }
2237 
2238   return RC.getRegister(RegIdx);
2239 }
2240 
2241 bool
2242 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2243   int64_t RegLo, RegHi;
2244   if (!skipToken(AsmToken::LBrac, "missing register index"))
2245     return false;
2246 
2247   SMLoc FirstIdxLoc = getLoc();
2248   SMLoc SecondIdxLoc;
2249 
2250   if (!parseExpr(RegLo))
2251     return false;
2252 
2253   if (trySkipToken(AsmToken::Colon)) {
2254     SecondIdxLoc = getLoc();
2255     if (!parseExpr(RegHi))
2256       return false;
2257   } else {
2258     RegHi = RegLo;
2259   }
2260 
2261   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2262     return false;
2263 
2264   if (!isUInt<32>(RegLo)) {
2265     Error(FirstIdxLoc, "invalid register index");
2266     return false;
2267   }
2268 
2269   if (!isUInt<32>(RegHi)) {
2270     Error(SecondIdxLoc, "invalid register index");
2271     return false;
2272   }
2273 
2274   if (RegLo > RegHi) {
2275     Error(FirstIdxLoc, "first register index should not exceed second index");
2276     return false;
2277   }
2278 
2279   Num = static_cast<unsigned>(RegLo);
2280   Width = (RegHi - RegLo) + 1;
2281   return true;
2282 }
2283 
2284 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2285                                           unsigned &RegNum, unsigned &RegWidth,
2286                                           SmallVectorImpl<AsmToken> &Tokens) {
2287   assert(isToken(AsmToken::Identifier));
2288   unsigned Reg = getSpecialRegForName(getTokenStr());
2289   if (Reg) {
2290     RegNum = 0;
2291     RegWidth = 1;
2292     RegKind = IS_SPECIAL;
2293     Tokens.push_back(getToken());
2294     lex(); // skip register name
2295   }
2296   return Reg;
2297 }
2298 
2299 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2300                                           unsigned &RegNum, unsigned &RegWidth,
2301                                           SmallVectorImpl<AsmToken> &Tokens) {
2302   assert(isToken(AsmToken::Identifier));
2303   StringRef RegName = getTokenStr();
2304   auto Loc = getLoc();
2305 
2306   const RegInfo *RI = getRegularRegInfo(RegName);
2307   if (!RI) {
2308     Error(Loc, "invalid register name");
2309     return AMDGPU::NoRegister;
2310   }
2311 
2312   Tokens.push_back(getToken());
2313   lex(); // skip register name
2314 
2315   RegKind = RI->Kind;
2316   StringRef RegSuffix = RegName.substr(RI->Name.size());
2317   if (!RegSuffix.empty()) {
2318     // Single 32-bit register: vXX.
2319     if (!getRegNum(RegSuffix, RegNum)) {
2320       Error(Loc, "invalid register index");
2321       return AMDGPU::NoRegister;
2322     }
2323     RegWidth = 1;
2324   } else {
2325     // Range of registers: v[XX:YY]. ":YY" is optional.
2326     if (!ParseRegRange(RegNum, RegWidth))
2327       return AMDGPU::NoRegister;
2328   }
2329 
2330   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2331 }
2332 
2333 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2334                                        unsigned &RegWidth,
2335                                        SmallVectorImpl<AsmToken> &Tokens) {
2336   unsigned Reg = AMDGPU::NoRegister;
2337   auto ListLoc = getLoc();
2338 
2339   if (!skipToken(AsmToken::LBrac,
2340                  "expected a register or a list of registers")) {
2341     return AMDGPU::NoRegister;
2342   }
2343 
2344   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2345 
2346   auto Loc = getLoc();
2347   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2348     return AMDGPU::NoRegister;
2349   if (RegWidth != 1) {
2350     Error(Loc, "expected a single 32-bit register");
2351     return AMDGPU::NoRegister;
2352   }
2353 
2354   for (; trySkipToken(AsmToken::Comma); ) {
2355     RegisterKind NextRegKind;
2356     unsigned NextReg, NextRegNum, NextRegWidth;
2357     Loc = getLoc();
2358 
2359     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2360                              NextRegNum, NextRegWidth,
2361                              Tokens)) {
2362       return AMDGPU::NoRegister;
2363     }
2364     if (NextRegWidth != 1) {
2365       Error(Loc, "expected a single 32-bit register");
2366       return AMDGPU::NoRegister;
2367     }
2368     if (NextRegKind != RegKind) {
2369       Error(Loc, "registers in a list must be of the same kind");
2370       return AMDGPU::NoRegister;
2371     }
2372     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2373       return AMDGPU::NoRegister;
2374   }
2375 
2376   if (!skipToken(AsmToken::RBrac,
2377                  "expected a comma or a closing square bracket")) {
2378     return AMDGPU::NoRegister;
2379   }
2380 
2381   if (isRegularReg(RegKind))
2382     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2383 
2384   return Reg;
2385 }
2386 
2387 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2388                                           unsigned &RegNum, unsigned &RegWidth,
2389                                           SmallVectorImpl<AsmToken> &Tokens) {
2390   auto Loc = getLoc();
2391   Reg = AMDGPU::NoRegister;
2392 
2393   if (isToken(AsmToken::Identifier)) {
2394     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2395     if (Reg == AMDGPU::NoRegister)
2396       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2397   } else {
2398     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2399   }
2400 
2401   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2402   if (Reg == AMDGPU::NoRegister) {
2403     assert(Parser.hasPendingError());
2404     return false;
2405   }
2406 
2407   if (!subtargetHasRegister(*TRI, Reg)) {
2408     if (Reg == AMDGPU::SGPR_NULL) {
2409       Error(Loc, "'null' operand is not supported on this GPU");
2410     } else {
2411       Error(Loc, "register not available on this GPU");
2412     }
2413     return false;
2414   }
2415 
2416   return true;
2417 }
2418 
2419 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2420                                           unsigned &RegNum, unsigned &RegWidth,
2421                                           bool RestoreOnFailure /*=false*/) {
2422   Reg = AMDGPU::NoRegister;
2423 
2424   SmallVector<AsmToken, 1> Tokens;
2425   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2426     if (RestoreOnFailure) {
2427       while (!Tokens.empty()) {
2428         getLexer().UnLex(Tokens.pop_back_val());
2429       }
2430     }
2431     return true;
2432   }
2433   return false;
2434 }
2435 
2436 Optional<StringRef>
2437 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2438   switch (RegKind) {
2439   case IS_VGPR:
2440     return StringRef(".amdgcn.next_free_vgpr");
2441   case IS_SGPR:
2442     return StringRef(".amdgcn.next_free_sgpr");
2443   default:
2444     return None;
2445   }
2446 }
2447 
2448 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2449   auto SymbolName = getGprCountSymbolName(RegKind);
2450   assert(SymbolName && "initializing invalid register kind");
2451   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2452   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2453 }
2454 
2455 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2456                                             unsigned DwordRegIndex,
2457                                             unsigned RegWidth) {
2458   // Symbols are only defined for GCN targets
2459   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2460     return true;
2461 
2462   auto SymbolName = getGprCountSymbolName(RegKind);
2463   if (!SymbolName)
2464     return true;
2465   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2466 
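  // For example (illustrative), a use of v[8:11] (DwordRegIndex = 8,
  // RegWidth = 4) raises .amdgcn.next_free_vgpr to at least 12.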
2467   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2468   int64_t OldCount;
2469 
2470   if (!Sym->isVariable())
2471     return !Error(getParser().getTok().getLoc(),
2472                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2473   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2474     return !Error(
2475         getParser().getTok().getLoc(),
2476         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2477 
2478   if (OldCount <= NewMax)
2479     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2480 
2481   return true;
2482 }
2483 
2484 std::unique_ptr<AMDGPUOperand>
2485 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2486   const auto &Tok = Parser.getTok();
2487   SMLoc StartLoc = Tok.getLoc();
2488   SMLoc EndLoc = Tok.getEndLoc();
2489   RegisterKind RegKind;
2490   unsigned Reg, RegNum, RegWidth;
2491 
2492   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2493     return nullptr;
2494   }
2495   if (isHsaAbiVersion3(&getSTI())) {
2496     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2497       return nullptr;
2498   } else
2499     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2500   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2501 }
2502 
2503 OperandMatchResultTy
2504 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2505   // TODO: add syntactic sugar for 1/(2*PI)
2506 
2507   assert(!isRegister());
2508   assert(!isModifier());
2509 
2510   const auto& Tok = getToken();
2511   const auto& NextTok = peekToken();
2512   bool IsReal = Tok.is(AsmToken::Real);
2513   SMLoc S = getLoc();
2514   bool Negate = false;
2515 
2516   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2517     lex();
2518     IsReal = true;
2519     Negate = true;
2520   }
2521 
2522   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed.
2526 
2527     StringRef Num = getTokenStr();
2528     lex();
2529 
2530     APFloat RealVal(APFloat::IEEEdouble());
2531     auto roundMode = APFloat::rmNearestTiesToEven;
2532     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2533       return MatchOperand_ParseFail;
2534     }
2535     if (Negate)
2536       RealVal.changeSign();
2537 
2538     Operands.push_back(
2539       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2540                                AMDGPUOperand::ImmTyNone, true));
2541 
2542     return MatchOperand_Success;
2543 
2544   } else {
2545     int64_t IntVal;
2546     const MCExpr *Expr;
2547     SMLoc S = getLoc();
2548 
2549     if (HasSP3AbsModifier) {
2550       // This is a workaround for handling expressions
2551       // as arguments of SP3 'abs' modifier, for example:
2552       //     |1.0|
2553       //     |-1|
2554       //     |1+x|
2555       // This syntax is not compatible with syntax of standard
2556       // MC expressions (due to the trailing '|').
2557       SMLoc EndLoc;
2558       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2559         return MatchOperand_ParseFail;
2560     } else {
2561       if (Parser.parseExpression(Expr))
2562         return MatchOperand_ParseFail;
2563     }
2564 
2565     if (Expr->evaluateAsAbsolute(IntVal)) {
2566       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2567     } else {
2568       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2569     }
2570 
2571     return MatchOperand_Success;
2572   }
2573 
2574   return MatchOperand_NoMatch;
2575 }
2576 
2577 OperandMatchResultTy
2578 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2579   if (!isRegister())
2580     return MatchOperand_NoMatch;
2581 
2582   if (auto R = parseRegister()) {
2583     assert(R->isReg());
2584     Operands.push_back(std::move(R));
2585     return MatchOperand_Success;
2586   }
2587   return MatchOperand_ParseFail;
2588 }
2589 
2590 OperandMatchResultTy
2591 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2592   auto res = parseReg(Operands);
2593   if (res != MatchOperand_NoMatch) {
2594     return res;
2595   } else if (isModifier()) {
2596     return MatchOperand_NoMatch;
2597   } else {
2598     return parseImm(Operands, HasSP3AbsMod);
2599   }
2600 }
2601 
2602 bool
2603 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2604   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2605     const auto &str = Token.getString();
2606     return str == "abs" || str == "neg" || str == "sext";
2607   }
2608   return false;
2609 }
2610 
2611 bool
2612 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2613   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2614 }
2615 
2616 bool
2617 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2618   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2619 }
2620 
2621 bool
2622 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2623   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2624 }
2625 
// Check if this is an operand modifier or an opcode modifier
// that may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
2629 // recognized sequences are:
2630 //   |...|
2631 //   abs(...)
2632 //   neg(...)
2633 //   sext(...)
2634 //   -reg
2635 //   -|...|
2636 //   -abs(...)
2637 //   name:...
2638 // Note that simple opcode modifiers like 'gds' may be parsed as
2639 // expressions; this is a special case. See getExpressionAsToken.
2640 //
2641 bool
2642 AMDGPUAsmParser::isModifier() {
2643 
2644   AsmToken Tok = getToken();
2645   AsmToken NextToken[2];
2646   peekTokens(NextToken);
2647 
2648   return isOperandModifier(Tok, NextToken[0]) ||
2649          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2650          isOpcodeModifierWithVal(Tok, NextToken[0]);
2651 }
2652 
2653 // Check if the current token is an SP3 'neg' modifier.
2654 // Currently this modifier is allowed in the following context:
2655 //
2656 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2657 // 2. Before an 'abs' modifier: -abs(...)
2658 // 3. Before an SP3 'abs' modifier: -|...|
2659 //
2660 // In all other cases "-" is handled as a part
2661 // of an expression that follows the sign.
2662 //
// Note: When "-" is followed by an integer literal N,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2674 //
2675 bool
2676 AMDGPUAsmParser::parseSP3NegModifier() {
2677 
2678   AsmToken NextToken[2];
2679   peekTokens(NextToken);
2680 
2681   if (isToken(AsmToken::Minus) &&
2682       (isRegister(NextToken[0], NextToken[1]) ||
2683        NextToken[0].is(AsmToken::Pipe) ||
2684        isId(NextToken[0], "abs"))) {
2685     lex();
2686     return true;
2687   }
2688 
2689   return false;
2690 }
2691 
2692 OperandMatchResultTy
2693 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2694                                               bool AllowImm) {
2695   bool Neg, SP3Neg;
2696   bool Abs, SP3Abs;
2697   SMLoc Loc;
2698 
2699   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2700   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2701     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2702     return MatchOperand_ParseFail;
2703   }
2704 
2705   SP3Neg = parseSP3NegModifier();
2706 
2707   Loc = getLoc();
2708   Neg = trySkipId("neg");
2709   if (Neg && SP3Neg) {
2710     Error(Loc, "expected register or immediate");
2711     return MatchOperand_ParseFail;
2712   }
2713   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2714     return MatchOperand_ParseFail;
2715 
2716   Abs = trySkipId("abs");
2717   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2718     return MatchOperand_ParseFail;
2719 
2720   Loc = getLoc();
2721   SP3Abs = trySkipToken(AsmToken::Pipe);
2722   if (Abs && SP3Abs) {
2723     Error(Loc, "expected register or immediate");
2724     return MatchOperand_ParseFail;
2725   }
2726 
2727   OperandMatchResultTy Res;
2728   if (AllowImm) {
2729     Res = parseRegOrImm(Operands, SP3Abs);
2730   } else {
2731     Res = parseReg(Operands);
2732   }
2733   if (Res != MatchOperand_Success) {
2734     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2735   }
2736 
2737   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2738     return MatchOperand_ParseFail;
2739   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2740     return MatchOperand_ParseFail;
2741   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2742     return MatchOperand_ParseFail;
2743 
2744   AMDGPUOperand::Modifiers Mods;
2745   Mods.Abs = Abs || SP3Abs;
2746   Mods.Neg = Neg || SP3Neg;
2747 
2748   if (Mods.hasFPModifiers()) {
2749     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2750     if (Op.isExpr()) {
2751       Error(Op.getStartLoc(), "expected an absolute expression");
2752       return MatchOperand_ParseFail;
2753     }
2754     Op.setModifiers(Mods);
2755   }
2756   return MatchOperand_Success;
2757 }
2758 
2759 OperandMatchResultTy
2760 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2761                                                bool AllowImm) {
2762   bool Sext = trySkipId("sext");
2763   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2764     return MatchOperand_ParseFail;
2765 
2766   OperandMatchResultTy Res;
2767   if (AllowImm) {
2768     Res = parseRegOrImm(Operands);
2769   } else {
2770     Res = parseReg(Operands);
2771   }
2772   if (Res != MatchOperand_Success) {
2773     return Sext? MatchOperand_ParseFail : Res;
2774   }
2775 
2776   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2777     return MatchOperand_ParseFail;
2778 
2779   AMDGPUOperand::Modifiers Mods;
2780   Mods.Sext = Sext;
2781 
2782   if (Mods.hasIntModifiers()) {
2783     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2784     if (Op.isExpr()) {
2785       Error(Op.getStartLoc(), "expected an absolute expression");
2786       return MatchOperand_ParseFail;
2787     }
2788     Op.setModifiers(Mods);
2789   }
2790 
2791   return MatchOperand_Success;
2792 }
2793 
2794 OperandMatchResultTy
2795 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2796   return parseRegOrImmWithFPInputMods(Operands, false);
2797 }
2798 
2799 OperandMatchResultTy
2800 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2801   return parseRegOrImmWithIntInputMods(Operands, false);
2802 }
2803 
2804 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2805   auto Loc = getLoc();
2806   if (trySkipId("off")) {
2807     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2808                                                 AMDGPUOperand::ImmTyOff, false));
2809     return MatchOperand_Success;
2810   }
2811 
2812   if (!isRegister())
2813     return MatchOperand_NoMatch;
2814 
2815   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2816   if (Reg) {
2817     Operands.push_back(std::move(Reg));
2818     return MatchOperand_Success;
2819   }
2820 
2821   return MatchOperand_ParseFail;
2822 
2823 }
2824 
2825 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2826   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2827 
2828   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2829       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2830       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2831       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2832     return Match_InvalidOperand;
2833 
2834   if ((TSFlags & SIInstrFlags::VOP3) &&
2835       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2836       getForcedEncodingSize() != 64)
2837     return Match_PreferE32;
2838 
2839   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2840       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2841     // v_mac_f32/16 allow only dst_sel == DWORD;
2842     auto OpNum =
2843         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2844     const auto &Op = Inst.getOperand(OpNum);
2845     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2846       return Match_InvalidOperand;
2847     }
2848   }
2849 
2850   return Match_Success;
2851 }
2852 
2853 static ArrayRef<unsigned> getAllVariants() {
2854   static const unsigned Variants[] = {
2855     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2856     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2857   };
2858 
2859   return makeArrayRef(Variants);
2860 }
2861 
2862 // What asm variants we should check
2863 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2864   if (getForcedEncodingSize() == 32) {
2865     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2866     return makeArrayRef(Variants);
2867   }
2868 
2869   if (isForcedVOP3()) {
2870     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2871     return makeArrayRef(Variants);
2872   }
2873 
2874   if (isForcedSDWA()) {
2875     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2876                                         AMDGPUAsmVariants::SDWA9};
2877     return makeArrayRef(Variants);
2878   }
2879 
2880   if (isForcedDPP()) {
2881     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2882     return makeArrayRef(Variants);
2883   }
2884 
2885   return getAllVariants();
2886 }
2887 
2888 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2889   if (getForcedEncodingSize() == 32)
2890     return "e32";
2891 
2892   if (isForcedVOP3())
2893     return "e64";
2894 
2895   if (isForcedSDWA())
2896     return "sdwa";
2897 
2898   if (isForcedDPP())
2899     return "dpp";
2900 
2901   return "";
2902 }
2903 
2904 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2905   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2906   const unsigned Num = Desc.getNumImplicitUses();
2907   for (unsigned i = 0; i < Num; ++i) {
2908     unsigned Reg = Desc.ImplicitUses[i];
2909     switch (Reg) {
2910     case AMDGPU::FLAT_SCR:
2911     case AMDGPU::VCC:
2912     case AMDGPU::VCC_LO:
2913     case AMDGPU::VCC_HI:
2914     case AMDGPU::M0:
2915       return Reg;
2916     default:
2917       break;
2918     }
2919   }
2920   return AMDGPU::NoRegister;
2921 }
2922 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases where a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2927 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2928                                        unsigned OpIdx) const {
2929   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2930 
2931   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2932     return false;
2933   }
2934 
2935   const MCOperand &MO = Inst.getOperand(OpIdx);
2936 
2937   int64_t Val = MO.getImm();
2938   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2939 
2940   switch (OpSize) { // expected operand size
2941   case 8:
2942     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2943   case 4:
2944     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2945   case 2: {
2946     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2947     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2948         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2949         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2950       return AMDGPU::isInlinableIntLiteral(Val);
2951 
2952     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2953         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2954         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2955       return AMDGPU::isInlinableIntLiteralV216(Val);
2956 
2957     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2958         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2959         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2960       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2961 
2962     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2963   }
2964   default:
2965     llvm_unreachable("invalid operand size");
2966   }
2967 }
2968 
2969 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2970   if (!isGFX10())
2971     return 1;
2972 
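  // For example (illustrative), most GFX10 VALU instructions may read two
  // different SGPRs via the constant bus, while the 64-bit shifts listed
  // below are limited to a single scalar input.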
2973   switch (Opcode) {
2974   // 64-bit shift instructions can use only one scalar value input
2975   case AMDGPU::V_LSHLREV_B64:
2976   case AMDGPU::V_LSHLREV_B64_gfx10:
2977   case AMDGPU::V_LSHL_B64:
2978   case AMDGPU::V_LSHRREV_B64:
2979   case AMDGPU::V_LSHRREV_B64_gfx10:
2980   case AMDGPU::V_LSHR_B64:
2981   case AMDGPU::V_ASHRREV_I64:
2982   case AMDGPU::V_ASHRREV_I64_gfx10:
2983   case AMDGPU::V_ASHR_I64:
2984     return 1;
2985   default:
2986     return 2;
2987   }
2988 }
2989 
2990 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2991   const MCOperand &MO = Inst.getOperand(OpIdx);
2992   if (MO.isImm()) {
2993     return !isInlineConstant(Inst, OpIdx);
2994   } else if (MO.isReg()) {
2995     auto Reg = MO.getReg();
2996     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2997     auto PReg = mc2PseudoReg(Reg);
2998     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
2999   } else {
3000     return true;
3001   }
3002 }
3003 
3004 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
3005   const unsigned Opcode = Inst.getOpcode();
3006   const MCInstrDesc &Desc = MII.get(Opcode);
3007   unsigned ConstantBusUseCount = 0;
3008   unsigned NumLiterals = 0;
3009   unsigned LiteralSize;
3010 
3011   if (Desc.TSFlags &
3012       (SIInstrFlags::VOPC |
3013        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3014        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3015        SIInstrFlags::SDWA)) {
3016     // Check special imm operands (used by madmk, etc)
3017     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3018       ++ConstantBusUseCount;
3019     }
3020 
3021     SmallDenseSet<unsigned> SGPRsUsed;
3022     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3023     if (SGPRUsed != AMDGPU::NoRegister) {
3024       SGPRsUsed.insert(SGPRUsed);
3025       ++ConstantBusUseCount;
3026     }
3027 
3028     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3029     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3030     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3031 
3032     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3033 
3034     for (int OpIdx : OpIndices) {
3035       if (OpIdx == -1) break;
3036 
3037       const MCOperand &MO = Inst.getOperand(OpIdx);
3038       if (usesConstantBus(Inst, OpIdx)) {
3039         if (MO.isReg()) {
3040           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
3042           //   s0, s[0:1]
3043           //   flat_scratch_lo, flat_scratch
3044           //   flat_scratch_lo, flat_scratch_hi
3045           // are theoretically valid but they are disabled anyway.
3046           // Note that this code mimics SIInstrInfo::verifyInstruction
3047           if (!SGPRsUsed.count(Reg)) {
3048             SGPRsUsed.insert(Reg);
3049             ++ConstantBusUseCount;
3050           }
3051         } else { // Expression or a literal
3052 
3053           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3054             continue; // special operand like VINTERP attr_chan
3055 
          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // the literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.
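          // For example (illustrative), a literal shared by two 32-bit
          // operands counts as one scalar value, while the same literal used
          // for both a 32-bit and a 64-bit operand counts as two.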
3064 
3065           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3066           if (Size < 4) Size = 4;
3067 
3068           if (NumLiterals == 0) {
3069             NumLiterals = 1;
3070             LiteralSize = Size;
3071           } else if (LiteralSize != Size) {
3072             NumLiterals = 2;
3073           }
3074         }
3075       }
3076     }
3077   }
3078   ConstantBusUseCount += NumLiterals;
3079 
3080   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3081 }
3082 
3083 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3084   const unsigned Opcode = Inst.getOpcode();
3085   const MCInstrDesc &Desc = MII.get(Opcode);
3086 
3087   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3088   if (DstIdx == -1 ||
3089       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3090     return true;
3091   }
3092 
3093   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3094 
3095   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3096   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3097   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3098 
3099   assert(DstIdx != -1);
3100   const MCOperand &Dst = Inst.getOperand(DstIdx);
3101   assert(Dst.isReg());
3102   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3103 
3104   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3105 
3106   for (int SrcIdx : SrcIndices) {
3107     if (SrcIdx == -1) break;
3108     const MCOperand &Src = Inst.getOperand(SrcIdx);
3109     if (Src.isReg()) {
3110       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3111       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3112         return false;
3113       }
3114     }
3115   }
3116 
3117   return true;
3118 }
3119 
3120 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3121 
3122   const unsigned Opc = Inst.getOpcode();
3123   const MCInstrDesc &Desc = MII.get(Opc);
3124 
3125   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3126     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3127     assert(ClampIdx != -1);
3128     return Inst.getOperand(ClampIdx).getImm() == 0;
3129   }
3130 
3131   return true;
3132 }
3133 
3134 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3135 
3136   const unsigned Opc = Inst.getOpcode();
3137   const MCInstrDesc &Desc = MII.get(Opc);
3138 
3139   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3140     return true;
3141 
3142   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3143   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3144   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3145 
3146   assert(VDataIdx != -1);
3147 
3148   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3149     return true;
3150 
3151   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3153   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3154   if (DMask == 0)
3155     DMask = 1;
3156 
3157   unsigned DataSize =
3158     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3159   if (hasPackedD16()) {
3160     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3161     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3162       DataSize = (DataSize + 1) / 2;
3163   }
3164 
3165   return (VDataSize / 4) == DataSize + TFESize;
3166 }
3167 
3168 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3169   const unsigned Opc = Inst.getOpcode();
3170   const MCInstrDesc &Desc = MII.get(Opc);
3171 
3172   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3173     return true;
3174 
3175   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3176 
3177   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3178       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3179   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3180   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3181   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3182 
3183   assert(VAddr0Idx != -1);
3184   assert(SrsrcIdx != -1);
3185   assert(SrsrcIdx > VAddr0Idx);
3186 
3187   if (DimIdx == -1)
3188     return true; // intersect_ray
3189 
3190   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3191   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3192   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3193   unsigned VAddrSize =
3194       IsNSA ? SrsrcIdx - VAddr0Idx
3195             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3196 
3197   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3198                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3199                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3200                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3201   if (!IsNSA) {
3202     if (AddrSize > 8)
3203       AddrSize = 16;
3204     else if (AddrSize > 4)
3205       AddrSize = 8;
3206   }
3207 
3208   return VAddrSize == AddrSize;
3209 }
3210 
3211 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3212 
3213   const unsigned Opc = Inst.getOpcode();
3214   const MCInstrDesc &Desc = MII.get(Opc);
3215 
3216   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3217     return true;
3218   if (!Desc.mayLoad() || !Desc.mayStore())
3219     return true; // Not atomic
3220 
3221   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3222   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3223 
3224   // This is an incomplete check because image_atomic_cmpswap
3225   // may only use 0x3 and 0xf while other atomic operations
3226   // may use 0x1 and 0x3. However these limitations are
3227   // verified when we check that dmask matches dst size.
3228   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3229 }
3230 
3231 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3232 
3233   const unsigned Opc = Inst.getOpcode();
3234   const MCInstrDesc &Desc = MII.get(Opc);
3235 
3236   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3237     return true;
3238 
3239   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3240   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3241 
3242   // GATHER4 instructions use dmask in a different fashion compared to
3243   // other MIMG instructions. The only useful DMASK values are
3244   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3245   // (red,red,red,red) etc.) The ISA document doesn't mention
3246   // this.
3247   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3248 }
3249 
3250 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3251 {
3252   switch (Opcode) {
3253   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3254   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3255   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3256     return true;
3257   default:
3258     return false;
3259   }
3260 }
3261 
// movrels* opcodes should only allow VGPRs as src0.
3263 // This is specified in .td description for vop1/vop3,
3264 // but sdwa is handled differently. See isSDWAOperand.
3265 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3266 
3267   const unsigned Opc = Inst.getOpcode();
3268   const MCInstrDesc &Desc = MII.get(Opc);
3269 
3270   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3271     return true;
3272 
3273   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3274   assert(Src0Idx != -1);
3275 
3276   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3277   if (!Src0.isReg())
3278     return false;
3279 
3280   auto Reg = Src0.getReg();
3281   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3282   return !isSGPR(mc2PseudoReg(Reg), TRI);
3283 }
3284 
3285 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3286 
3287   const unsigned Opc = Inst.getOpcode();
3288 
3289   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3290     return true;
3291 
3292   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3293   assert(Src0Idx != -1);
3294 
3295   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3296   if (!Src0.isReg())
3297     return true;
3298 
3299   auto Reg = Src0.getReg();
3300   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3301   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3302     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3303     return false;
3304   }
3305 
3306   return true;
3307 }
3308 
3309 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3310   switch (Inst.getOpcode()) {
3311   default:
3312     return true;
3313   case V_DIV_SCALE_F32_gfx6_gfx7:
3314   case V_DIV_SCALE_F32_vi:
3315   case V_DIV_SCALE_F32_gfx10:
3316   case V_DIV_SCALE_F64_gfx6_gfx7:
3317   case V_DIV_SCALE_F64_vi:
3318   case V_DIV_SCALE_F64_gfx10:
3319     break;
3320   }
3321 
3322   // TODO: Check that src0 = src1 or src2.
3323 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3327     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3328             .getImm() &
3329         SISrcMods::ABS) {
3330       Error(getLoc(), "ABS not allowed in VOP3B instructions");
3331       return false;
3332     }
3333   }
3334 
3335   return true;
3336 }
3337 
3338 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3339 
3340   const unsigned Opc = Inst.getOpcode();
3341   const MCInstrDesc &Desc = MII.get(Opc);
3342 
3343   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3344     return true;
3345 
3346   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3347   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3348     if (isCI() || isSI())
3349       return false;
3350   }
3351 
3352   return true;
3353 }
3354 
3355 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3356   const unsigned Opc = Inst.getOpcode();
3357   const MCInstrDesc &Desc = MII.get(Opc);
3358 
3359   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3360     return true;
3361 
3362   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3363   if (DimIdx < 0)
3364     return true;
3365 
  int64_t Imm = Inst.getOperand(DimIdx).getImm();
3367   if (Imm < 0 || Imm >= 8)
3368     return false;
3369 
3370   return true;
3371 }
3372 
3373 static bool IsRevOpcode(const unsigned Opcode)
3374 {
3375   switch (Opcode) {
3376   case AMDGPU::V_SUBREV_F32_e32:
3377   case AMDGPU::V_SUBREV_F32_e64:
3378   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3379   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3380   case AMDGPU::V_SUBREV_F32_e32_vi:
3381   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3382   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3383   case AMDGPU::V_SUBREV_F32_e64_vi:
3384 
3385   case AMDGPU::V_SUBREV_CO_U32_e32:
3386   case AMDGPU::V_SUBREV_CO_U32_e64:
3387   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3388   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3389 
3390   case AMDGPU::V_SUBBREV_U32_e32:
3391   case AMDGPU::V_SUBBREV_U32_e64:
3392   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3393   case AMDGPU::V_SUBBREV_U32_e32_vi:
3394   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3395   case AMDGPU::V_SUBBREV_U32_e64_vi:
3396 
3397   case AMDGPU::V_SUBREV_U32_e32:
3398   case AMDGPU::V_SUBREV_U32_e64:
3399   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3400   case AMDGPU::V_SUBREV_U32_e32_vi:
3401   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3402   case AMDGPU::V_SUBREV_U32_e64_vi:
3403 
3404   case AMDGPU::V_SUBREV_F16_e32:
3405   case AMDGPU::V_SUBREV_F16_e64:
3406   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3407   case AMDGPU::V_SUBREV_F16_e32_vi:
3408   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3409   case AMDGPU::V_SUBREV_F16_e64_vi:
3410 
3411   case AMDGPU::V_SUBREV_U16_e32:
3412   case AMDGPU::V_SUBREV_U16_e64:
3413   case AMDGPU::V_SUBREV_U16_e32_vi:
3414   case AMDGPU::V_SUBREV_U16_e64_vi:
3415 
3416   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3417   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3418   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3419 
3420   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3421   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3422 
3423   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3424   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3425 
3426   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3427   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3428 
3429   case AMDGPU::V_LSHRREV_B32_e32:
3430   case AMDGPU::V_LSHRREV_B32_e64:
3431   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3432   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3433   case AMDGPU::V_LSHRREV_B32_e32_vi:
3434   case AMDGPU::V_LSHRREV_B32_e64_vi:
3435   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3436   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3437 
3438   case AMDGPU::V_ASHRREV_I32_e32:
3439   case AMDGPU::V_ASHRREV_I32_e64:
3440   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3441   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3442   case AMDGPU::V_ASHRREV_I32_e32_vi:
3443   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3444   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3445   case AMDGPU::V_ASHRREV_I32_e64_vi:
3446 
3447   case AMDGPU::V_LSHLREV_B32_e32:
3448   case AMDGPU::V_LSHLREV_B32_e64:
3449   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3450   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3451   case AMDGPU::V_LSHLREV_B32_e32_vi:
3452   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3453   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3454   case AMDGPU::V_LSHLREV_B32_e64_vi:
3455 
3456   case AMDGPU::V_LSHLREV_B16_e32:
3457   case AMDGPU::V_LSHLREV_B16_e64:
3458   case AMDGPU::V_LSHLREV_B16_e32_vi:
3459   case AMDGPU::V_LSHLREV_B16_e64_vi:
3460   case AMDGPU::V_LSHLREV_B16_gfx10:
3461 
3462   case AMDGPU::V_LSHRREV_B16_e32:
3463   case AMDGPU::V_LSHRREV_B16_e64:
3464   case AMDGPU::V_LSHRREV_B16_e32_vi:
3465   case AMDGPU::V_LSHRREV_B16_e64_vi:
3466   case AMDGPU::V_LSHRREV_B16_gfx10:
3467 
3468   case AMDGPU::V_ASHRREV_I16_e32:
3469   case AMDGPU::V_ASHRREV_I16_e64:
3470   case AMDGPU::V_ASHRREV_I16_e32_vi:
3471   case AMDGPU::V_ASHRREV_I16_e64_vi:
3472   case AMDGPU::V_ASHRREV_I16_gfx10:
3473 
3474   case AMDGPU::V_LSHLREV_B64:
3475   case AMDGPU::V_LSHLREV_B64_gfx10:
3476   case AMDGPU::V_LSHLREV_B64_vi:
3477 
3478   case AMDGPU::V_LSHRREV_B64:
3479   case AMDGPU::V_LSHRREV_B64_gfx10:
3480   case AMDGPU::V_LSHRREV_B64_vi:
3481 
3482   case AMDGPU::V_ASHRREV_I64:
3483   case AMDGPU::V_ASHRREV_I64_gfx10:
3484   case AMDGPU::V_ASHRREV_I64_vi:
3485 
3486   case AMDGPU::V_PK_LSHLREV_B16:
3487   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3488   case AMDGPU::V_PK_LSHLREV_B16_vi:
3489 
3490   case AMDGPU::V_PK_LSHRREV_B16:
3491   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3492   case AMDGPU::V_PK_LSHRREV_B16_vi:
3493   case AMDGPU::V_PK_ASHRREV_I16:
3494   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3495   case AMDGPU::V_PK_ASHRREV_I16_vi:
3496     return true;
3497   default:
3498     return false;
3499   }
3500 }
3501 
3502 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3503 
3504   using namespace SIInstrFlags;
3505   const unsigned Opcode = Inst.getOpcode();
3506   const MCInstrDesc &Desc = MII.get(Opcode);
3507 
3508   // lds_direct register is defined so that it can be used
3509   // with 9-bit operands only. Ignore encodings which do not accept these.
3510   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3511     return true;
3512 
3513   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3514   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3515   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3516 
3517   const int SrcIndices[] = { Src1Idx, Src2Idx };
3518 
3519   // lds_direct cannot be specified as either src1 or src2.
3520   for (int SrcIdx : SrcIndices) {
3521     if (SrcIdx == -1) break;
3522     const MCOperand &Src = Inst.getOperand(SrcIdx);
3523     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3524       return false;
3525     }
3526   }
3527 
3528   if (Src0Idx == -1)
3529     return true;
3530 
3531   const MCOperand &Src = Inst.getOperand(Src0Idx);
3532   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3533     return true;
3534 
3535   // lds_direct is specified as src0. Check additional limitations.
3536   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3537 }
3538 
3539 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3540   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3541     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3542     if (Op.isFlatOffset())
3543       return Op.getStartLoc();
3544   }
3545   return getLoc();
3546 }
3547 
3548 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3549                                          const OperandVector &Operands) {
3550   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3551   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3552     return true;
3553 
3554   auto Opcode = Inst.getOpcode();
3555   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3556   assert(OpNum != -1);
3557 
3558   const auto &Op = Inst.getOperand(OpNum);
3559   if (!hasFlatOffsets() && Op.getImm() != 0) {
3560     Error(getFlatOffsetLoc(Operands),
3561           "flat offset modifier is not supported on this GPU");
3562     return false;
3563   }
3564 
  // The address offset is 13-bit signed on GFX9 and 12-bit signed on GFX10.
  // For the FLAT segment the offset must be non-negative; the MSB is ignored
  // and forced to zero, so only OffsetSize - 1 bits are usable.
3568   unsigned OffsetSize = isGFX9() ? 13 : 12;
3569   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3570     if (!isIntN(OffsetSize, Op.getImm())) {
3571       Error(getFlatOffsetLoc(Operands),
3572             isGFX9() ? "expected a 13-bit signed offset" :
3573                        "expected a 12-bit signed offset");
3574       return false;
3575     }
3576   } else {
3577     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3578       Error(getFlatOffsetLoc(Operands),
3579             isGFX9() ? "expected a 12-bit unsigned offset" :
3580                        "expected an 11-bit unsigned offset");
3581       return false;
3582     }
3583   }
3584 
3585   return true;
3586 }
3587 
3588 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3589   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3590     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3591     if (Op.isSMEMOffset())
3592       return Op.getStartLoc();
3593   }
3594   return getLoc();
3595 }
3596 
3597 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3598                                          const OperandVector &Operands) {
3599   if (isCI() || isSI())
3600     return true;
3601 
3602   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3603   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3604     return true;
3605 
3606   auto Opcode = Inst.getOpcode();
3607   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3608   if (OpNum == -1)
3609     return true;
3610 
3611   const auto &Op = Inst.getOperand(OpNum);
3612   if (!Op.isImm())
3613     return true;
3614 
3615   uint64_t Offset = Op.getImm();
3616   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3617   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3618       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3619     return true;
3620 
3621   Error(getSMEMOffsetLoc(Operands),
3622         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3623                                "expected a 21-bit signed offset");
3624 
3625   return false;
3626 }
3627 
3628 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3629   unsigned Opcode = Inst.getOpcode();
3630   const MCInstrDesc &Desc = MII.get(Opcode);
3631   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3632     return true;
3633 
3634   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3635   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3636 
3637   const int OpIndices[] = { Src0Idx, Src1Idx };
3638 
3639   unsigned NumExprs = 0;
3640   unsigned NumLiterals = 0;
  uint32_t LiteralValue = 0;
3642 
3643   for (int OpIdx : OpIndices) {
3644     if (OpIdx == -1) break;
3645 
3646     const MCOperand &MO = Inst.getOperand(OpIdx);
3647     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3648     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3649       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3650         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3651         if (NumLiterals == 0 || LiteralValue != Value) {
3652           LiteralValue = Value;
3653           ++NumLiterals;
3654         }
3655       } else if (MO.isExpr()) {
3656         ++NumExprs;
3657       }
3658     }
3659   }
3660 
3661   return NumLiterals + NumExprs <= 1;
3662 }
3663 
3664 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3665   const unsigned Opc = Inst.getOpcode();
3666   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3667       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3668     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3669     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3670 
3671     if (OpSel & ~3)
3672       return false;
3673   }
3674   return true;
3675 }
3676 
3677 // Check if VCC register matches wavefront size
3678 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3679   auto FB = getFeatureBits();
3680   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3681     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3682 }
3683 
3684 // VOP3 literal is only allowed in GFX10+ and only one can be used
3685 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3686   unsigned Opcode = Inst.getOpcode();
3687   const MCInstrDesc &Desc = MII.get(Opcode);
3688   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3689     return true;
3690 
3691   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3692   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3693   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3694 
3695   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3696 
3697   unsigned NumExprs = 0;
3698   unsigned NumLiterals = 0;
  uint32_t LiteralValue = 0;
3700 
3701   for (int OpIdx : OpIndices) {
3702     if (OpIdx == -1) break;
3703 
3704     const MCOperand &MO = Inst.getOperand(OpIdx);
3705     if (!MO.isImm() && !MO.isExpr())
3706       continue;
3707     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3708       continue;
3709 
3710     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3711         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3712       return false;
3713 
3714     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3715       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3716       if (NumLiterals == 0 || LiteralValue != Value) {
3717         LiteralValue = Value;
3718         ++NumLiterals;
3719       }
3720     } else if (MO.isExpr()) {
3721       ++NumExprs;
3722     }
3723   }
3724   NumLiterals += NumExprs;
3725 
3726   return !NumLiterals ||
3727          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3728 }
3729 
3730 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3731                                           const SMLoc &IDLoc,
3732                                           const OperandVector &Operands) {
3733   if (!validateLdsDirect(Inst)) {
3734     Error(IDLoc,
3735       "invalid use of lds_direct");
3736     return false;
3737   }
3738   if (!validateSOPLiteral(Inst)) {
3739     Error(IDLoc,
3740       "only one literal operand is allowed");
3741     return false;
3742   }
3743   if (!validateVOP3Literal(Inst)) {
3744     Error(IDLoc,
3745       "invalid literal operand");
3746     return false;
3747   }
3748   if (!validateConstantBusLimitations(Inst)) {
3749     Error(IDLoc,
3750       "invalid operand (violates constant bus restrictions)");
3751     return false;
3752   }
3753   if (!validateEarlyClobberLimitations(Inst)) {
3754     Error(IDLoc,
3755       "destination must be different than all sources");
3756     return false;
3757   }
3758   if (!validateIntClampSupported(Inst)) {
3759     Error(IDLoc,
3760       "integer clamping is not supported on this GPU");
3761     return false;
3762   }
3763   if (!validateOpSel(Inst)) {
3764     Error(IDLoc,
3765       "invalid op_sel operand");
3766     return false;
3767   }
3768   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3769   if (!validateMIMGD16(Inst)) {
3770     Error(IDLoc,
3771       "d16 modifier is not supported on this GPU");
3772     return false;
3773   }
3774   if (!validateMIMGDim(Inst)) {
3775     Error(IDLoc, "dim modifier is required on this GPU");
3776     return false;
3777   }
3778   if (!validateMIMGDataSize(Inst)) {
3779     Error(IDLoc,
3780       "image data size does not match dmask and tfe");
3781     return false;
3782   }
3783   if (!validateMIMGAddrSize(Inst)) {
3784     Error(IDLoc,
3785       "image address size does not match dim and a16");
3786     return false;
3787   }
3788   if (!validateMIMGAtomicDMask(Inst)) {
3789     Error(IDLoc,
3790       "invalid atomic image dmask");
3791     return false;
3792   }
3793   if (!validateMIMGGatherDMask(Inst)) {
3794     Error(IDLoc,
3795       "invalid image_gather dmask: only one bit must be set");
3796     return false;
3797   }
3798   if (!validateMovrels(Inst)) {
3799     Error(IDLoc, "source operand must be a VGPR");
3800     return false;
3801   }
3802   if (!validateFlatOffset(Inst, Operands)) {
3803     return false;
3804   }
3805   if (!validateSMEMOffset(Inst, Operands)) {
3806     return false;
3807   }
3808   if (!validateMAIAccWrite(Inst)) {
3809     return false;
3810   }
3811   if (!validateDivScale(Inst)) {
3812     return false;
3813   }
3814 
3815   return true;
3816 }
3817 
3818 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3819                                             const FeatureBitset &FBS,
3820                                             unsigned VariantID = 0);
3821 
3822 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3823                                 const FeatureBitset &AvailableFeatures,
3824                                 unsigned VariantID);
3825 
3826 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3827                                        const FeatureBitset &FBS) {
3828   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3829 }
3830 
3831 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3832                                        const FeatureBitset &FBS,
3833                                        ArrayRef<unsigned> Variants) {
3834   for (auto Variant : Variants) {
3835     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3836       return true;
3837   }
3838 
3839   return false;
3840 }
3841 
3842 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3843                                                   const SMLoc &IDLoc) {
3844   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3845 
3846   // Check if requested instruction variant is supported.
3847   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3848     return false;
3849 
3850   // This instruction is not supported.
3851   // Clear any other pending errors because they are no longer relevant.
3852   getParser().clearPendingErrors();
3853 
3854   // Requested instruction variant is not supported.
3855   // Check if any other variants are supported.
3856   StringRef VariantName = getMatchedVariantName();
3857   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3858     return Error(IDLoc,
3859                  Twine(VariantName,
3860                        " variant of this instruction is not supported"));
3861   }
3862 
3863   // Finally check if this instruction is supported on any other GPU.
3864   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3865     return Error(IDLoc, "instruction not supported on this GPU");
3866   }
3867 
3868   // Instruction not supported on any GPU. Probably a typo.
3869   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3870   return Error(IDLoc, "invalid instruction" + Suggestion);
3871 }
3872 
3873 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3874                                               OperandVector &Operands,
3875                                               MCStreamer &Out,
3876                                               uint64_t &ErrorInfo,
3877                                               bool MatchingInlineAsm) {
3878   MCInst Inst;
3879   unsigned Result = Match_Success;
3880   for (auto Variant : getMatchedVariants()) {
3881     uint64_t EI;
3882     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3883                                   Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3887     if ((R == Match_Success) ||
3888         (R == Match_PreferE32) ||
3889         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3890         (R == Match_InvalidOperand && Result != Match_MissingFeature
3891                                    && Result != Match_PreferE32) ||
3892         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3893                                    && Result != Match_MissingFeature
3894                                    && Result != Match_PreferE32)) {
3895       Result = R;
3896       ErrorInfo = EI;
3897     }
3898     if (R == Match_Success)
3899       break;
3900   }
3901 
3902   if (Result == Match_Success) {
3903     if (!validateInstruction(Inst, IDLoc, Operands)) {
3904       return true;
3905     }
3906     Inst.setLoc(IDLoc);
3907     Out.emitInstruction(Inst, getSTI());
3908     return false;
3909   }
3910 
3911   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3912   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3913     return true;
3914   }
3915 
3916   switch (Result) {
3917   default: break;
3918   case Match_MissingFeature:
3919     // It has been verified that the specified instruction
3920     // mnemonic is valid. A match was found but it requires
3921     // features which are not supported on this GPU.
3922     return Error(IDLoc, "operands are not valid for this GPU or mode");
3923 
3924   case Match_InvalidOperand: {
3925     SMLoc ErrorLoc = IDLoc;
3926     if (ErrorInfo != ~0ULL) {
3927       if (ErrorInfo >= Operands.size()) {
3928         return Error(IDLoc, "too few operands for instruction");
3929       }
3930       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3931       if (ErrorLoc == SMLoc())
3932         ErrorLoc = IDLoc;
3933     }
3934     return Error(ErrorLoc, "invalid operand for instruction");
3935   }
3936 
3937   case Match_PreferE32:
3938     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3939                         "should be encoded as e32");
3940   case Match_MnemonicFail:
3941     llvm_unreachable("Invalid instructions should have been handled already");
3942   }
3943   llvm_unreachable("Implement any new match types added!");
3944 }
3945 
3946 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3947   int64_t Tmp = -1;
3948   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3949     return true;
3950   }
3951   if (getParser().parseAbsoluteExpression(Tmp)) {
3952     return true;
3953   }
3954   Ret = static_cast<uint32_t>(Tmp);
3955   return false;
3956 }
3957 
3958 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3959                                                uint32_t &Minor) {
3960   if (ParseAsAbsoluteExpression(Major))
3961     return TokError("invalid major version");
3962 
3963   if (getLexer().isNot(AsmToken::Comma))
3964     return TokError("minor version number required, comma expected");
3965   Lex();
3966 
3967   if (ParseAsAbsoluteExpression(Minor))
3968     return TokError("invalid minor version");
3969 
3970   return false;
3971 }
3972 
3973 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3974   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3975     return TokError("directive only supported for amdgcn architecture");
3976 
3977   std::string Target;
3978 
3979   SMLoc TargetStart = getTok().getLoc();
3980   if (getParser().parseEscapedString(Target))
3981     return true;
3982   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3983 
3984   std::string ExpectedTarget;
3985   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3986   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3987 
3988   if (Target != ExpectedTargetOS.str())
3989     return getParser().Error(TargetRange.Start, "target must match options",
3990                              TargetRange);
3991 
3992   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3993   return false;
3994 }
3995 
3996 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3997   return getParser().Error(Range.Start, "value out of range", Range);
3998 }
3999 
4000 bool AMDGPUAsmParser::calculateGPRBlocks(
4001     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4002     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4003     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4004     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4005   // TODO(scott.linder): These calculations are duplicated from
4006   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4007   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4008 
4009   unsigned NumVGPRs = NextFreeVGPR;
4010   unsigned NumSGPRs = NextFreeSGPR;
4011 
4012   if (Version.Major >= 10)
4013     NumSGPRs = 0;
4014   else {
4015     unsigned MaxAddressableNumSGPRs =
4016         IsaInfo::getAddressableNumSGPRs(&getSTI());
4017 
4018     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4019         NumSGPRs > MaxAddressableNumSGPRs)
4020       return OutOfRangeError(SGPRRange);
4021 
4022     NumSGPRs +=
4023         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4024 
4025     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4026         NumSGPRs > MaxAddressableNumSGPRs)
4027       return OutOfRangeError(SGPRRange);
4028 
4029     if (Features.test(FeatureSGPRInitBug))
4030       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4031   }
4032 
4033   VGPRBlocks =
4034       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4035   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4036 
4037   return false;
4038 }
4039 
4040 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4041   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4042     return TokError("directive only supported for amdgcn architecture");
4043 
4044   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4045     return TokError("directive only supported for amdhsa OS");
4046 
4047   StringRef KernelName;
4048   if (getParser().parseIdentifier(KernelName))
4049     return true;
4050 
4051   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4052 
4053   StringSet<> Seen;
4054 
4055   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4056 
4057   SMRange VGPRRange;
4058   uint64_t NextFreeVGPR = 0;
4059   SMRange SGPRRange;
4060   uint64_t NextFreeSGPR = 0;
4061   unsigned UserSGPRCount = 0;
4062   bool ReserveVCC = true;
4063   bool ReserveFlatScr = true;
4064   bool ReserveXNACK = hasXNACK();
4065   Optional<bool> EnableWavefrontSize32;
4066 
4067   while (true) {
4068     while (getLexer().is(AsmToken::EndOfStatement))
4069       Lex();
4070 
4071     if (getLexer().isNot(AsmToken::Identifier))
4072       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4073 
4074     StringRef ID = getTok().getIdentifier();
4075     SMRange IDRange = getTok().getLocRange();
4076     Lex();
4077 
4078     if (ID == ".end_amdhsa_kernel")
4079       break;
4080 
4081     if (Seen.find(ID) != Seen.end())
4082       return TokError(".amdhsa_ directives cannot be repeated");
4083     Seen.insert(ID);
4084 
4085     SMLoc ValStart = getTok().getLoc();
4086     int64_t IVal;
4087     if (getParser().parseAbsoluteExpression(IVal))
4088       return true;
4089     SMLoc ValEnd = getTok().getLoc();
4090     SMRange ValRange = SMRange(ValStart, ValEnd);
4091 
4092     if (IVal < 0)
4093       return OutOfRangeError(ValRange);
4094 
4095     uint64_t Val = IVal;
4096 
4097 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4098   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4099     return OutOfRangeError(RANGE);                                             \
4100   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4101 
4102     if (ID == ".amdhsa_group_segment_fixed_size") {
4103       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4104         return OutOfRangeError(ValRange);
4105       KD.group_segment_fixed_size = Val;
4106     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4107       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4108         return OutOfRangeError(ValRange);
4109       KD.private_segment_fixed_size = Val;
4110     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4111       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4112                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4113                        Val, ValRange);
4114       if (Val)
4115         UserSGPRCount += 4;
4116     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4117       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4118                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4119                        ValRange);
4120       if (Val)
4121         UserSGPRCount += 2;
4122     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4123       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4124                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4125                        ValRange);
4126       if (Val)
4127         UserSGPRCount += 2;
4128     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4129       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4130                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4131                        Val, ValRange);
4132       if (Val)
4133         UserSGPRCount += 2;
4134     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4135       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4136                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4137                        ValRange);
4138       if (Val)
4139         UserSGPRCount += 2;
4140     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4141       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4142                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4143                        ValRange);
4144       if (Val)
4145         UserSGPRCount += 2;
4146     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4147       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4148                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4149                        Val, ValRange);
4150       if (Val)
4151         UserSGPRCount += 1;
4152     } else if (ID == ".amdhsa_wavefront_size32") {
4153       if (IVersion.Major < 10)
4154         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4155                                  IDRange);
4156       EnableWavefrontSize32 = Val;
4157       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4158                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4159                        Val, ValRange);
4160     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4161       PARSE_BITS_ENTRY(
4162           KD.compute_pgm_rsrc2,
4163           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4164           ValRange);
4165     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4166       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4167                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4168                        ValRange);
4169     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4170       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4171                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4172                        ValRange);
4173     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4174       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4175                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4176                        ValRange);
4177     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4178       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4179                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4180                        ValRange);
4181     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4182       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4183                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4184                        ValRange);
4185     } else if (ID == ".amdhsa_next_free_vgpr") {
4186       VGPRRange = ValRange;
4187       NextFreeVGPR = Val;
4188     } else if (ID == ".amdhsa_next_free_sgpr") {
4189       SGPRRange = ValRange;
4190       NextFreeSGPR = Val;
4191     } else if (ID == ".amdhsa_reserve_vcc") {
4192       if (!isUInt<1>(Val))
4193         return OutOfRangeError(ValRange);
4194       ReserveVCC = Val;
4195     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4196       if (IVersion.Major < 7)
4197         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4198                                  IDRange);
4199       if (!isUInt<1>(Val))
4200         return OutOfRangeError(ValRange);
4201       ReserveFlatScr = Val;
4202     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4203       if (IVersion.Major < 8)
4204         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4205                                  IDRange);
4206       if (!isUInt<1>(Val))
4207         return OutOfRangeError(ValRange);
4208       ReserveXNACK = Val;
4209     } else if (ID == ".amdhsa_float_round_mode_32") {
4210       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4211                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4212     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4213       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4214                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4215     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4216       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4217                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4218     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4219       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4220                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4221                        ValRange);
4222     } else if (ID == ".amdhsa_dx10_clamp") {
4223       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4224                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4225     } else if (ID == ".amdhsa_ieee_mode") {
4226       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4227                        Val, ValRange);
4228     } else if (ID == ".amdhsa_fp16_overflow") {
4229       if (IVersion.Major < 9)
4230         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4231                                  IDRange);
4232       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4233                        ValRange);
4234     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4235       if (IVersion.Major < 10)
4236         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4237                                  IDRange);
4238       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4239                        ValRange);
4240     } else if (ID == ".amdhsa_memory_ordered") {
4241       if (IVersion.Major < 10)
4242         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4243                                  IDRange);
4244       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4245                        ValRange);
4246     } else if (ID == ".amdhsa_forward_progress") {
4247       if (IVersion.Major < 10)
4248         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4249                                  IDRange);
4250       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4251                        ValRange);
4252     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4253       PARSE_BITS_ENTRY(
4254           KD.compute_pgm_rsrc2,
4255           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4256           ValRange);
4257     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4258       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4259                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4260                        Val, ValRange);
4261     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4262       PARSE_BITS_ENTRY(
4263           KD.compute_pgm_rsrc2,
4264           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4265           ValRange);
4266     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4267       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4268                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4269                        Val, ValRange);
4270     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4271       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4272                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4273                        Val, ValRange);
4274     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4275       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4276                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4277                        Val, ValRange);
4278     } else if (ID == ".amdhsa_exception_int_div_zero") {
4279       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4280                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4281                        Val, ValRange);
4282     } else {
4283       return getParser().Error(IDRange.Start,
4284                                "unknown .amdhsa_kernel directive", IDRange);
4285     }
4286 
4287 #undef PARSE_BITS_ENTRY
4288   }
4289 
4290   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4291     return TokError(".amdhsa_next_free_vgpr directive is required");
4292 
4293   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4294     return TokError(".amdhsa_next_free_sgpr directive is required");
4295 
4296   unsigned VGPRBlocks;
4297   unsigned SGPRBlocks;
4298   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4299                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4300                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4301                          SGPRBlocks))
4302     return true;
4303 
4304   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4305           VGPRBlocks))
4306     return OutOfRangeError(VGPRRange);
4307   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4308                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4309 
4310   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4311           SGPRBlocks))
4312     return OutOfRangeError(SGPRRange);
4313   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4314                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4315                   SGPRBlocks);
4316 
4317   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4318     return TokError("too many user SGPRs enabled");
4319   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4320                   UserSGPRCount);
4321 
4322   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4323       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4324       ReserveFlatScr, ReserveXNACK);
4325   return false;
4326 }
4327 
4328 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4329   uint32_t Major;
4330   uint32_t Minor;
4331 
4332   if (ParseDirectiveMajorMinor(Major, Minor))
4333     return true;
4334 
4335   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4336   return false;
4337 }
4338 
4339 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4340   uint32_t Major;
4341   uint32_t Minor;
4342   uint32_t Stepping;
4343   StringRef VendorName;
4344   StringRef ArchName;
4345 
4346   // If this directive has no arguments, then use the ISA version for the
4347   // targeted GPU.
4348   if (getLexer().is(AsmToken::EndOfStatement)) {
4349     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4350     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4351                                                       ISA.Stepping,
4352                                                       "AMD", "AMDGPU");
4353     return false;
4354   }
4355 
4356   if (ParseDirectiveMajorMinor(Major, Minor))
4357     return true;
4358 
4359   if (getLexer().isNot(AsmToken::Comma))
4360     return TokError("stepping version number required, comma expected");
4361   Lex();
4362 
4363   if (ParseAsAbsoluteExpression(Stepping))
4364     return TokError("invalid stepping version");
4365 
4366   if (getLexer().isNot(AsmToken::Comma))
4367     return TokError("vendor name required, comma expected");
4368   Lex();
4369 
4370   if (getLexer().isNot(AsmToken::String))
4371     return TokError("invalid vendor name");
4372 
4373   VendorName = getLexer().getTok().getStringContents();
4374   Lex();
4375 
4376   if (getLexer().isNot(AsmToken::Comma))
4377     return TokError("arch name required, comma expected");
4378   Lex();
4379 
4380   if (getLexer().isNot(AsmToken::String))
4381     return TokError("invalid arch name");
4382 
4383   ArchName = getLexer().getTok().getStringContents();
4384   Lex();
4385 
4386   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4387                                                     VendorName, ArchName);
4388   return false;
4389 }
4390 
4391 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4392                                                amd_kernel_code_t &Header) {
4393   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4394   // assembly for backwards compatibility.
4395   if (ID == "max_scratch_backing_memory_byte_size") {
4396     Parser.eatToEndOfStatement();
4397     return false;
4398   }
4399 
4400   SmallString<40> ErrStr;
4401   raw_svector_ostream Err(ErrStr);
4402   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4403     return TokError(Err.str());
4404   }
4405   Lex();
4406 
4407   if (ID == "enable_wavefront_size32") {
4408     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4409       if (!isGFX10())
4410         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4411       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4412         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4413     } else {
4414       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4415         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4416     }
4417   }
4418 
4419   if (ID == "wavefront_size") {
4420     if (Header.wavefront_size == 5) {
4421       if (!isGFX10())
4422         return TokError("wavefront_size=5 is only allowed on GFX10+");
4423       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4424         return TokError("wavefront_size=5 requires +WavefrontSize32");
4425     } else if (Header.wavefront_size == 6) {
4426       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4427         return TokError("wavefront_size=6 requires +WavefrontSize64");
4428     }
4429   }
4430 
4431   if (ID == "enable_wgp_mode") {
4432     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4433       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4434   }
4435 
4436   if (ID == "enable_mem_ordered") {
4437     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4438       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4439   }
4440 
4441   if (ID == "enable_fwd_progress") {
4442     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4443       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4444   }
4445 
4446   return false;
4447 }
4448 
4449 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4450   amd_kernel_code_t Header;
4451   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4452 
4453   while (true) {
4454     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4455     // will set the current token to EndOfStatement.
4456     while(getLexer().is(AsmToken::EndOfStatement))
4457       Lex();
4458 
4459     if (getLexer().isNot(AsmToken::Identifier))
4460       return TokError("expected value identifier or .end_amd_kernel_code_t");
4461 
4462     StringRef ID = getLexer().getTok().getIdentifier();
4463     Lex();
4464 
4465     if (ID == ".end_amd_kernel_code_t")
4466       break;
4467 
4468     if (ParseAMDKernelCodeTValue(ID, Header))
4469       return true;
4470   }
4471 
4472   getTargetStreamer().EmitAMDKernelCodeT(Header);
4473 
4474   return false;
4475 }
4476 
4477 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4478   if (getLexer().isNot(AsmToken::Identifier))
4479     return TokError("expected symbol name");
4480 
4481   StringRef KernelName = Parser.getTok().getString();
4482 
4483   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4484                                            ELF::STT_AMDGPU_HSA_KERNEL);
4485   Lex();
4486 
4487   KernelScope.initialize(getContext());
4488   return false;
4489 }
4490 
4491 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4492   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4493     return Error(getParser().getTok().getLoc(),
4494                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4495                  "architectures");
4496   }
4497 
4498   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4499 
4500   std::string ISAVersionStringFromSTI;
4501   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4502   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4503 
4504   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4505     return Error(getParser().getTok().getLoc(),
4506                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4507                  "arguments specified through the command line");
4508   }
4509 
4510   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4511   Lex();
4512 
4513   return false;
4514 }
4515 
4516 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4517   const char *AssemblerDirectiveBegin;
4518   const char *AssemblerDirectiveEnd;
4519   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4520       isHsaAbiVersion3(&getSTI())
4521           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4522                             HSAMD::V3::AssemblerDirectiveEnd)
4523           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4524                             HSAMD::AssemblerDirectiveEnd);
4525 
4526   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4527     return Error(getParser().getTok().getLoc(),
4528                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4529                  "not available on non-amdhsa OSes")).str());
4530   }
4531 
4532   std::string HSAMetadataString;
4533   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4534                           HSAMetadataString))
4535     return true;
4536 
4537   if (isHsaAbiVersion3(&getSTI())) {
4538     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4539       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4540   } else {
4541     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4542       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4543   }
4544 
4545   return false;
4546 }
4547 
4548 /// Common code to parse out a block of text (typically YAML) between start and
4549 /// end directives.
4550 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4551                                           const char *AssemblerDirectiveEnd,
4552                                           std::string &CollectString) {
4553 
4554   raw_string_ostream CollectStream(CollectString);
4555 
4556   getLexer().setSkipSpace(false);
4557 
4558   bool FoundEnd = false;
4559   while (!getLexer().is(AsmToken::Eof)) {
4560     while (getLexer().is(AsmToken::Space)) {
4561       CollectStream << getLexer().getTok().getString();
4562       Lex();
4563     }
4564 
4565     if (getLexer().is(AsmToken::Identifier)) {
4566       StringRef ID = getLexer().getTok().getIdentifier();
4567       if (ID == AssemblerDirectiveEnd) {
4568         Lex();
4569         FoundEnd = true;
4570         break;
4571       }
4572     }
4573 
4574     CollectStream << Parser.parseStringToEndOfStatement()
4575                   << getContext().getAsmInfo()->getSeparatorString();
4576 
4577     Parser.eatToEndOfStatement();
4578   }
4579 
4580   getLexer().setSkipSpace(true);
4581 
4582   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4583     return TokError(Twine("expected directive ") +
4584                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4585   }
4586 
4587   CollectStream.flush();
4588   return false;
4589 }
4590 
4591 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4592 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4593   std::string String;
4594   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4595                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4596     return true;
4597 
4598   auto PALMetadata = getTargetStreamer().getPALMetadata();
4599   if (!PALMetadata->setFromString(String))
4600     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4601   return false;
4602 }
4603 
4604 /// Parse the assembler directive for old linear-format PAL metadata.
4605 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4606   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4607     return Error(getParser().getTok().getLoc(),
4608                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4609                  "not available on non-amdpal OSes")).str());
4610   }
4611 
4612   auto PALMetadata = getTargetStreamer().getPALMetadata();
4613   PALMetadata->setLegacy();
4614   for (;;) {
4615     uint32_t Key, Value;
4616     if (ParseAsAbsoluteExpression(Key)) {
4617       return TokError(Twine("invalid value in ") +
4618                       Twine(PALMD::AssemblerDirective));
4619     }
4620     if (getLexer().isNot(AsmToken::Comma)) {
4621       return TokError(Twine("expected an even number of values in ") +
4622                       Twine(PALMD::AssemblerDirective));
4623     }
4624     Lex();
4625     if (ParseAsAbsoluteExpression(Value)) {
4626       return TokError(Twine("invalid value in ") +
4627                       Twine(PALMD::AssemblerDirective));
4628     }
4629     PALMetadata->setRegister(Key, Value);
4630     if (getLexer().isNot(AsmToken::Comma))
4631       break;
4632     Lex();
4633   }
4634   return false;
4635 }
4636 
4637 /// ParseDirectiveAMDGPULDS
4638 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
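///  e.g. .amdgpu_lds lds_buffer, 4096, 16 (symbol name and values are
///  illustrative; the alignment defaults to 4 when omitted)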
4639 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4640   if (getParser().checkForValidSection())
4641     return true;
4642 
4643   StringRef Name;
4644   SMLoc NameLoc = getLexer().getLoc();
4645   if (getParser().parseIdentifier(Name))
4646     return TokError("expected identifier in directive");
4647 
4648   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4649   if (parseToken(AsmToken::Comma, "expected ','"))
4650     return true;
4651 
4652   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4653 
4654   int64_t Size;
4655   SMLoc SizeLoc = getLexer().getLoc();
4656   if (getParser().parseAbsoluteExpression(Size))
4657     return true;
4658   if (Size < 0)
4659     return Error(SizeLoc, "size must be non-negative");
4660   if (Size > LocalMemorySize)
4661     return Error(SizeLoc, "size is too large");
4662 
4663   int64_t Alignment = 4;
4664   if (getLexer().is(AsmToken::Comma)) {
4665     Lex();
4666     SMLoc AlignLoc = getLexer().getLoc();
4667     if (getParser().parseAbsoluteExpression(Alignment))
4668       return true;
4669     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4670       return Error(AlignLoc, "alignment must be a power of two");
4671 
4672     // Alignment larger than the size of LDS is possible in theory, as long
4673     // as the linker manages to place the symbol at address 0, but we do want
4674     // to make sure the alignment fits nicely into a 32-bit integer.
4675     if (Alignment >= 1u << 31)
4676       return Error(AlignLoc, "alignment is too large");
4677   }
4678 
4679   if (parseToken(AsmToken::EndOfStatement,
4680                  "unexpected token in '.amdgpu_lds' directive"))
4681     return true;
4682 
4683   Symbol->redefineIfPossible();
4684   if (!Symbol->isUndefined())
4685     return Error(NameLoc, "invalid symbol redefinition");
4686 
4687   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4688   return false;
4689 }
4690 
4691 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4692   StringRef IDVal = DirectiveID.getString();
4693 
4694   if (isHsaAbiVersion3(&getSTI())) {
4695     if (IDVal == ".amdgcn_target")
4696       return ParseDirectiveAMDGCNTarget();
4697 
4698     if (IDVal == ".amdhsa_kernel")
4699       return ParseDirectiveAMDHSAKernel();
4700 
4701     // TODO: Restructure/combine with PAL metadata directive.
4702     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4703       return ParseDirectiveHSAMetadata();
4704   } else {
4705     if (IDVal == ".hsa_code_object_version")
4706       return ParseDirectiveHSACodeObjectVersion();
4707 
4708     if (IDVal == ".hsa_code_object_isa")
4709       return ParseDirectiveHSACodeObjectISA();
4710 
4711     if (IDVal == ".amd_kernel_code_t")
4712       return ParseDirectiveAMDKernelCodeT();
4713 
4714     if (IDVal == ".amdgpu_hsa_kernel")
4715       return ParseDirectiveAMDGPUHsaKernel();
4716 
4717     if (IDVal == ".amd_amdgpu_isa")
4718       return ParseDirectiveISAVersion();
4719 
4720     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4721       return ParseDirectiveHSAMetadata();
4722   }
4723 
4724   if (IDVal == ".amdgpu_lds")
4725     return ParseDirectiveAMDGPULDS();
4726 
4727   if (IDVal == PALMD::AssemblerDirectiveBegin)
4728     return ParseDirectivePALMetadataBegin();
4729 
4730   if (IDVal == PALMD::AssemblerDirective)
4731     return ParseDirectivePALMetadata();
4732 
4733   return true;
4734 }
4735 
4736 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4737                                            unsigned RegNo) const {
4738 
4739   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4740        R.isValid(); ++R) {
4741     if (*R == RegNo)
4742       return isGFX9Plus();
4743   }
4744 
4745   // GFX10 has 2 more SGPRs: 104 and 105.
4746   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4747        R.isValid(); ++R) {
4748     if (*R == RegNo)
4749       return hasSGPR104_SGPR105();
4750   }
4751 
4752   switch (RegNo) {
4753   case AMDGPU::SRC_SHARED_BASE:
4754   case AMDGPU::SRC_SHARED_LIMIT:
4755   case AMDGPU::SRC_PRIVATE_BASE:
4756   case AMDGPU::SRC_PRIVATE_LIMIT:
4757   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4758     return !isCI() && !isSI() && !isVI();
4759   case AMDGPU::TBA:
4760   case AMDGPU::TBA_LO:
4761   case AMDGPU::TBA_HI:
4762   case AMDGPU::TMA:
4763   case AMDGPU::TMA_LO:
4764   case AMDGPU::TMA_HI:
4765     return !isGFX9() && !isGFX10();
4766   case AMDGPU::XNACK_MASK:
4767   case AMDGPU::XNACK_MASK_LO:
4768   case AMDGPU::XNACK_MASK_HI:
4769     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4770   case AMDGPU::SGPR_NULL:
4771     return isGFX10();
4772   default:
4773     break;
4774   }
4775 
4776   if (isCI())
4777     return true;
4778 
4779   if (isSI() || isGFX10()) {
4780     // No flat_scr on SI.
4781     // On GFX10 flat scratch is not a valid register operand and can only be
4782     // accessed with s_setreg/s_getreg.
4783     switch (RegNo) {
4784     case AMDGPU::FLAT_SCR:
4785     case AMDGPU::FLAT_SCR_LO:
4786     case AMDGPU::FLAT_SCR_HI:
4787       return false;
4788     default:
4789       return true;
4790     }
4791   }
4792 
4793   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4794   // SI/CI have.
4795   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4796        R.isValid(); ++R) {
4797     if (*R == RegNo)
4798       return hasSGPR102_SGPR103();
4799   }
4800 
4801   return true;
4802 }
4803 
4804 OperandMatchResultTy
4805 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4806                               OperandMode Mode) {
4807   // Try to parse with a custom parser
4808   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4809 
4810   // If we successfully parsed the operand or if there was an error parsing,
4811   // we are done.
4812   //
4813   // If we are parsing after we reach EndOfStatement then this means we
4814   // are appending default values to the Operands list.  This is only done
4815   // by the custom parser, so we shouldn't continue on to the generic parsing.
4816   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4817       getLexer().is(AsmToken::EndOfStatement))
4818     return ResTy;
4819 
4820   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4821     unsigned Prefix = Operands.size();
4822     SMLoc LBraceLoc = getTok().getLoc();
4823     Parser.Lex(); // eat the '['
4824 
4825     for (;;) {
4826       ResTy = parseReg(Operands);
4827       if (ResTy != MatchOperand_Success)
4828         return ResTy;
4829 
4830       if (getLexer().is(AsmToken::RBrac))
4831         break;
4832 
4833       if (getLexer().isNot(AsmToken::Comma))
4834         return MatchOperand_ParseFail;
4835       Parser.Lex();
4836     }
4837 
4838     if (Operands.size() - Prefix > 1) {
4839       Operands.insert(Operands.begin() + Prefix,
4840                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4841       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4842                                                     getTok().getLoc()));
4843     }
4844 
4845     Parser.Lex(); // eat the ']'
4846     return MatchOperand_Success;
4847   }
4848 
4849   return parseRegOrImm(Operands);
4850 }
4851 
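/// Strip a trailing encoding suffix (_e32, _e64, _dpp or _sdwa) from the
/// mnemonic, record the corresponding forced encoding, and return the base
/// mnemonic.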
4852 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4853   // Clear any forced encodings from the previous instruction.
4854   setForcedEncodingSize(0);
4855   setForcedDPP(false);
4856   setForcedSDWA(false);
4857 
4858   if (Name.endswith("_e64")) {
4859     setForcedEncodingSize(64);
4860     return Name.substr(0, Name.size() - 4);
4861   } else if (Name.endswith("_e32")) {
4862     setForcedEncodingSize(32);
4863     return Name.substr(0, Name.size() - 4);
4864   } else if (Name.endswith("_dpp")) {
4865     setForcedDPP(true);
4866     return Name.substr(0, Name.size() - 4);
4867   } else if (Name.endswith("_sdwa")) {
4868     setForcedSDWA(true);
4869     return Name.substr(0, Name.size() - 5);
4870   }
4871   return Name;
4872 }
4873 
4874 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4875                                        StringRef Name,
4876                                        SMLoc NameLoc, OperandVector &Operands) {
4877   // Add the instruction mnemonic
4878   Name = parseMnemonicSuffix(Name);
4879   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4880 
4881   bool IsMIMG = Name.startswith("image_");
4882 
4883   while (!getLexer().is(AsmToken::EndOfStatement)) {
4884     OperandMode Mode = OperandMode_Default;
4885     if (IsMIMG && isGFX10() && Operands.size() == 2)
4886       Mode = OperandMode_NSA;
4887     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4888 
4889     // Eat the comma if there is one.
4890     if (getLexer().is(AsmToken::Comma))
4891       Parser.Lex();
4892 
4893     if (Res != MatchOperand_Success) {
4894       checkUnsupportedInstruction(Name, NameLoc);
4895       if (!Parser.hasPendingError()) {
4896         // FIXME: use real operand location rather than the current location.
4897         StringRef Msg =
4898           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4899                                             "not a valid operand.";
4900         Error(getLexer().getLoc(), Msg);
4901       }
4902       while (!getLexer().is(AsmToken::EndOfStatement)) {
4903         Parser.Lex();
4904       }
4905       return true;
4906     }
4907   }
4908 
4909   return false;
4910 }
4911 
4912 //===----------------------------------------------------------------------===//
4913 // Utility functions
4914 //===----------------------------------------------------------------------===//
4915 
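/// Parse an integer operand written as '<Prefix>:<expr>', e.g. 'offset:16'
/// (the prefix shown is illustrative). Returns MatchOperand_NoMatch if the
/// prefix is not present and MatchOperand_ParseFail if the expression cannot
/// be evaluated.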
4916 OperandMatchResultTy
4917 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4918 
4919   if (!trySkipId(Prefix, AsmToken::Colon))
4920     return MatchOperand_NoMatch;
4921 
4922   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4923 }
4924 
4925 OperandMatchResultTy
4926 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4927                                     AMDGPUOperand::ImmTy ImmTy,
4928                                     bool (*ConvertResult)(int64_t&)) {
4929   SMLoc S = getLoc();
4930   int64_t Value = 0;
4931 
4932   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4933   if (Res != MatchOperand_Success)
4934     return Res;
4935 
4936   if (ConvertResult && !ConvertResult(Value)) {
4937     Error(S, "invalid " + StringRef(Prefix) + " value.");
4938   }
4939 
4940   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4941   return MatchOperand_Success;
4942 }
4943 
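/// Parse an operand of the form '<Prefix>:[a,b,...]' with at most 4 elements,
/// each of which must be 0 or 1; element I is packed into bit I of the
/// resulting immediate.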
4944 OperandMatchResultTy
4945 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4946                                              OperandVector &Operands,
4947                                              AMDGPUOperand::ImmTy ImmTy,
4948                                              bool (*ConvertResult)(int64_t&)) {
4949   SMLoc S = getLoc();
4950   if (!trySkipId(Prefix, AsmToken::Colon))
4951     return MatchOperand_NoMatch;
4952 
4953   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4954     return MatchOperand_ParseFail;
4955 
4956   unsigned Val = 0;
4957   const unsigned MaxSize = 4;
4958 
4959   // FIXME: How to verify the number of elements matches the number of src
4960   // operands?
4961   for (int I = 0; ; ++I) {
4962     int64_t Op;
4963     SMLoc Loc = getLoc();
4964     if (!parseExpr(Op))
4965       return MatchOperand_ParseFail;
4966 
4967     if (Op != 0 && Op != 1) {
4968       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4969       return MatchOperand_ParseFail;
4970     }
4971 
4972     Val |= (Op << I);
4973 
4974     if (trySkipToken(AsmToken::RBrac))
4975       break;
4976 
4977     if (I + 1 == MaxSize) {
4978       Error(getLoc(), "expected a closing square bracket");
4979       return MatchOperand_ParseFail;
4980     }
4981 
4982     if (!skipToken(AsmToken::Comma, "expected a comma"))
4983       return MatchOperand_ParseFail;
4984   }
4985 
4986   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4987   return MatchOperand_Success;
4988 }
4989 
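/// Parse a named boolean modifier: the bare name sets the bit to 1 and the
/// 'no'-prefixed form sets it to 0 (e.g. 'gds' / 'nogds', illustrative); if
/// the statement has already ended, the default value 0 is used.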
4990 OperandMatchResultTy
4991 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4992                                AMDGPUOperand::ImmTy ImmTy) {
4993   int64_t Bit = 0;
4994   SMLoc S = Parser.getTok().getLoc();
4995 
4996   // If we are at the end of the statement, this is a default argument, so
4997   // use the default value.
4998   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4999     switch(getLexer().getKind()) {
5000       case AsmToken::Identifier: {
5001         StringRef Tok = Parser.getTok().getString();
5002         if (Tok == Name) {
5003           if (Tok == "r128" && !hasMIMG_R128())
5004             Error(S, "r128 modifier is not supported on this GPU");
5005           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5006             Error(S, "a16 modifier is not supported on this GPU");
5007           Bit = 1;
5008           Parser.Lex();
5009         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5010           Bit = 0;
5011           Parser.Lex();
5012         } else {
5013           return MatchOperand_NoMatch;
5014         }
5015         break;
5016       }
5017       default:
5018         return MatchOperand_NoMatch;
5019     }
5020   }
5021 
5022   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
5023     return MatchOperand_ParseFail;
5024 
5025   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5026     ImmTy = AMDGPUOperand::ImmTyR128A16;
5027 
5028   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5029   return MatchOperand_Success;
5030 }
5031 
5032 static void addOptionalImmOperand(
5033   MCInst& Inst, const OperandVector& Operands,
5034   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5035   AMDGPUOperand::ImmTy ImmT,
5036   int64_t Default = 0) {
5037   auto i = OptionalIdx.find(ImmT);
5038   if (i != OptionalIdx.end()) {
5039     unsigned Idx = i->second;
5040     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5041   } else {
5042     Inst.addOperand(MCOperand::createImm(Default));
5043   }
5044 }
5045 
5046 OperandMatchResultTy
5047 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5048   if (getLexer().isNot(AsmToken::Identifier)) {
5049     return MatchOperand_NoMatch;
5050   }
5051   StringRef Tok = Parser.getTok().getString();
5052   if (Tok != Prefix) {
5053     return MatchOperand_NoMatch;
5054   }
5055 
5056   Parser.Lex();
5057   if (getLexer().isNot(AsmToken::Colon)) {
5058     return MatchOperand_ParseFail;
5059   }
5060 
5061   Parser.Lex();
5062   if (getLexer().isNot(AsmToken::Identifier)) {
5063     return MatchOperand_ParseFail;
5064   }
5065 
5066   Value = Parser.getTok().getString();
5067   return MatchOperand_Success;
5068 }
5069 
5070 //===----------------------------------------------------------------------===//
5071 // MTBUF format
5072 //===----------------------------------------------------------------------===//
5073 
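/// Try to parse a '<Pref>:<value>' clause. Returns false only on a hard error
/// (a parse failure or a value outside [0, MaxVal]); if the prefix is absent,
/// Fmt is left unchanged and true is returned.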
5074 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5075                                   int64_t MaxVal,
5076                                   int64_t &Fmt) {
5077   int64_t Val;
5078   SMLoc Loc = getLoc();
5079 
5080   auto Res = parseIntWithPrefix(Pref, Val);
5081   if (Res == MatchOperand_ParseFail)
5082     return false;
5083   if (Res == MatchOperand_NoMatch)
5084     return true;
5085 
5086   if (Val < 0 || Val > MaxVal) {
5087     Error(Loc, Twine("out of range ", StringRef(Pref)));
5088     return false;
5089   }
5090 
5091   Fmt = Val;
5092   return true;
5093 }
5094 
5095 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5096 // values to live in a joint format operand in the MCInst encoding.
5097 OperandMatchResultTy
5098 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5099   using namespace llvm::AMDGPU::MTBUFFormat;
5100 
5101   int64_t Dfmt = DFMT_UNDEF;
5102   int64_t Nfmt = NFMT_UNDEF;
5103 
5104   // dfmt and nfmt can appear in either order, and each is optional.
5105   for (int I = 0; I < 2; ++I) {
5106     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5107       return MatchOperand_ParseFail;
5108 
5109     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5110       return MatchOperand_ParseFail;
5111     }
5112     // Skip optional comma between dfmt/nfmt
5113     // but guard against 2 commas following each other.
5114     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5115         !peekToken().is(AsmToken::Comma)) {
5116       trySkipToken(AsmToken::Comma);
5117     }
5118   }
5119 
5120   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5121     return MatchOperand_NoMatch;
5122 
5123   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5124   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5125 
5126   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5127   return MatchOperand_Success;
5128 }
5129 
5130 OperandMatchResultTy
5131 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5132   using namespace llvm::AMDGPU::MTBUFFormat;
5133 
5134   int64_t Fmt = UFMT_UNDEF;
5135 
5136   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5137     return MatchOperand_ParseFail;
5138 
5139   if (Fmt == UFMT_UNDEF)
5140     return MatchOperand_NoMatch;
5141 
5142   Format = Fmt;
5143   return MatchOperand_Success;
5144 }
5145 
5146 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5147                                     int64_t &Nfmt,
5148                                     StringRef FormatStr,
5149                                     SMLoc Loc) {
5150   using namespace llvm::AMDGPU::MTBUFFormat;
5151   int64_t Format;
5152 
5153   Format = getDfmt(FormatStr);
5154   if (Format != DFMT_UNDEF) {
5155     Dfmt = Format;
5156     return true;
5157   }
5158 
5159   Format = getNfmt(FormatStr, getSTI());
5160   if (Format != NFMT_UNDEF) {
5161     Nfmt = Format;
5162     return true;
5163   }
5164 
5165   Error(Loc, "unsupported format");
5166   return false;
5167 }
5168 
5169 OperandMatchResultTy
5170 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5171                                           SMLoc FormatLoc,
5172                                           int64_t &Format) {
5173   using namespace llvm::AMDGPU::MTBUFFormat;
5174 
5175   int64_t Dfmt = DFMT_UNDEF;
5176   int64_t Nfmt = NFMT_UNDEF;
5177   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5178     return MatchOperand_ParseFail;
5179 
5180   if (trySkipToken(AsmToken::Comma)) {
5181     StringRef Str;
5182     SMLoc Loc = getLoc();
5183     if (!parseId(Str, "expected a format string") ||
5184         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5185       return MatchOperand_ParseFail;
5186     }
5187     if (Dfmt == DFMT_UNDEF) {
5188       Error(Loc, "duplicate numeric format");
5189       return MatchOperand_ParseFail;
5190     } else if (Nfmt == NFMT_UNDEF) {
5191       Error(Loc, "duplicate data format");
5192       return MatchOperand_ParseFail;
5193     }
5194   }
5195 
5196   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5197   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5198 
5199   if (isGFX10()) {
5200     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5201     if (Ufmt == UFMT_UNDEF) {
5202       Error(FormatLoc, "unsupported format");
5203       return MatchOperand_ParseFail;
5204     }
5205     Format = Ufmt;
5206   } else {
5207     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5208   }
5209 
5210   return MatchOperand_Success;
5211 }
5212 
5213 OperandMatchResultTy
5214 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5215                                             SMLoc Loc,
5216                                             int64_t &Format) {
5217   using namespace llvm::AMDGPU::MTBUFFormat;
5218 
5219   auto Id = getUnifiedFormat(FormatStr);
5220   if (Id == UFMT_UNDEF)
5221     return MatchOperand_NoMatch;
5222 
5223   if (!isGFX10()) {
5224     Error(Loc, "unified format is not supported on this GPU");
5225     return MatchOperand_ParseFail;
5226   }
5227 
5228   Format = Id;
5229   return MatchOperand_Success;
5230 }
5231 
5232 OperandMatchResultTy
5233 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5234   using namespace llvm::AMDGPU::MTBUFFormat;
5235   SMLoc Loc = getLoc();
5236 
5237   if (!parseExpr(Format))
5238     return MatchOperand_ParseFail;
5239   if (!isValidFormatEncoding(Format, getSTI())) {
5240     Error(Loc, "out of range format");
5241     return MatchOperand_ParseFail;
5242   }
5243 
5244   return MatchOperand_Success;
5245 }
5246 
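/// Parse the value that follows 'format:'. It is either a bracketed symbolic
/// form, e.g. format:[<data format>,<numeric format>] (names are target
/// defined), or a plain numeric encoding that must be valid for the subtarget.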
5247 OperandMatchResultTy
5248 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5249   using namespace llvm::AMDGPU::MTBUFFormat;
5250 
5251   if (!trySkipId("format", AsmToken::Colon))
5252     return MatchOperand_NoMatch;
5253 
5254   if (trySkipToken(AsmToken::LBrac)) {
5255     StringRef FormatStr;
5256     SMLoc Loc = getLoc();
5257     if (!parseId(FormatStr, "expected a format string"))
5258       return MatchOperand_ParseFail;
5259 
5260     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5261     if (Res == MatchOperand_NoMatch)
5262       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5263     if (Res != MatchOperand_Success)
5264       return Res;
5265 
5266     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5267       return MatchOperand_ParseFail;
5268 
5269     return MatchOperand_Success;
5270   }
5271 
5272   return parseNumericFormat(Format);
5273 }
5274 
5275 OperandMatchResultTy
5276 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5277   using namespace llvm::AMDGPU::MTBUFFormat;
5278 
5279   int64_t Format = getDefaultFormatEncoding(getSTI());
5280   OperandMatchResultTy Res;
5281   SMLoc Loc = getLoc();
5282 
5283   // Parse legacy format syntax.
5284   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5285   if (Res == MatchOperand_ParseFail)
5286     return Res;
5287 
5288   bool FormatFound = (Res == MatchOperand_Success);
5289 
5290   Operands.push_back(
5291     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5292 
5293   if (FormatFound)
5294     trySkipToken(AsmToken::Comma);
5295 
5296   if (isToken(AsmToken::EndOfStatement)) {
5297     // We are expecting an soffset operand,
5298     // but let the matcher handle the error.
5299     return MatchOperand_Success;
5300   }
5301 
5302   // Parse soffset.
5303   Res = parseRegOrImm(Operands);
5304   if (Res != MatchOperand_Success)
5305     return Res;
5306 
5307   trySkipToken(AsmToken::Comma);
5308 
5309   if (!FormatFound) {
5310     Res = parseSymbolicOrNumericFormat(Format);
5311     if (Res == MatchOperand_ParseFail)
5312       return Res;
5313     if (Res == MatchOperand_Success) {
5314       auto Size = Operands.size();
5315       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5316       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5317       Op.setImm(Format);
5318     }
5319     return MatchOperand_Success;
5320   }
5321 
5322   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5323     Error(getLoc(), "duplicate format");
5324     return MatchOperand_ParseFail;
5325   }
5326   return MatchOperand_Success;
5327 }
5328 
5329 //===----------------------------------------------------------------------===//
5330 // ds
5331 //===----------------------------------------------------------------------===//
5332 
5333 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5334                                     const OperandVector &Operands) {
5335   OptionalImmIndexMap OptionalIdx;
5336 
5337   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5338     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5339 
5340     // Add the register arguments
5341     if (Op.isReg()) {
5342       Op.addRegOperands(Inst, 1);
5343       continue;
5344     }
5345 
5346     // Handle optional arguments
5347     OptionalIdx[Op.getImmTy()] = i;
5348   }
5349 
5350   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5351   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5352   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5353 
5354   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5355 }
5356 
5357 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5358                                 bool IsGdsHardcoded) {
5359   OptionalImmIndexMap OptionalIdx;
5360 
5361   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5362     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5363 
5364     // Add the register arguments
5365     if (Op.isReg()) {
5366       Op.addRegOperands(Inst, 1);
5367       continue;
5368     }
5369 
5370     if (Op.isToken() && Op.getToken() == "gds") {
5371       IsGdsHardcoded = true;
5372       continue;
5373     }
5374 
5375     // Handle optional arguments
5376     OptionalIdx[Op.getImmTy()] = i;
5377   }
5378 
5379   AMDGPUOperand::ImmTy OffsetType =
5380     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5381      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5382      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5383                                                       AMDGPUOperand::ImmTyOffset;
5384 
5385   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5386 
5387   if (!IsGdsHardcoded) {
5388     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5389   }
5390   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5391 }
5392 
5393 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5394   OptionalImmIndexMap OptionalIdx;
5395 
5396   unsigned OperandIdx[4];
5397   unsigned EnMask = 0;
5398   int SrcIdx = 0;
5399 
5400   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5401     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5402 
5403     // Add the register arguments
5404     if (Op.isReg()) {
5405       assert(SrcIdx < 4);
5406       OperandIdx[SrcIdx] = Inst.size();
5407       Op.addRegOperands(Inst, 1);
5408       ++SrcIdx;
5409       continue;
5410     }
5411 
5412     if (Op.isOff()) {
5413       assert(SrcIdx < 4);
5414       OperandIdx[SrcIdx] = Inst.size();
5415       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5416       ++SrcIdx;
5417       continue;
5418     }
5419 
5420     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5421       Op.addImmOperands(Inst, 1);
5422       continue;
5423     }
5424 
5425     if (Op.isToken() && Op.getToken() == "done")
5426       continue;
5427 
5428     // Handle optional arguments
5429     OptionalIdx[Op.getImmTy()] = i;
5430   }
5431 
5432   assert(SrcIdx == 4);
5433 
5434   bool Compr = false;
5435   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5436     Compr = true;
5437     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5438     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5439     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5440   }
5441 
5442   for (auto i = 0; i < SrcIdx; ++i) {
5443     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5444       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5445     }
5446   }
5447 
5448   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5449   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5450 
5451   Inst.addOperand(MCOperand::createImm(EnMask));
5452 }
5453 
5454 //===----------------------------------------------------------------------===//
5455 // s_waitcnt
5456 //===----------------------------------------------------------------------===//
5457 
5458 static bool
5459 encodeCnt(
5460   const AMDGPU::IsaVersion ISA,
5461   int64_t &IntVal,
5462   int64_t CntVal,
5463   bool Saturate,
5464   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5465   unsigned (*decode)(const IsaVersion &Version, unsigned))
5466 {
5467   bool Failed = false;
5468 
5469   IntVal = encode(ISA, IntVal, CntVal);
5470   if (CntVal != decode(ISA, IntVal)) {
5471     if (Saturate) {
5472       IntVal = encode(ISA, IntVal, -1);
5473     } else {
5474       Failed = true;
5475     }
5476   }
5477   return Failed;
5478 }
5479 
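/// Parse a single '<name>(<expr>)' item of an s_waitcnt operand, where the
/// name is vmcnt, expcnt or lgkmcnt (optionally with a '_sat' suffix to
/// saturate instead of erroring on overflow), and merge the value into the
/// combined waitcnt mask. Items may be separated by '&' or ',', e.g.
/// 's_waitcnt vmcnt(0) & lgkmcnt(0)' (illustrative).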
5480 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5481 
5482   SMLoc CntLoc = getLoc();
5483   StringRef CntName = getTokenStr();
5484 
5485   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5486       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5487     return false;
5488 
5489   int64_t CntVal;
5490   SMLoc ValLoc = getLoc();
5491   if (!parseExpr(CntVal))
5492     return false;
5493 
5494   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5495 
5496   bool Failed = true;
5497   bool Sat = CntName.endswith("_sat");
5498 
5499   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5500     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5501   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5502     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5503   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5504     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5505   } else {
5506     Error(CntLoc, "invalid counter name " + CntName);
5507     return false;
5508   }
5509 
5510   if (Failed) {
5511     Error(ValLoc, "too large value for " + CntName);
5512     return false;
5513   }
5514 
5515   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5516     return false;
5517 
5518   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5519     if (isToken(AsmToken::EndOfStatement)) {
5520       Error(getLoc(), "expected a counter name");
5521       return false;
5522     }
5523   }
5524 
5525   return true;
5526 }
5527 
5528 OperandMatchResultTy
5529 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5530   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5531   int64_t Waitcnt = getWaitcntBitMask(ISA);
5532   SMLoc S = getLoc();
5533 
5534   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5535     while (!isToken(AsmToken::EndOfStatement)) {
5536       if (!parseCnt(Waitcnt))
5537         return MatchOperand_ParseFail;
5538     }
5539   } else {
5540     if (!parseExpr(Waitcnt))
5541       return MatchOperand_ParseFail;
5542   }
5543 
5544   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5545   return MatchOperand_Success;
5546 }
5547 
5548 bool
5549 AMDGPUOperand::isSWaitCnt() const {
5550   return isImm();
5551 }
5552 
5553 //===----------------------------------------------------------------------===//
5554 // hwreg
5555 //===----------------------------------------------------------------------===//
5556 
5557 bool
5558 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5559                                 int64_t &Offset,
5560                                 int64_t &Width) {
5561   using namespace llvm::AMDGPU::Hwreg;
5562 
5563   // The register may be specified by name or using a numeric code
5564   if (isToken(AsmToken::Identifier) &&
5565       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5566     HwReg.IsSymbolic = true;
5567     lex(); // skip hardware register name
5568   } else if (!parseExpr(HwReg.Id)) {
5569     return false;
5570   }
5571 
5572   if (trySkipToken(AsmToken::RParen))
5573     return true;
5574 
5575   // parse optional params
5576   return
5577     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5578     parseExpr(Offset) &&
5579     skipToken(AsmToken::Comma, "expected a comma") &&
5580     parseExpr(Width) &&
5581     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5582 }
5583 
5584 bool
5585 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5586                                const int64_t Offset,
5587                                const int64_t Width,
5588                                const SMLoc Loc) {
5589 
5590   using namespace llvm::AMDGPU::Hwreg;
5591 
5592   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5593     Error(Loc, "specified hardware register is not supported on this GPU");
5594     return false;
5595   } else if (!isValidHwreg(HwReg.Id)) {
5596     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5597     return false;
5598   } else if (!isValidHwregOffset(Offset)) {
5599     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5600     return false;
5601   } else if (!isValidHwregWidth(Width)) {
5602     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5603     return false;
5604   }
5605   return true;
5606 }
5607 
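/// Parse a hwreg operand: either a raw immediate that must fit in 16 bits, or
/// the symbolic form 'hwreg(<register>[, <offset>, <width>])' where the
/// register may be given by name or by numeric code.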
5608 OperandMatchResultTy
5609 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5610   using namespace llvm::AMDGPU::Hwreg;
5611 
5612   int64_t ImmVal = 0;
5613   SMLoc Loc = getLoc();
5614 
5615   if (trySkipId("hwreg", AsmToken::LParen)) {
5616     OperandInfoTy HwReg(ID_UNKNOWN_);
5617     int64_t Offset = OFFSET_DEFAULT_;
5618     int64_t Width = WIDTH_DEFAULT_;
5619     if (parseHwregBody(HwReg, Offset, Width) &&
5620         validateHwreg(HwReg, Offset, Width, Loc)) {
5621       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5622     } else {
5623       return MatchOperand_ParseFail;
5624     }
5625   } else if (parseExpr(ImmVal)) {
5626     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5627       Error(Loc, "invalid immediate: only 16-bit values are legal");
5628       return MatchOperand_ParseFail;
5629     }
5630   } else {
5631     return MatchOperand_ParseFail;
5632   }
5633 
5634   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5635   return MatchOperand_Success;
5636 }
5637 
5638 bool AMDGPUOperand::isHwreg() const {
5639   return isImmTy(ImmTyHwreg);
5640 }
5641 
5642 //===----------------------------------------------------------------------===//
5643 // sendmsg
5644 //===----------------------------------------------------------------------===//
5645 
5646 bool
5647 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5648                                   OperandInfoTy &Op,
5649                                   OperandInfoTy &Stream) {
5650   using namespace llvm::AMDGPU::SendMsg;
5651 
5652   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5653     Msg.IsSymbolic = true;
5654     lex(); // skip message name
5655   } else if (!parseExpr(Msg.Id)) {
5656     return false;
5657   }
5658 
5659   if (trySkipToken(AsmToken::Comma)) {
5660     Op.IsDefined = true;
5661     if (isToken(AsmToken::Identifier) &&
5662         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5663       lex(); // skip operation name
5664     } else if (!parseExpr(Op.Id)) {
5665       return false;
5666     }
5667 
5668     if (trySkipToken(AsmToken::Comma)) {
5669       Stream.IsDefined = true;
5670       if (!parseExpr(Stream.Id))
5671         return false;
5672     }
5673   }
5674 
5675   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5676 }
5677 
5678 bool
5679 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5680                                  const OperandInfoTy &Op,
5681                                  const OperandInfoTy &Stream,
5682                                  const SMLoc S) {
5683   using namespace llvm::AMDGPU::SendMsg;
5684 
5685   // Validation strictness depends on whether the message is specified
5686   // in a symbolic or in a numeric form. In the latter case
5687   // only the encoding possibility is checked.
5688   bool Strict = Msg.IsSymbolic;
5689 
5690   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5691     Error(S, "invalid message id");
5692     return false;
5693   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5694     Error(S, Op.IsDefined ?
5695              "message does not support operations" :
5696              "missing message operation");
5697     return false;
5698   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5699     Error(S, "invalid operation id");
5700     return false;
5701   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5702     Error(S, "message operation does not support streams");
5703     return false;
5704   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5705     Error(S, "invalid message stream id");
5706     return false;
5707   }
5708   return true;
5709 }
5710 
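/// Parse a sendmsg operand: either a raw immediate that must fit in 16 bits,
/// or the symbolic form 'sendmsg(<msg>[, <operation>[, <stream>]])' where the
/// message and operation may be given by name or by numeric value.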
5711 OperandMatchResultTy
5712 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5713   using namespace llvm::AMDGPU::SendMsg;
5714 
5715   int64_t ImmVal = 0;
5716   SMLoc Loc = getLoc();
5717 
5718   if (trySkipId("sendmsg", AsmToken::LParen)) {
5719     OperandInfoTy Msg(ID_UNKNOWN_);
5720     OperandInfoTy Op(OP_NONE_);
5721     OperandInfoTy Stream(STREAM_ID_NONE_);
5722     if (parseSendMsgBody(Msg, Op, Stream) &&
5723         validateSendMsg(Msg, Op, Stream, Loc)) {
5724       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5725     } else {
5726       return MatchOperand_ParseFail;
5727     }
5728   } else if (parseExpr(ImmVal)) {
5729     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5730       Error(Loc, "invalid immediate: only 16-bit values are legal");
5731       return MatchOperand_ParseFail;
5732     }
5733   } else {
5734     return MatchOperand_ParseFail;
5735   }
5736 
5737   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5738   return MatchOperand_Success;
5739 }
5740 
5741 bool AMDGPUOperand::isSendMsg() const {
5742   return isImmTy(ImmTySendMsg);
5743 }
5744 
5745 //===----------------------------------------------------------------------===//
5746 // v_interp
5747 //===----------------------------------------------------------------------===//
5748 
5749 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5750   if (getLexer().getKind() != AsmToken::Identifier)
5751     return MatchOperand_NoMatch;
5752 
5753   StringRef Str = Parser.getTok().getString();
5754   int Slot = StringSwitch<int>(Str)
5755     .Case("p10", 0)
5756     .Case("p20", 1)
5757     .Case("p0", 2)
5758     .Default(-1);
5759 
5760   SMLoc S = Parser.getTok().getLoc();
5761   if (Slot == -1)
5762     return MatchOperand_ParseFail;
5763 
5764   Parser.Lex();
5765   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5766                                               AMDGPUOperand::ImmTyInterpSlot));
5767   return MatchOperand_Success;
5768 }
5769 
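/// Parse an interpolation attribute of the form 'attr<N>.<chan>', where N is
/// in the range [0, 63] and the channel is one of .x, .y, .z or .w; both the
/// attribute index and the channel are pushed as separate immediate operands.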
5770 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5771   if (getLexer().getKind() != AsmToken::Identifier)
5772     return MatchOperand_NoMatch;
5773 
5774   StringRef Str = Parser.getTok().getString();
5775   if (!Str.startswith("attr"))
5776     return MatchOperand_NoMatch;
5777 
5778   StringRef Chan = Str.take_back(2);
5779   int AttrChan = StringSwitch<int>(Chan)
5780     .Case(".x", 0)
5781     .Case(".y", 1)
5782     .Case(".z", 2)
5783     .Case(".w", 3)
5784     .Default(-1);
5785   if (AttrChan == -1)
5786     return MatchOperand_ParseFail;
5787 
5788   Str = Str.drop_back(2).drop_front(4);
5789 
5790   uint8_t Attr;
5791   if (Str.getAsInteger(10, Attr))
5792     return MatchOperand_ParseFail;
5793 
5794   SMLoc S = Parser.getTok().getLoc();
5795   Parser.Lex();
5796   if (Attr > 63) {
5797     Error(S, "out of bounds attr");
5798     return MatchOperand_ParseFail;
5799   }
5800 
5801   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5802 
5803   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5804                                               AMDGPUOperand::ImmTyInterpAttr));
5805   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5806                                               AMDGPUOperand::ImmTyAttrChan));
5807   return MatchOperand_Success;
5808 }
5809 
5810 //===----------------------------------------------------------------------===//
5811 // exp
5812 //===----------------------------------------------------------------------===//
5813 
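/// Map a symbolic export target to its numeric value: mrt0..mrt7, mrtz (8),
/// null (9), pos0..pos3 (pos4 on GFX10 only), prim (GFX10 only) and
/// param0..param31.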
5814 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5815                                                       uint8_t &Val) {
5816   if (Str == "null") {
5817     Val = 9;
5818     return MatchOperand_Success;
5819   }
5820 
5821   if (Str.startswith("mrt")) {
5822     Str = Str.drop_front(3);
5823     if (Str == "z") { // == mrtz
5824       Val = 8;
5825       return MatchOperand_Success;
5826     }
5827 
5828     if (Str.getAsInteger(10, Val))
5829       return MatchOperand_ParseFail;
5830 
5831     if (Val > 7) {
5832       Error(getLoc(), "invalid exp target");
5833       return MatchOperand_ParseFail;
5834     }
5835 
5836     return MatchOperand_Success;
5837   }
5838 
5839   if (Str.startswith("pos")) {
5840     Str = Str.drop_front(3);
5841     if (Str.getAsInteger(10, Val))
5842       return MatchOperand_ParseFail;
5843 
5844     if (Val > 4 || (Val == 4 && !isGFX10())) {
5845       Error(getLoc(), "invalid exp target");
5846       return MatchOperand_ParseFail;
5847     }
5848 
5849     Val += 12;
5850     return MatchOperand_Success;
5851   }
5852 
5853   if (isGFX10() && Str == "prim") {
5854     Val = 20;
5855     return MatchOperand_Success;
5856   }
5857 
5858   if (Str.startswith("param")) {
5859     Str = Str.drop_front(5);
5860     if (Str.getAsInteger(10, Val))
5861       return MatchOperand_ParseFail;
5862 
5863     if (Val >= 32) {
5864       Error(getLoc(), "invalid exp target");
5865       return MatchOperand_ParseFail;
5866     }
5867 
5868     Val += 32;
5869     return MatchOperand_Success;
5870   }
5871 
5872   if (Str.startswith("invalid_target_")) {
5873     Str = Str.drop_front(15);
5874     if (Str.getAsInteger(10, Val))
5875       return MatchOperand_ParseFail;
5876 
5877     Error(getLoc(), "invalid exp target");
5878     return MatchOperand_ParseFail;
5879   }
5880 
5881   return MatchOperand_NoMatch;
5882 }
5883 
5884 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5885   uint8_t Val;
5886   StringRef Str = Parser.getTok().getString();
5887 
5888   auto Res = parseExpTgtImpl(Str, Val);
5889   if (Res != MatchOperand_Success)
5890     return Res;
5891 
5892   SMLoc S = Parser.getTok().getLoc();
5893   Parser.Lex();
5894 
5895   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5896                                               AMDGPUOperand::ImmTyExpTgt));
5897   return MatchOperand_Success;
5898 }
5899 
5900 //===----------------------------------------------------------------------===//
5901 // parser helpers
5902 //===----------------------------------------------------------------------===//
5903 
5904 bool
5905 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5906   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5907 }
5908 
5909 bool
5910 AMDGPUAsmParser::isId(const StringRef Id) const {
5911   return isId(getToken(), Id);
5912 }
5913 
5914 bool
5915 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5916   return getTokenKind() == Kind;
5917 }
5918 
5919 bool
5920 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5921   if (isId(Id)) {
5922     lex();
5923     return true;
5924   }
5925   return false;
5926 }
5927 
5928 bool
5929 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5930   if (isId(Id) && peekToken().is(Kind)) {
5931     lex();
5932     lex();
5933     return true;
5934   }
5935   return false;
5936 }
5937 
5938 bool
5939 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5940   if (isToken(Kind)) {
5941     lex();
5942     return true;
5943   }
5944   return false;
5945 }
5946 
5947 bool
5948 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5949                            const StringRef ErrMsg) {
5950   if (!trySkipToken(Kind)) {
5951     Error(getLoc(), ErrMsg);
5952     return false;
5953   }
5954   return true;
5955 }
5956 
5957 bool
5958 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5959   return !getParser().parseAbsoluteExpression(Imm);
5960 }
5961 
5962 bool
5963 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5964   SMLoc S = getLoc();
5965 
5966   const MCExpr *Expr;
5967   if (Parser.parseExpression(Expr))
5968     return false;
5969 
5970   int64_t IntVal;
5971   if (Expr->evaluateAsAbsolute(IntVal)) {
5972     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5973   } else {
5974     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5975   }
5976   return true;
5977 }
5978 
5979 bool
5980 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5981   if (isToken(AsmToken::String)) {
5982     Val = getToken().getStringContents();
5983     lex();
5984     return true;
5985   } else {
5986     Error(getLoc(), ErrMsg);
5987     return false;
5988   }
5989 }
5990 
5991 bool
5992 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5993   if (isToken(AsmToken::Identifier)) {
5994     Val = getTokenStr();
5995     lex();
5996     return true;
5997   } else {
5998     Error(getLoc(), ErrMsg);
5999     return false;
6000   }
6001 }
6002 
6003 AsmToken
6004 AMDGPUAsmParser::getToken() const {
6005   return Parser.getTok();
6006 }
6007 
6008 AsmToken
6009 AMDGPUAsmParser::peekToken() {
6010   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6011 }
6012 
6013 void
6014 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6015   auto TokCount = getLexer().peekTokens(Tokens);
6016 
6017   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6018     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6019 }
6020 
6021 AsmToken::TokenKind
6022 AMDGPUAsmParser::getTokenKind() const {
6023   return getLexer().getKind();
6024 }
6025 
6026 SMLoc
6027 AMDGPUAsmParser::getLoc() const {
6028   return getToken().getLoc();
6029 }
6030 
6031 StringRef
6032 AMDGPUAsmParser::getTokenStr() const {
6033   return getToken().getString();
6034 }
6035 
6036 void
6037 AMDGPUAsmParser::lex() {
6038   Parser.Lex();
6039 }
6040 
6041 //===----------------------------------------------------------------------===//
6042 // swizzle
6043 //===----------------------------------------------------------------------===//
6044 
6045 LLVM_READNONE
6046 static unsigned
6047 encodeBitmaskPerm(const unsigned AndMask,
6048                   const unsigned OrMask,
6049                   const unsigned XorMask) {
6050   using namespace llvm::AMDGPU::Swizzle;
6051 
6052   return BITMASK_PERM_ENC |
6053          (AndMask << BITMASK_AND_SHIFT) |
6054          (OrMask  << BITMASK_OR_SHIFT)  |
6055          (XorMask << BITMASK_XOR_SHIFT);
6056 }
6057 
6058 bool
6059 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6060                                       const unsigned MinVal,
6061                                       const unsigned MaxVal,
6062                                       const StringRef ErrMsg) {
6063   for (unsigned i = 0; i < OpNum; ++i) {
6064     if (!skipToken(AsmToken::Comma, "expected a comma")){
6065       return false;
6066     }
6067     SMLoc ExprLoc = Parser.getTok().getLoc();
6068     if (!parseExpr(Op[i])) {
6069       return false;
6070     }
6071     if (Op[i] < MinVal || Op[i] > MaxVal) {
6072       Error(ExprLoc, ErrMsg);
6073       return false;
6074     }
6075   }
6076 
6077   return true;
6078 }
6079 
6080 bool
6081 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6082   using namespace llvm::AMDGPU::Swizzle;
6083 
6084   int64_t Lane[LANE_NUM];
6085   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6086                            "expected a 2-bit lane id")) {
6087     Imm = QUAD_PERM_ENC;
6088     for (unsigned I = 0; I < LANE_NUM; ++I) {
6089       Imm |= Lane[I] << (LANE_SHIFT * I);
6090     }
6091     return true;
6092   }
6093   return false;
6094 }
6095 
6096 bool
6097 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6098   using namespace llvm::AMDGPU::Swizzle;
6099 
6100   SMLoc S = Parser.getTok().getLoc();
6101   int64_t GroupSize;
6102   int64_t LaneIdx;
6103 
6104   if (!parseSwizzleOperands(1, &GroupSize,
6105                             2, 32,
6106                             "group size must be in the interval [2,32]")) {
6107     return false;
6108   }
6109   if (!isPowerOf2_64(GroupSize)) {
6110     Error(S, "group size must be a power of two");
6111     return false;
6112   }
6113   if (parseSwizzleOperands(1, &LaneIdx,
6114                            0, GroupSize - 1,
6115                            "lane id must be in the interval [0,group size - 1]")) {
6116     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6117     return true;
6118   }
6119   return false;
6120 }
6121 
6122 bool
6123 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6124   using namespace llvm::AMDGPU::Swizzle;
6125 
6126   SMLoc S = Parser.getTok().getLoc();
6127   int64_t GroupSize;
6128 
6129   if (!parseSwizzleOperands(1, &GroupSize,
6130       2, 32, "group size must be in the interval [2,32]")) {
6131     return false;
6132   }
6133   if (!isPowerOf2_64(GroupSize)) {
6134     Error(S, "group size must be a power of two");
6135     return false;
6136   }
6137 
6138   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6139   return true;
6140 }
6141 
6142 bool
6143 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6144   using namespace llvm::AMDGPU::Swizzle;
6145 
6146   SMLoc S = Parser.getTok().getLoc();
6147   int64_t GroupSize;
6148 
6149   if (!parseSwizzleOperands(1, &GroupSize,
6150       1, 16, "group size must be in the interval [1,16]")) {
6151     return false;
6152   }
6153   if (!isPowerOf2_64(GroupSize)) {
6154     Error(S, "group size must be a power of two");
6155     return false;
6156   }
6157 
6158   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6159   return true;
6160 }
6161 
6162 bool
6163 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6164   using namespace llvm::AMDGPU::Swizzle;
6165 
6166   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6167     return false;
6168   }
6169 
6170   StringRef Ctl;
6171   SMLoc StrLoc = Parser.getTok().getLoc();
6172   if (!parseString(Ctl)) {
6173     return false;
6174   }
6175   if (Ctl.size() != BITMASK_WIDTH) {
6176     Error(StrLoc, "expected a 5-character mask");
6177     return false;
6178   }
6179 
6180   unsigned AndMask = 0;
6181   unsigned OrMask = 0;
6182   unsigned XorMask = 0;
6183 
6184   for (size_t i = 0; i < Ctl.size(); ++i) {
6185     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6186     switch(Ctl[i]) {
6187     default:
6188       Error(StrLoc, "invalid mask");
6189       return false;
6190     case '0':
6191       break;
6192     case '1':
6193       OrMask |= Mask;
6194       break;
6195     case 'p':
6196       AndMask |= Mask;
6197       break;
6198     case 'i':
6199       AndMask |= Mask;
6200       XorMask |= Mask;
6201       break;
6202     }
6203   }
6204 
6205   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6206   return true;
6207 }
6208 
6209 bool
6210 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6211 
6212   SMLoc OffsetLoc = Parser.getTok().getLoc();
6213 
6214   if (!parseExpr(Imm)) {
6215     return false;
6216   }
6217   if (!isUInt<16>(Imm)) {
6218     Error(OffsetLoc, "expected a 16-bit offset");
6219     return false;
6220   }
6221   return true;
6222 }
6223 
6224 bool
6225 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6226   using namespace llvm::AMDGPU::Swizzle;
6227 
6228   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6229 
6230     SMLoc ModeLoc = Parser.getTok().getLoc();
6231     bool Ok = false;
6232 
6233     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6234       Ok = parseSwizzleQuadPerm(Imm);
6235     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6236       Ok = parseSwizzleBitmaskPerm(Imm);
6237     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6238       Ok = parseSwizzleBroadcast(Imm);
6239     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6240       Ok = parseSwizzleSwap(Imm);
6241     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6242       Ok = parseSwizzleReverse(Imm);
6243     } else {
6244       Error(ModeLoc, "expected a swizzle mode");
6245     }
6246 
6247     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6248   }
6249 
6250   return false;
6251 }
6252 
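/// Parse the optional swizzle operand of ds_swizzle_b32: 'offset:' followed
/// either by a plain 16-bit value or by a 'swizzle(<mode>, ...)' macro using
/// one of the modes named in the swizzle IdSymbolic table (QUAD_PERM,
/// BITMASK_PERM, BROADCAST, SWAP, REVERSE).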
6253 OperandMatchResultTy
6254 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6255   SMLoc S = Parser.getTok().getLoc();
6256   int64_t Imm = 0;
6257 
6258   if (trySkipId("offset")) {
6259 
6260     bool Ok = false;
6261     if (skipToken(AsmToken::Colon, "expected a colon")) {
6262       if (trySkipId("swizzle")) {
6263         Ok = parseSwizzleMacro(Imm);
6264       } else {
6265         Ok = parseSwizzleOffset(Imm);
6266       }
6267     }
6268 
6269     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6270 
6271     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6272   } else {
6273     // Swizzle "offset" operand is optional.
6274     // If it is omitted, try parsing other optional operands.
6275     return parseOptionalOpr(Operands);
6276   }
6277 }
6278 
6279 bool
6280 AMDGPUOperand::isSwizzle() const {
6281   return isImmTy(ImmTySwizzle);
6282 }
6283 
6284 //===----------------------------------------------------------------------===//
6285 // VGPR Index Mode
6286 //===----------------------------------------------------------------------===//
6287 
6288 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6289 
6290   using namespace llvm::AMDGPU::VGPRIndexMode;
6291 
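  // An empty mode list, i.e. "gpr_idx()", encodes as OFF.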
6292   if (trySkipToken(AsmToken::RParen)) {
6293     return OFF;
6294   }
6295 
6296   int64_t Imm = 0;
6297 
6298   while (true) {
6299     unsigned Mode = 0;
6300     SMLoc S = Parser.getTok().getLoc();
6301 
6302     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6303       if (trySkipId(IdSymbolic[ModeId])) {
6304         Mode = 1 << ModeId;
6305         break;
6306       }
6307     }
6308 
6309     if (Mode == 0) {
6310       Error(S, (Imm == 0)?
6311                "expected a VGPR index mode or a closing parenthesis" :
6312                "expected a VGPR index mode");
6313       return UNDEF;
6314     }
6315 
6316     if (Imm & Mode) {
6317       Error(S, "duplicate VGPR index mode");
6318       return UNDEF;
6319     }
6320     Imm |= Mode;
6321 
6322     if (trySkipToken(AsmToken::RParen))
6323       break;
6324     if (!skipToken(AsmToken::Comma,
6325                    "expected a comma or a closing parenthesis"))
6326       return UNDEF;
6327   }
6328 
6329   return Imm;
6330 }
6331 
6332 OperandMatchResultTy
6333 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6334 
6335   using namespace llvm::AMDGPU::VGPRIndexMode;
6336 
6337   int64_t Imm = 0;
6338   SMLoc S = Parser.getTok().getLoc();
6339 
6340   if (getLexer().getKind() == AsmToken::Identifier &&
6341       Parser.getTok().getString() == "gpr_idx" &&
6342       getLexer().peekTok().is(AsmToken::LParen)) {
6343 
6344     Parser.Lex();
6345     Parser.Lex();
6346 
6347     Imm = parseGPRIdxMacro();
6348     if (Imm == UNDEF)
6349       return MatchOperand_ParseFail;
6350 
6351   } else {
6352     if (getParser().parseAbsoluteExpression(Imm))
6353       return MatchOperand_ParseFail;
6354     if (Imm < 0 || !isUInt<4>(Imm)) {
6355       Error(S, "invalid immediate: only 4-bit values are legal");
6356       return MatchOperand_ParseFail;
6357     }
6358   }
6359 
6360   Operands.push_back(
6361       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6362   return MatchOperand_Success;
6363 }
6364 
6365 bool AMDGPUOperand::isGPRIdxMode() const {
6366   return isImmTy(ImmTyGprIdxMode);
6367 }
6368 
6369 //===----------------------------------------------------------------------===//
6370 // sopp branch targets
6371 //===----------------------------------------------------------------------===//
6372 
6373 OperandMatchResultTy
6374 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6375 
6376   // Make sure we are not parsing something
6377   // that looks like a label or an expression but is not.
6378   // This will improve error messages.
6379   if (isRegister() || isModifier())
6380     return MatchOperand_NoMatch;
6381 
6382   if (!parseExpr(Operands))
6383     return MatchOperand_ParseFail;
6384 
6385   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6386   assert(Opr.isImm() || Opr.isExpr());
6387   SMLoc Loc = Opr.getStartLoc();
6388 
6389   // Currently we do not support arbitrary expressions as branch targets.
6390   // Only labels and absolute expressions are accepted.
6391   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6392     Error(Loc, "expected an absolute expression or a label");
6393   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6394     Error(Loc, "expected a 16-bit signed jump offset");
6395   }
6396 
6397   return MatchOperand_Success;
6398 }
6399 
6400 //===----------------------------------------------------------------------===//
6401 // Boolean holding registers
6402 //===----------------------------------------------------------------------===//
6403 
6404 OperandMatchResultTy
6405 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6406   return parseReg(Operands);
6407 }
6408 
6409 //===----------------------------------------------------------------------===//
6410 // mubuf
6411 //===----------------------------------------------------------------------===//
6412 
6413 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6414   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6415 }
6416 
6417 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6418   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6419 }
6420 
6421 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6422   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6423 }
6424 
6425 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6426                                const OperandVector &Operands,
6427                                bool IsAtomic,
6428                                bool IsAtomicReturn,
6429                                bool IsLds) {
6430   bool IsLdsOpcode = IsLds;
6431   bool HasLdsModifier = false;
6432   OptionalImmIndexMap OptionalIdx;
6433   assert(IsAtomicReturn ? IsAtomic : true);
6434   unsigned FirstOperandIdx = 1;
6435 
6436   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6437     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6438 
6439     // Add the register arguments
6440     if (Op.isReg()) {
6441       Op.addRegOperands(Inst, 1);
6442       // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
6445       if (IsAtomicReturn && i == FirstOperandIdx)
6446         Op.addRegOperands(Inst, 1);
6447       continue;
6448     }
6449 
6450     // Handle the case where soffset is an immediate
6451     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6452       Op.addImmOperands(Inst, 1);
6453       continue;
6454     }
6455 
6456     HasLdsModifier |= Op.isLDS();
6457 
6458     // Handle tokens like 'offen' which are sometimes hard-coded into the
6459     // asm string.  There are no MCInst operands for these.
6460     if (Op.isToken()) {
6461       continue;
6462     }
6463     assert(Op.isImm());
6464 
6465     // Handle optional arguments
6466     OptionalIdx[Op.getImmTy()] = i;
6467   }
6468 
  // This is a workaround for an LLVM quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // the optional modifiers, and the LLVM asm matcher regards the 'lds'
  // modifier as an optional one too. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
6476   if (IsLdsOpcode && !HasLdsModifier) {
6477     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6478     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6479       Inst.setOpcode(NoLdsOpcode);
6480       IsLdsOpcode = false;
6481     }
6482   }
6483 
6484   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6485   if (!IsAtomic) { // glc is hard-coded.
6486     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6487   }
6488   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6489 
6490   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6491     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6492   }
6493 
6494   if (isGFX10())
6495     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6496 }
6497 
6498 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6499   OptionalImmIndexMap OptionalIdx;
6500 
6501   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6502     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6503 
6504     // Add the register arguments
6505     if (Op.isReg()) {
6506       Op.addRegOperands(Inst, 1);
6507       continue;
6508     }
6509 
6510     // Handle the case where soffset is an immediate
6511     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6512       Op.addImmOperands(Inst, 1);
6513       continue;
6514     }
6515 
6516     // Handle tokens like 'offen' which are sometimes hard-coded into the
6517     // asm string.  There are no MCInst operands for these.
6518     if (Op.isToken()) {
6519       continue;
6520     }
6521     assert(Op.isImm());
6522 
6523     // Handle optional arguments
6524     OptionalIdx[Op.getImmTy()] = i;
6525   }
6526 
6527   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6528                         AMDGPUOperand::ImmTyOffset);
6529   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6530   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6531   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6532   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6533 
6534   if (isGFX10())
6535     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6536 }
6537 
6538 //===----------------------------------------------------------------------===//
6539 // mimg
6540 //===----------------------------------------------------------------------===//
6541 
6542 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6543                               bool IsAtomic) {
6544   unsigned I = 1;
6545   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6546   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6547     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6548   }
6549 
6550   if (IsAtomic) {
6551     // Add src, same as dst
6552     assert(Desc.getNumDefs() == 1);
6553     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6554   }
6555 
6556   OptionalImmIndexMap OptionalIdx;
6557 
6558   for (unsigned E = Operands.size(); I != E; ++I) {
6559     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6560 
6561     // Add the register arguments
6562     if (Op.isReg()) {
6563       Op.addRegOperands(Inst, 1);
6564     } else if (Op.isImmModifier()) {
6565       OptionalIdx[Op.getImmTy()] = I;
6566     } else if (!Op.isToken()) {
6567       llvm_unreachable("unexpected operand type");
6568     }
6569   }
6570 
6571   bool IsGFX10 = isGFX10();
6572 
6573   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6574   if (IsGFX10)
6575     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6576   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6577   if (IsGFX10)
6578     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6579   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6580   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6581   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6582   if (IsGFX10)
6583     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6584   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6585   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6586   if (!IsGFX10)
6587     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6588   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6589 }
6590 
6591 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6592   cvtMIMG(Inst, Operands, true);
6593 }
6594 
6595 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6596                                       const OperandVector &Operands) {
6597   for (unsigned I = 1; I < Operands.size(); ++I) {
6598     auto &Operand = (AMDGPUOperand &)*Operands[I];
6599     if (Operand.isReg())
6600       Operand.addRegOperands(Inst, 1);
6601   }
6602 
6603   Inst.addOperand(MCOperand::createImm(1)); // a16
6604 }
6605 
6606 //===----------------------------------------------------------------------===//
6607 // smrd
6608 //===----------------------------------------------------------------------===//
6609 
6610 bool AMDGPUOperand::isSMRDOffset8() const {
6611   return isImm() && isUInt<8>(getImm());
6612 }
6613 
6614 bool AMDGPUOperand::isSMEMOffset() const {
6615   return isImm(); // Offset range is checked later by validator.
6616 }
6617 
6618 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
6621   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6622 }
6623 
6624 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6625   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6626 }
6627 
6628 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6629   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6630 }
6631 
6632 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6633   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6634 }
6635 
6636 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6637   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6638 }
6639 
6640 //===----------------------------------------------------------------------===//
6641 // vop3
6642 //===----------------------------------------------------------------------===//
6643 
6644 static bool ConvertOmodMul(int64_t &Mul) {
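  // The omod field encodes mul:1, mul:2 and mul:4 as 0, 1 and 2 respectively.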
6645   if (Mul != 1 && Mul != 2 && Mul != 4)
6646     return false;
6647 
6648   Mul >>= 1;
6649   return true;
6650 }
6651 
6652 static bool ConvertOmodDiv(int64_t &Div) {
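  // div:1 means no output modifier (0); div:2 maps to the remaining encoding 3.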
6653   if (Div == 1) {
6654     Div = 0;
6655     return true;
6656   }
6657 
6658   if (Div == 2) {
6659     Div = 3;
6660     return true;
6661   }
6662 
6663   return false;
6664 }
6665 
6666 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
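  // 'bound_ctrl:0' is encoded as 1; 'bound_ctrl:-1' is encoded as 0.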
6667   if (BoundCtrl == 0) {
6668     BoundCtrl = 1;
6669     return true;
6670   }
6671 
6672   if (BoundCtrl == -1) {
6673     BoundCtrl = 0;
6674     return true;
6675   }
6676 
6677   return false;
6678 }
6679 
6680 // Note: the order in this table matches the order of operands in AsmString.
6681 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6682   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6683   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6684   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6685   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6686   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6687   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6688   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6689   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6690   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6691   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6692   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6693   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6694   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6695   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6696   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6697   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6698   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6699   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6700   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6701   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6702   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6703   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6704   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6705   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6706   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6707   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6708   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6709   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6710   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6711   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6712   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6713   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6714   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6715   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6716   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6717   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6718   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6719   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6720   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6721   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6722   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6723   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6724   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6725 };
6726 
6727 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6728 
6729   OperandMatchResultTy res = parseOptionalOpr(Operands);
6730 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have a hardcoded 'glc' operand).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
6741 
6742   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6743     if (res != MatchOperand_Success ||
6744         isToken(AsmToken::EndOfStatement))
6745       break;
6746 
6747     trySkipToken(AsmToken::Comma);
6748     res = parseOptionalOpr(Operands);
6749   }
6750 
6751   return res;
6752 }
6753 
6754 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6755   OperandMatchResultTy res;
6756   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6757     // try to parse any optional operand here
6758     if (Op.IsBit) {
6759       res = parseNamedBit(Op.Name, Operands, Op.Type);
6760     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6761       res = parseOModOperand(Operands);
6762     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6763                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6764                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6765       res = parseSDWASel(Operands, Op.Name, Op.Type);
6766     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6767       res = parseSDWADstUnused(Operands);
6768     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6769                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6770                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6771                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6772       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6773                                         Op.ConvertResult);
6774     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6775       res = parseDim(Operands);
6776     } else {
6777       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6778     }
6779     if (res != MatchOperand_NoMatch) {
6780       return res;
6781     }
6782   }
6783   return MatchOperand_NoMatch;
6784 }
6785 
6786 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6787   StringRef Name = Parser.getTok().getString();
6788   if (Name == "mul") {
6789     return parseIntWithPrefix("mul", Operands,
6790                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6791   }
6792 
6793   if (Name == "div") {
6794     return parseIntWithPrefix("div", Operands,
6795                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6796   }
6797 
6798   return MatchOperand_NoMatch;
6799 }
6800 
6801 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6802   cvtVOP3P(Inst, Operands);
6803 
6804   int Opc = Inst.getOpcode();
6805 
6806   int SrcNum;
6807   const int Ops[] = { AMDGPU::OpName::src0,
6808                       AMDGPU::OpName::src1,
6809                       AMDGPU::OpName::src2 };
6810   for (SrcNum = 0;
6811        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6812        ++SrcNum);
6813   assert(SrcNum > 0);
6814 
6815   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6816   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6817 
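  // The op_sel bit just past the last source operand selects the destination;
  // it is carried in src0_modifiers as DST_OP_SEL.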
6818   if ((OpSel & (1 << SrcNum)) != 0) {
6819     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6820     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6821     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6822   }
6823 }
6824 
6825 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6834 }
6835 
6836 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6837 {
6838   OptionalImmIndexMap OptionalIdx;
6839   unsigned Opc = Inst.getOpcode();
6840 
6841   unsigned I = 1;
6842   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6843   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6844     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6845   }
6846 
6847   for (unsigned E = Operands.size(); I != E; ++I) {
6848     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6849     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6850       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6851     } else if (Op.isInterpSlot() ||
6852                Op.isInterpAttr() ||
6853                Op.isAttrChan()) {
6854       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6855     } else if (Op.isImmModifier()) {
6856       OptionalIdx[Op.getImmTy()] = I;
6857     } else {
6858       llvm_unreachable("unhandled operand type");
6859     }
6860   }
6861 
6862   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6863     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6864   }
6865 
6866   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6867     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6868   }
6869 
6870   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6871     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6872   }
6873 }
6874 
6875 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6876                               OptionalImmIndexMap &OptionalIdx) {
6877   unsigned Opc = Inst.getOpcode();
6878 
6879   unsigned I = 1;
6880   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6881   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6882     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6883   }
6884 
6885   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6886     // This instruction has src modifiers
6887     for (unsigned E = Operands.size(); I != E; ++I) {
6888       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6889       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6890         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6891       } else if (Op.isImmModifier()) {
6892         OptionalIdx[Op.getImmTy()] = I;
6893       } else if (Op.isRegOrImm()) {
6894         Op.addRegOrImmOperands(Inst, 1);
6895       } else {
6896         llvm_unreachable("unhandled operand type");
6897       }
6898     }
6899   } else {
6900     // No src modifiers
6901     for (unsigned E = Operands.size(); I != E; ++I) {
6902       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6903       if (Op.isMod()) {
6904         OptionalIdx[Op.getImmTy()] = I;
6905       } else {
6906         Op.addRegOrImmOperands(Inst, 1);
6907       }
6908     }
6909   }
6910 
6911   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6912     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6913   }
6914 
6915   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6916     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6917   }
6918 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
6923   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6924       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6925       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6926       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
6927       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
6928       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6929       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6930       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6931       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
6932       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6933     auto it = Inst.begin();
6934     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6935     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6936     ++it;
6937     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6938   }
6939 }
6940 
6941 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6942   OptionalImmIndexMap OptionalIdx;
6943   cvtVOP3(Inst, Operands, OptionalIdx);
6944 }
6945 
6946 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6947                                const OperandVector &Operands) {
6948   OptionalImmIndexMap OptIdx;
6949   const int Opc = Inst.getOpcode();
6950   const MCInstrDesc &Desc = MII.get(Opc);
6951 
6952   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6953 
6954   cvtVOP3(Inst, Operands, OptIdx);
6955 
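  // Instructions with a tied vdst_in operand take the destination register as
  // an extra input; duplicate operand 0 for it.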
6956   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6957     assert(!IsPacked);
6958     Inst.addOperand(Inst.getOperand(0));
6959   }
6960 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
6963 
6964   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6965 
6966   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6967   if (OpSelHiIdx != -1) {
6968     int DefaultVal = IsPacked ? -1 : 0;
6969     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6970                           DefaultVal);
6971   }
6972 
6973   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6974   if (NegLoIdx != -1) {
6975     assert(IsPacked);
6976     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6977     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6978   }
6979 
6980   const int Ops[] = { AMDGPU::OpName::src0,
6981                       AMDGPU::OpName::src1,
6982                       AMDGPU::OpName::src2 };
6983   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6984                          AMDGPU::OpName::src1_modifiers,
6985                          AMDGPU::OpName::src2_modifiers };
6986 
6987   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6988 
6989   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6990   unsigned OpSelHi = 0;
6991   unsigned NegLo = 0;
6992   unsigned NegHi = 0;
6993 
6994   if (OpSelHiIdx != -1) {
6995     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6996   }
6997 
6998   if (NegLoIdx != -1) {
6999     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7000     NegLo = Inst.getOperand(NegLoIdx).getImm();
7001     NegHi = Inst.getOperand(NegHiIdx).getImm();
7002   }
7003 
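  // Distribute the op_sel/op_sel_hi/neg_lo/neg_hi bits into the per-source
  // modifier operands.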
7004   for (int J = 0; J < 3; ++J) {
7005     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7006     if (OpIdx == -1)
7007       break;
7008 
7009     uint32_t ModVal = 0;
7010 
7011     if ((OpSel & (1 << J)) != 0)
7012       ModVal |= SISrcMods::OP_SEL_0;
7013 
7014     if ((OpSelHi & (1 << J)) != 0)
7015       ModVal |= SISrcMods::OP_SEL_1;
7016 
7017     if ((NegLo & (1 << J)) != 0)
7018       ModVal |= SISrcMods::NEG;
7019 
7020     if ((NegHi & (1 << J)) != 0)
7021       ModVal |= SISrcMods::NEG_HI;
7022 
7023     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7024 
7025     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7026   }
7027 }
7028 
7029 //===----------------------------------------------------------------------===//
7030 // dpp
7031 //===----------------------------------------------------------------------===//
7032 
7033 bool AMDGPUOperand::isDPP8() const {
7034   return isImmTy(ImmTyDPP8);
7035 }
7036 
7037 bool AMDGPUOperand::isDPPCtrl() const {
7038   using namespace AMDGPU::DPP;
7039 
7040   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7041   if (result) {
7042     int64_t Imm = getImm();
7043     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7044            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7045            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7046            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7047            (Imm == DppCtrl::WAVE_SHL1) ||
7048            (Imm == DppCtrl::WAVE_ROL1) ||
7049            (Imm == DppCtrl::WAVE_SHR1) ||
7050            (Imm == DppCtrl::WAVE_ROR1) ||
7051            (Imm == DppCtrl::ROW_MIRROR) ||
7052            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7053            (Imm == DppCtrl::BCAST15) ||
7054            (Imm == DppCtrl::BCAST31) ||
7055            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7056            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7057   }
7058   return false;
7059 }
7060 
7061 //===----------------------------------------------------------------------===//
7062 // mAI
7063 //===----------------------------------------------------------------------===//
7064 
7065 bool AMDGPUOperand::isBLGP() const {
7066   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7067 }
7068 
7069 bool AMDGPUOperand::isCBSZ() const {
7070   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7071 }
7072 
7073 bool AMDGPUOperand::isABID() const {
7074   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7075 }
7076 
7077 bool AMDGPUOperand::isS16Imm() const {
7078   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7079 }
7080 
7081 bool AMDGPUOperand::isU16Imm() const {
7082   return isImm() && isUInt<16>(getImm());
7083 }
7084 
7085 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7086   if (!isGFX10())
7087     return MatchOperand_NoMatch;
7088 
7089   SMLoc S = Parser.getTok().getLoc();
7090 
7091   if (getLexer().isNot(AsmToken::Identifier))
7092     return MatchOperand_NoMatch;
7093   if (getLexer().getTok().getString() != "dim")
7094     return MatchOperand_NoMatch;
7095 
7096   Parser.Lex();
7097   if (getLexer().isNot(AsmToken::Colon))
7098     return MatchOperand_ParseFail;
7099 
7100   Parser.Lex();
7101 
7102   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7103   // integer.
7104   std::string Token;
7105   if (getLexer().is(AsmToken::Integer)) {
7106     SMLoc Loc = getLexer().getTok().getEndLoc();
7107     Token = std::string(getLexer().getTok().getString());
7108     Parser.Lex();
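    // The integer and the following identifier must be adjacent (e.g. "1D"),
    // i.e. the next token must start exactly where the integer ended.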
7109     if (getLexer().getTok().getLoc() != Loc)
7110       return MatchOperand_ParseFail;
7111   }
7112   if (getLexer().isNot(AsmToken::Identifier))
7113     return MatchOperand_ParseFail;
7114   Token += getLexer().getTok().getString();
7115 
7116   StringRef DimId = Token;
7117   if (DimId.startswith("SQ_RSRC_IMG_"))
7118     DimId = DimId.substr(12);
7119 
7120   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7121   if (!DimInfo)
7122     return MatchOperand_ParseFail;
7123 
7124   Parser.Lex();
7125 
7126   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7127                                               AMDGPUOperand::ImmTyDim));
7128   return MatchOperand_Success;
7129 }
7130 
7131 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7132   SMLoc S = Parser.getTok().getLoc();
7133   StringRef Prefix;
7134 
7135   if (getLexer().getKind() == AsmToken::Identifier) {
7136     Prefix = Parser.getTok().getString();
7137   } else {
7138     return MatchOperand_NoMatch;
7139   }
7140 
7141   if (Prefix != "dpp8")
7142     return parseDPPCtrl(Operands);
7143   if (!isGFX10())
7144     return MatchOperand_NoMatch;
7145 
7146   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7147 
7148   int64_t Sels[8];
7149 
7150   Parser.Lex();
7151   if (getLexer().isNot(AsmToken::Colon))
7152     return MatchOperand_ParseFail;
7153 
7154   Parser.Lex();
7155   if (getLexer().isNot(AsmToken::LBrac))
7156     return MatchOperand_ParseFail;
7157 
7158   Parser.Lex();
7159   if (getParser().parseAbsoluteExpression(Sels[0]))
7160     return MatchOperand_ParseFail;
7161   if (0 > Sels[0] || 7 < Sels[0])
7162     return MatchOperand_ParseFail;
7163 
7164   for (size_t i = 1; i < 8; ++i) {
7165     if (getLexer().isNot(AsmToken::Comma))
7166       return MatchOperand_ParseFail;
7167 
7168     Parser.Lex();
7169     if (getParser().parseAbsoluteExpression(Sels[i]))
7170       return MatchOperand_ParseFail;
7171     if (0 > Sels[i] || 7 < Sels[i])
7172       return MatchOperand_ParseFail;
7173   }
7174 
7175   if (getLexer().isNot(AsmToken::RBrac))
7176     return MatchOperand_ParseFail;
7177   Parser.Lex();
7178 
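  // Pack the eight 3-bit lane selectors into a single immediate.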
7179   unsigned DPP8 = 0;
7180   for (size_t i = 0; i < 8; ++i)
7181     DPP8 |= (Sels[i] << (i * 3));
7182 
7183   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7184   return MatchOperand_Success;
7185 }
7186 
7187 OperandMatchResultTy
7188 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7189   using namespace AMDGPU::DPP;
7190 
7191   SMLoc S = Parser.getTok().getLoc();
7192   StringRef Prefix;
7193   int64_t Int;
7194 
7195   if (getLexer().getKind() == AsmToken::Identifier) {
7196     Prefix = Parser.getTok().getString();
7197   } else {
7198     return MatchOperand_NoMatch;
7199   }
7200 
7201   if (Prefix == "row_mirror") {
7202     Int = DppCtrl::ROW_MIRROR;
7203     Parser.Lex();
7204   } else if (Prefix == "row_half_mirror") {
7205     Int = DppCtrl::ROW_HALF_MIRROR;
7206     Parser.Lex();
7207   } else {
7208     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7209     if (Prefix != "quad_perm"
7210         && Prefix != "row_shl"
7211         && Prefix != "row_shr"
7212         && Prefix != "row_ror"
7213         && Prefix != "wave_shl"
7214         && Prefix != "wave_rol"
7215         && Prefix != "wave_shr"
7216         && Prefix != "wave_ror"
7217         && Prefix != "row_bcast"
7218         && Prefix != "row_share"
7219         && Prefix != "row_xmask") {
7220       return MatchOperand_NoMatch;
7221     }
7222 
7223     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7224       return MatchOperand_NoMatch;
7225 
7226     if (!isVI() && !isGFX9() &&
7227         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7228          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7229          Prefix == "row_bcast"))
7230       return MatchOperand_NoMatch;
7231 
7232     Parser.Lex();
7233     if (getLexer().isNot(AsmToken::Colon))
7234       return MatchOperand_ParseFail;
7235 
7236     if (Prefix == "quad_perm") {
7237       // quad_perm:[%d,%d,%d,%d]
7238       Parser.Lex();
7239       if (getLexer().isNot(AsmToken::LBrac))
7240         return MatchOperand_ParseFail;
7241       Parser.Lex();
7242 
7243       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7244         return MatchOperand_ParseFail;
7245 
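      // The remaining three selectors occupy 2 bits each at bit offsets 2, 4 and 6.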
7246       for (int i = 0; i < 3; ++i) {
7247         if (getLexer().isNot(AsmToken::Comma))
7248           return MatchOperand_ParseFail;
7249         Parser.Lex();
7250 
7251         int64_t Temp;
7252         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7253           return MatchOperand_ParseFail;
7254         const int shift = i*2 + 2;
7255         Int += (Temp << shift);
7256       }
7257 
7258       if (getLexer().isNot(AsmToken::RBrac))
7259         return MatchOperand_ParseFail;
7260       Parser.Lex();
7261     } else {
7262       // sel:%d
7263       Parser.Lex();
7264       if (getParser().parseAbsoluteExpression(Int))
7265         return MatchOperand_ParseFail;
7266 
7267       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7268         Int |= DppCtrl::ROW_SHL0;
7269       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7270         Int |= DppCtrl::ROW_SHR0;
7271       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7272         Int |= DppCtrl::ROW_ROR0;
7273       } else if (Prefix == "wave_shl" && 1 == Int) {
7274         Int = DppCtrl::WAVE_SHL1;
7275       } else if (Prefix == "wave_rol" && 1 == Int) {
7276         Int = DppCtrl::WAVE_ROL1;
7277       } else if (Prefix == "wave_shr" && 1 == Int) {
7278         Int = DppCtrl::WAVE_SHR1;
7279       } else if (Prefix == "wave_ror" && 1 == Int) {
7280         Int = DppCtrl::WAVE_ROR1;
7281       } else if (Prefix == "row_bcast") {
7282         if (Int == 15) {
7283           Int = DppCtrl::BCAST15;
7284         } else if (Int == 31) {
7285           Int = DppCtrl::BCAST31;
7286         } else {
7287           return MatchOperand_ParseFail;
7288         }
7289       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7290         Int |= DppCtrl::ROW_SHARE_FIRST;
7291       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7292         Int |= DppCtrl::ROW_XMASK_FIRST;
7293       } else {
7294         return MatchOperand_ParseFail;
7295       }
7296     }
7297   }
7298 
7299   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7300   return MatchOperand_Success;
7301 }
7302 
7303 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7304   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7305 }
7306 
7307 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7308   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7309 }
7310 
7311 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7312   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7313 }
7314 
7315 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7316   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7317 }
7318 
7319 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7320   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7321 }
7322 
7323 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7324   OptionalImmIndexMap OptionalIdx;
7325 
7326   unsigned I = 1;
7327   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7328   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7329     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7330   }
7331 
7332   int Fi = 0;
7333   for (unsigned E = Operands.size(); I != E; ++I) {
7334     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7335                                             MCOI::TIED_TO);
7336     if (TiedTo != -1) {
7337       assert((unsigned)TiedTo < Inst.getNumOperands());
7338       // handle tied old or src2 for MAC instructions
7339       Inst.addOperand(Inst.getOperand(TiedTo));
7340     }
7341     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7342     // Add the register arguments
7343     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
7346       continue;
7347     }
7348 
7349     if (IsDPP8) {
7350       if (Op.isDPP8()) {
7351         Op.addImmOperands(Inst, 1);
7352       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7353         Op.addRegWithFPInputModsOperands(Inst, 2);
7354       } else if (Op.isFI()) {
7355         Fi = Op.getImm();
7356       } else if (Op.isReg()) {
7357         Op.addRegOperands(Inst, 1);
7358       } else {
7359         llvm_unreachable("Invalid operand type");
7360       }
7361     } else {
7362       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7363         Op.addRegWithFPInputModsOperands(Inst, 2);
7364       } else if (Op.isDPPCtrl()) {
7365         Op.addImmOperands(Inst, 1);
7366       } else if (Op.isImm()) {
7367         // Handle optional arguments
7368         OptionalIdx[Op.getImmTy()] = I;
7369       } else {
7370         llvm_unreachable("Invalid operand type");
7371       }
7372     }
7373   }
7374 
7375   if (IsDPP8) {
7376     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7378   } else {
7379     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7380     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7381     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7382     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7383       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7384     }
7385   }
7386 }
7387 
7388 //===----------------------------------------------------------------------===//
7389 // sdwa
7390 //===----------------------------------------------------------------------===//
7391 
7392 OperandMatchResultTy
7393 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7394                               AMDGPUOperand::ImmTy Type) {
7395   using namespace llvm::AMDGPU::SDWA;
7396 
7397   SMLoc S = Parser.getTok().getLoc();
7398   StringRef Value;
7399   OperandMatchResultTy res;
7400 
7401   res = parseStringWithPrefix(Prefix, Value);
7402   if (res != MatchOperand_Success) {
7403     return res;
7404   }
7405 
7406   int64_t Int;
7407   Int = StringSwitch<int64_t>(Value)
7408         .Case("BYTE_0", SdwaSel::BYTE_0)
7409         .Case("BYTE_1", SdwaSel::BYTE_1)
7410         .Case("BYTE_2", SdwaSel::BYTE_2)
7411         .Case("BYTE_3", SdwaSel::BYTE_3)
7412         .Case("WORD_0", SdwaSel::WORD_0)
7413         .Case("WORD_1", SdwaSel::WORD_1)
7414         .Case("DWORD", SdwaSel::DWORD)
7415         .Default(0xffffffff);
7416   Parser.Lex(); // eat last token
7417 
7418   if (Int == 0xffffffff) {
7419     return MatchOperand_ParseFail;
7420   }
7421 
7422   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7423   return MatchOperand_Success;
7424 }
7425 
7426 OperandMatchResultTy
7427 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7428   using namespace llvm::AMDGPU::SDWA;
7429 
7430   SMLoc S = Parser.getTok().getLoc();
7431   StringRef Value;
7432   OperandMatchResultTy res;
7433 
7434   res = parseStringWithPrefix("dst_unused", Value);
7435   if (res != MatchOperand_Success) {
7436     return res;
7437   }
7438 
7439   int64_t Int;
7440   Int = StringSwitch<int64_t>(Value)
7441         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7442         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7443         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7444         .Default(0xffffffff);
7445   Parser.Lex(); // eat last token
7446 
7447   if (Int == 0xffffffff) {
7448     return MatchOperand_ParseFail;
7449   }
7450 
7451   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7452   return MatchOperand_Success;
7453 }
7454 
7455 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7456   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7457 }
7458 
7459 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7460   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7461 }
7462 
7463 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7464   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7465 }
7466 
7467 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7468   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7469 }
7470 
7471 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7472   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7473 }
7474 
7475 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7476                               uint64_t BasicInstType,
7477                               bool SkipDstVcc,
7478                               bool SkipSrcVcc) {
7479   using namespace llvm::AMDGPU::SDWA;
7480 
7481   OptionalImmIndexMap OptionalIdx;
7482   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7483   bool SkippedVcc = false;
7484 
7485   unsigned I = 1;
7486   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7487   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7488     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7489   }
7490 
7491   for (unsigned E = Operands.size(); I != E; ++I) {
7492     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7493     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7494         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7498       // Skip VCC only if we didn't skip it on previous iteration.
7499       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7500       if (BasicInstType == SIInstrFlags::VOP2 &&
7501           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7502            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7503         SkippedVcc = true;
7504         continue;
7505       } else if (BasicInstType == SIInstrFlags::VOPC &&
7506                  Inst.getNumOperands() == 0) {
7507         SkippedVcc = true;
7508         continue;
7509       }
7510     }
7511     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7512       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7513     } else if (Op.isImm()) {
7514       // Handle optional arguments
7515       OptionalIdx[Op.getImmTy()] = I;
7516     } else {
7517       llvm_unreachable("Invalid operand type");
7518     }
7519     SkippedVcc = false;
7520   }
7521 
7522   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7523       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7524       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
7526     switch (BasicInstType) {
7527     case SIInstrFlags::VOP1:
7528       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7529       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7530         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7531       }
7532       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7533       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7534       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7535       break;
7536 
7537     case SIInstrFlags::VOP2:
7538       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7539       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7540         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7541       }
7542       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7543       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7544       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7545       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7546       break;
7547 
7548     case SIInstrFlags::VOPC:
7549       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7550         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7551       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7552       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7553       break;
7554 
7555     default:
7556       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7557     }
7558   }
7559 
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
7562   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7563       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7564     auto it = Inst.begin();
7565     std::advance(
7566       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7567     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7568   }
7569 }
7570 
7571 //===----------------------------------------------------------------------===//
7572 // mAI
7573 //===----------------------------------------------------------------------===//
7574 
7575 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7576   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7577 }
7578 
7579 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7580   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7581 }
7582 
7583 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7584   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7585 }
7586 
7587 /// Force static initialization.
7588 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7589   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7590   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7591 }
7592 
7593 #define GET_REGISTER_MATCHER
7594 #define GET_MATCHER_IMPLEMENTATION
7595 #define GET_MNEMONIC_SPELL_CHECKER
7596 #define GET_MNEMONIC_CHECKER
7597 #include "AMDGPUGenAsmMatcher.inc"
7598 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
7601 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7602                                                      unsigned Kind) {
7603   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
7607   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7608   switch (Kind) {
7609   case MCK_addr64:
7610     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7611   case MCK_gds:
7612     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7613   case MCK_lds:
7614     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7615   case MCK_glc:
7616     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7617   case MCK_idxen:
7618     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7619   case MCK_offen:
7620     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7621   case MCK_SSrcB32:
7622     // When operands have expression values, they will return true for isToken,
7623     // because it is not possible to distinguish between a token and an
7624     // expression at parse time. MatchInstructionImpl() will always try to
7625     // match an operand as a token, when isToken returns true, and when the
7626     // name of the expression is not a valid token, the match will fail,
7627     // so we need to handle it here.
7628     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7629   case MCK_SSrcF32:
7630     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7631   case MCK_SoppBrTarget:
7632     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7633   case MCK_VReg32OrOff:
7634     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7635   case MCK_InterpSlot:
7636     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7637   case MCK_Attr:
7638     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7639   case MCK_AttrChan:
7640     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7641   case MCK_ImmSMEMOffset:
7642     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7643   case MCK_SReg_64:
7644   case MCK_SReg_64_XEXEC:
7645     // Null is defined as a 32-bit register but
7646     // it should also be enabled with 64-bit operands.
7647     // The following code enables it for SReg_64 operands
7648     // used as source and destination. Remaining source
7649     // operands are handled in isInlinableImm.
7650     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7651   default:
7652     return Match_InvalidOperand;
7653   }
7654 }
7655 
7656 //===----------------------------------------------------------------------===//
7657 // endpgm
7658 //===----------------------------------------------------------------------===//
7659 
7660 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7661   SMLoc S = Parser.getTok().getLoc();
7662   int64_t Imm = 0;
7663 
7664   if (!parseExpr(Imm)) {
7665     // The operand is optional, if not present default to 0
7666     Imm = 0;
7667   }
7668 
7669   if (!isUInt<16>(Imm)) {
7670     Error(S, "expected a 16-bit value");
7671     return MatchOperand_ParseFail;
7672   }
7673 
7674   Operands.push_back(
7675       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7676   return MatchOperand_Success;
7677 }
7678 
7679 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7680