1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
961 // .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth,
1092                          SMLoc Loc);
1093 
1094   bool isRegister();
1095   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1096   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1097   void initializeGprCountSymbol(RegisterKind RegKind);
1098   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1099                              unsigned RegWidth);
1100   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1101                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1102   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1103                  bool IsGdsHardcoded);
1104 
1105 public:
1106   enum AMDGPUMatchResultTy {
1107     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1108   };
1109   enum OperandMode {
1110     OperandMode_Default,
1111     OperandMode_NSA,
1112   };
1113 
1114   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1115 
1116   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1117                const MCInstrInfo &MII,
1118                const MCTargetOptions &Options)
1119       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1120     MCAsmParserExtension::Initialize(Parser);
1121 
1122     if (getFeatureBits().none()) {
1123       // Set default features.
1124       copySTI().ToggleFeature("southern-islands");
1125     }
1126 
1127     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1128 
1129     {
1130       // TODO: make those pre-defined variables read-only.
1131       // Currently there is none suitable machinery in the core llvm-mc for this.
1132       // MCSymbol::isRedefinable is intended for another purpose, and
1133       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1134       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1135       MCContext &Ctx = getContext();
1136       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1137         MCSymbol *Sym =
1138             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1144       } else {
1145         MCSymbol *Sym =
1146             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1147         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1148         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1149         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1150         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1151         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1152       }
1153       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1154         initializeGprCountSymbol(IS_VGPR);
1155         initializeGprCountSymbol(IS_SGPR);
1156       } else
1157         KernelScope.initialize(getContext());
1158     }
1159   }
1160 
1161   bool hasXNACK() const {
1162     return AMDGPU::hasXNACK(getSTI());
1163   }
1164 
1165   bool hasMIMG_R128() const {
1166     return AMDGPU::hasMIMG_R128(getSTI());
1167   }
1168 
1169   bool hasPackedD16() const {
1170     return AMDGPU::hasPackedD16(getSTI());
1171   }
1172 
1173   bool hasGFX10A16() const {
1174     return AMDGPU::hasGFX10A16(getSTI());
1175   }
1176 
1177   bool isSI() const {
1178     return AMDGPU::isSI(getSTI());
1179   }
1180 
1181   bool isCI() const {
1182     return AMDGPU::isCI(getSTI());
1183   }
1184 
1185   bool isVI() const {
1186     return AMDGPU::isVI(getSTI());
1187   }
1188 
1189   bool isGFX9() const {
1190     return AMDGPU::isGFX9(getSTI());
1191   }
1192 
1193   bool isGFX9Plus() const {
1194     return AMDGPU::isGFX9Plus(getSTI());
1195   }
1196 
1197   bool isGFX10() const {
1198     return AMDGPU::isGFX10(getSTI());
1199   }
1200 
1201   bool isGFX10_BEncoding() const {
1202     return AMDGPU::isGFX10_BEncoding(getSTI());
1203   }
1204 
1205   bool hasInv2PiInlineImm() const {
1206     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1207   }
1208 
1209   bool hasFlatOffsets() const {
1210     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1211   }
1212 
1213   bool hasSGPR102_SGPR103() const {
1214     return !isVI() && !isGFX9();
1215   }
1216 
1217   bool hasSGPR104_SGPR105() const {
1218     return isGFX10();
1219   }
1220 
1221   bool hasIntClamp() const {
1222     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1223   }
1224 
1225   AMDGPUTargetStreamer &getTargetStreamer() {
1226     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1227     return static_cast<AMDGPUTargetStreamer &>(TS);
1228   }
1229 
1230   const MCRegisterInfo *getMRI() const {
1231     // We need this const_cast because for some reason getContext() is not const
1232     // in MCAsmParser.
1233     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1234   }
1235 
1236   const MCInstrInfo *getMII() const {
1237     return &MII;
1238   }
1239 
1240   const FeatureBitset &getFeatureBits() const {
1241     return getSTI().getFeatureBits();
1242   }
1243 
1244   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1245   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1246   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1247 
1248   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1249   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1250   bool isForcedDPP() const { return ForcedDPP; }
1251   bool isForcedSDWA() const { return ForcedSDWA; }
1252   ArrayRef<unsigned> getMatchedVariants() const;
1253   StringRef getMatchedVariantName() const;
1254 
1255   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1256   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1257                      bool RestoreOnFailure);
1258   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1259   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1260                                         SMLoc &EndLoc) override;
1261   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1262   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1263                                       unsigned Kind) override;
1264   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1265                                OperandVector &Operands, MCStreamer &Out,
1266                                uint64_t &ErrorInfo,
1267                                bool MatchingInlineAsm) override;
1268   bool ParseDirective(AsmToken DirectiveID) override;
1269   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1270                                     OperandMode Mode = OperandMode_Default);
1271   StringRef parseMnemonicSuffix(StringRef Name);
1272   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1273                         SMLoc NameLoc, OperandVector &Operands) override;
1274   //bool ProcessInstruction(MCInst &Inst);
1275 
1276   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1277 
1278   OperandMatchResultTy
1279   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1280                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1281                      bool (*ConvertResult)(int64_t &) = nullptr);
1282 
1283   OperandMatchResultTy
1284   parseOperandArrayWithPrefix(const char *Prefix,
1285                               OperandVector &Operands,
1286                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1287                               bool (*ConvertResult)(int64_t&) = nullptr);
1288 
1289   OperandMatchResultTy
1290   parseNamedBit(const char *Name, OperandVector &Operands,
1291                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1292   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1293                                              StringRef &Value);
1294 
1295   bool isModifier();
1296   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1297   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1298   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1299   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1300   bool parseSP3NegModifier();
1301   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1302   OperandMatchResultTy parseReg(OperandVector &Operands);
1303   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1304   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1305   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1306   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1307   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1308   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1309   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1310   OperandMatchResultTy parseUfmt(int64_t &Format);
1311   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1312   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1313   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1314   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1315   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1316   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1317   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1318 
1319   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1320   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1321   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1322   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1323 
1324   bool parseCnt(int64_t &IntVal);
1325   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1326   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1327 
1328 private:
1329   struct OperandInfoTy {
1330     int64_t Id;
1331     bool IsSymbolic = false;
1332     bool IsDefined = false;
1333 
1334     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1335   };
1336 
1337   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1338   bool validateSendMsg(const OperandInfoTy &Msg,
1339                        const OperandInfoTy &Op,
1340                        const OperandInfoTy &Stream,
1341                        const SMLoc Loc);
1342 
1343   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1344   bool validateHwreg(const OperandInfoTy &HwReg,
1345                      const int64_t Offset,
1346                      const int64_t Width,
1347                      const SMLoc Loc);
1348 
1349   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1350   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1351   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1352 
1353   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1354   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1355   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1356   bool validateSOPLiteral(const MCInst &Inst) const;
1357   bool validateConstantBusLimitations(const MCInst &Inst);
1358   bool validateEarlyClobberLimitations(const MCInst &Inst);
1359   bool validateIntClampSupported(const MCInst &Inst);
1360   bool validateMIMGAtomicDMask(const MCInst &Inst);
1361   bool validateMIMGGatherDMask(const MCInst &Inst);
1362   bool validateMovrels(const MCInst &Inst);
1363   bool validateMIMGDataSize(const MCInst &Inst);
1364   bool validateMIMGAddrSize(const MCInst &Inst);
1365   bool validateMIMGD16(const MCInst &Inst);
1366   bool validateMIMGDim(const MCInst &Inst);
1367   bool validateLdsDirect(const MCInst &Inst);
1368   bool validateOpSel(const MCInst &Inst);
1369   bool validateVccOperand(unsigned Reg) const;
1370   bool validateVOP3Literal(const MCInst &Inst) const;
1371   bool validateMAIAccWrite(const MCInst &Inst);
1372   bool validateDivScale(const MCInst &Inst);
1373   unsigned getConstantBusLimit(unsigned Opcode) const;
1374   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1375   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1376   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1377 
1378   bool isSupportedMnemo(StringRef Mnemo,
1379                         const FeatureBitset &FBS);
1380   bool isSupportedMnemo(StringRef Mnemo,
1381                         const FeatureBitset &FBS,
1382                         ArrayRef<unsigned> Variants);
1383   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1384 
1385   bool isId(const StringRef Id) const;
1386   bool isId(const AsmToken &Token, const StringRef Id) const;
1387   bool isToken(const AsmToken::TokenKind Kind) const;
1388   bool trySkipId(const StringRef Id);
1389   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1390   bool trySkipToken(const AsmToken::TokenKind Kind);
1391   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1392   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1393   bool parseId(StringRef &Val, const StringRef ErrMsg);
1394 
1395   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1396   AsmToken::TokenKind getTokenKind() const;
1397   bool parseExpr(int64_t &Imm);
1398   bool parseExpr(OperandVector &Operands);
1399   StringRef getTokenStr() const;
1400   AsmToken peekToken();
1401   AsmToken getToken() const;
1402   SMLoc getLoc() const;
1403   void lex();
1404 
1405 public:
1406   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1407   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1408 
1409   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1410   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1411   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1412   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1413   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1414   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1415 
1416   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1417                             const unsigned MinVal,
1418                             const unsigned MaxVal,
1419                             const StringRef ErrMsg);
1420   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1421   bool parseSwizzleOffset(int64_t &Imm);
1422   bool parseSwizzleMacro(int64_t &Imm);
1423   bool parseSwizzleQuadPerm(int64_t &Imm);
1424   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1425   bool parseSwizzleBroadcast(int64_t &Imm);
1426   bool parseSwizzleSwap(int64_t &Imm);
1427   bool parseSwizzleReverse(int64_t &Imm);
1428 
1429   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1430   int64_t parseGPRIdxMacro();
1431 
1432   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1433   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1434   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1435   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1436   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1437 
1438   AMDGPUOperand::Ptr defaultDLC() const;
1439   AMDGPUOperand::Ptr defaultGLC() const;
1440   AMDGPUOperand::Ptr defaultSLC() const;
1441 
1442   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1443   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1444   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1445   AMDGPUOperand::Ptr defaultFlatOffset() const;
1446 
1447   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1448 
1449   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1450                OptionalImmIndexMap &OptionalIdx);
1451   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1452   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1453   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1454 
1455   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1456 
1457   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1458                bool IsAtomic = false);
1459   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1460   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1461 
1462   OperandMatchResultTy parseDim(OperandVector &Operands);
1463   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1464   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1465   AMDGPUOperand::Ptr defaultRowMask() const;
1466   AMDGPUOperand::Ptr defaultBankMask() const;
1467   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1468   AMDGPUOperand::Ptr defaultFI() const;
1469   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1470   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1471 
1472   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1473                                     AMDGPUOperand::ImmTy Type);
1474   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1475   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1476   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1477   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1478   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1479   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1480   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1481                uint64_t BasicInstType,
1482                bool SkipDstVcc = false,
1483                bool SkipSrcVcc = false);
1484 
1485   AMDGPUOperand::Ptr defaultBLGP() const;
1486   AMDGPUOperand::Ptr defaultCBSZ() const;
1487   AMDGPUOperand::Ptr defaultABID() const;
1488 
1489   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1490   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1491 };
1492 
1493 struct OptionalOperand {
1494   const char *Name;
1495   AMDGPUOperand::ImmTy Type;
1496   bool IsBit;
1497   bool (*ConvertResult)(int64_t&);
1498 };
1499 
1500 } // end anonymous namespace
1501 
1502 // May be called with integer type with equivalent bitwidth.
1503 static const fltSemantics *getFltSemantics(unsigned Size) {
1504   switch (Size) {
1505   case 4:
1506     return &APFloat::IEEEsingle();
1507   case 8:
1508     return &APFloat::IEEEdouble();
1509   case 2:
1510     return &APFloat::IEEEhalf();
1511   default:
1512     llvm_unreachable("unsupported fp type");
1513   }
1514 }
1515 
1516 static const fltSemantics *getFltSemantics(MVT VT) {
1517   return getFltSemantics(VT.getSizeInBits() / 8);
1518 }
1519 
1520 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1521   switch (OperandType) {
1522   case AMDGPU::OPERAND_REG_IMM_INT32:
1523   case AMDGPU::OPERAND_REG_IMM_FP32:
1524   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1525   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1527   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1528     return &APFloat::IEEEsingle();
1529   case AMDGPU::OPERAND_REG_IMM_INT64:
1530   case AMDGPU::OPERAND_REG_IMM_FP64:
1531   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1532   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1533     return &APFloat::IEEEdouble();
1534   case AMDGPU::OPERAND_REG_IMM_INT16:
1535   case AMDGPU::OPERAND_REG_IMM_FP16:
1536   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1537   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1538   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1539   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1540   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1541   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1542   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1543   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1544   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1545   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1546     return &APFloat::IEEEhalf();
1547   default:
1548     llvm_unreachable("unsupported fp type");
1549   }
1550 }
1551 
1552 //===----------------------------------------------------------------------===//
1553 // Operand
1554 //===----------------------------------------------------------------------===//
1555 
1556 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1557   bool Lost;
1558 
1559   // Convert literal to single precision
1560   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1561                                                APFloat::rmNearestTiesToEven,
1562                                                &Lost);
1563   // We allow precision lost but not overflow or underflow
1564   if (Status != APFloat::opOK &&
1565       Lost &&
1566       ((Status & APFloat::opOverflow)  != 0 ||
1567        (Status & APFloat::opUnderflow) != 0)) {
1568     return false;
1569   }
1570 
1571   return true;
1572 }
1573 
1574 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1575   return isUIntN(Size, Val) || isIntN(Size, Val);
1576 }
1577 
1578 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1579   if (VT.getScalarType() == MVT::i16) {
1580     // FP immediate values are broken.
1581     return isInlinableIntLiteral(Val);
1582   }
1583 
1584   // f16/v2f16 operands work correctly for all values.
1585   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1586 }
1587 
1588 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1589 
1590   // This is a hack to enable named inline values like
1591   // shared_base with both 32-bit and 64-bit operands.
1592   // Note that these values are defined as
1593   // 32-bit operands only.
1594   if (isInlineValue()) {
1595     return true;
1596   }
1597 
1598   if (!isImmTy(ImmTyNone)) {
1599     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1600     return false;
1601   }
1602   // TODO: We should avoid using host float here. It would be better to
1603   // check the float bit values which is what a few other places do.
1604   // We've had bot failures before due to weird NaN support on mips hosts.
1605 
1606   APInt Literal(64, Imm.Val);
1607 
1608   if (Imm.IsFPImm) { // We got fp literal token
1609     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1610       return AMDGPU::isInlinableLiteral64(Imm.Val,
1611                                           AsmParser->hasInv2PiInlineImm());
1612     }
1613 
1614     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1615     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1616       return false;
1617 
1618     if (type.getScalarSizeInBits() == 16) {
1619       return isInlineableLiteralOp16(
1620         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1621         type, AsmParser->hasInv2PiInlineImm());
1622     }
1623 
1624     // Check if single precision literal is inlinable
1625     return AMDGPU::isInlinableLiteral32(
1626       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1627       AsmParser->hasInv2PiInlineImm());
1628   }
1629 
1630   // We got int literal token.
1631   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1632     return AMDGPU::isInlinableLiteral64(Imm.Val,
1633                                         AsmParser->hasInv2PiInlineImm());
1634   }
1635 
1636   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1637     return false;
1638   }
1639 
1640   if (type.getScalarSizeInBits() == 16) {
1641     return isInlineableLiteralOp16(
1642       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1643       type, AsmParser->hasInv2PiInlineImm());
1644   }
1645 
1646   return AMDGPU::isInlinableLiteral32(
1647     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1648     AsmParser->hasInv2PiInlineImm());
1649 }
1650 
1651 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1652   // Check that this immediate can be added as literal
1653   if (!isImmTy(ImmTyNone)) {
1654     return false;
1655   }
1656 
1657   if (!Imm.IsFPImm) {
1658     // We got int literal token.
1659 
1660     if (type == MVT::f64 && hasFPModifiers()) {
1661       // Cannot apply fp modifiers to int literals preserving the same semantics
1662       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1663       // disable these cases.
1664       return false;
1665     }
1666 
1667     unsigned Size = type.getSizeInBits();
1668     if (Size == 64)
1669       Size = 32;
1670 
1671     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1672     // types.
1673     return isSafeTruncation(Imm.Val, Size);
1674   }
1675 
1676   // We got fp literal token
1677   if (type == MVT::f64) { // Expected 64-bit fp operand
1678     // We would set low 64-bits of literal to zeroes but we accept this literals
1679     return true;
1680   }
1681 
1682   if (type == MVT::i64) { // Expected 64-bit int operand
1683     // We don't allow fp literals in 64-bit integer instructions. It is
1684     // unclear how we should encode them.
1685     return false;
1686   }
1687 
1688   // We allow fp literals with f16x2 operands assuming that the specified
1689   // literal goes into the lower half and the upper half is zero. We also
1690   // require that the literal may be losslesly converted to f16.
1691   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1692                      (type == MVT::v2i16)? MVT::i16 : type;
1693 
1694   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1695   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1696 }
1697 
1698 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1699   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1700 }
1701 
1702 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1703   if (AsmParser->isVI())
1704     return isVReg32();
1705   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1706     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1707   else
1708     return false;
1709 }
1710 
1711 bool AMDGPUOperand::isSDWAFP16Operand() const {
1712   return isSDWAOperand(MVT::f16);
1713 }
1714 
1715 bool AMDGPUOperand::isSDWAFP32Operand() const {
1716   return isSDWAOperand(MVT::f32);
1717 }
1718 
1719 bool AMDGPUOperand::isSDWAInt16Operand() const {
1720   return isSDWAOperand(MVT::i16);
1721 }
1722 
1723 bool AMDGPUOperand::isSDWAInt32Operand() const {
1724   return isSDWAOperand(MVT::i32);
1725 }
1726 
1727 bool AMDGPUOperand::isBoolReg() const {
1728   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1729          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1730 }
1731 
1732 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1733 {
1734   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1735   assert(Size == 2 || Size == 4 || Size == 8);
1736 
1737   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1738 
1739   if (Imm.Mods.Abs) {
1740     Val &= ~FpSignMask;
1741   }
1742   if (Imm.Mods.Neg) {
1743     Val ^= FpSignMask;
1744   }
1745 
1746   return Val;
1747 }
1748 
1749 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1750   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1751                              Inst.getNumOperands())) {
1752     addLiteralImmOperand(Inst, Imm.Val,
1753                          ApplyModifiers &
1754                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1755   } else {
1756     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1757     Inst.addOperand(MCOperand::createImm(Imm.Val));
1758   }
1759 }
1760 
1761 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1762   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1763   auto OpNum = Inst.getNumOperands();
1764   // Check that this operand accepts literals
1765   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1766 
1767   if (ApplyModifiers) {
1768     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1769     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1770     Val = applyInputFPModifiers(Val, Size);
1771   }
1772 
1773   APInt Literal(64, Val);
1774   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1775 
1776   if (Imm.IsFPImm) { // We got fp literal token
1777     switch (OpTy) {
1778     case AMDGPU::OPERAND_REG_IMM_INT64:
1779     case AMDGPU::OPERAND_REG_IMM_FP64:
1780     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1781     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1782       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1783                                        AsmParser->hasInv2PiInlineImm())) {
1784         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1785         return;
1786       }
1787 
1788       // Non-inlineable
1789       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1790         // For fp operands we check if low 32 bits are zeros
1791         if (Literal.getLoBits(32) != 0) {
1792           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1793           "Can't encode literal as exact 64-bit floating-point operand. "
1794           "Low 32-bits will be set to zero");
1795         }
1796 
1797         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1798         return;
1799       }
1800 
1801       // We don't allow fp literals in 64-bit integer instructions. It is
1802       // unclear how we should encode them. This case should be checked earlier
1803       // in predicate methods (isLiteralImm())
1804       llvm_unreachable("fp literal in 64-bit integer instruction.");
1805 
1806     case AMDGPU::OPERAND_REG_IMM_INT32:
1807     case AMDGPU::OPERAND_REG_IMM_FP32:
1808     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1809     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1810     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1811     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1812     case AMDGPU::OPERAND_REG_IMM_INT16:
1813     case AMDGPU::OPERAND_REG_IMM_FP16:
1814     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1815     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1816     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1817     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1818     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1819     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1820     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1821     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1822     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1823     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1824       bool lost;
1825       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1826       // Convert literal to single precision
1827       FPLiteral.convert(*getOpFltSemantics(OpTy),
1828                         APFloat::rmNearestTiesToEven, &lost);
1829       // We allow precision lost but not overflow or underflow. This should be
1830       // checked earlier in isLiteralImm()
1831 
1832       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1833       Inst.addOperand(MCOperand::createImm(ImmVal));
1834       return;
1835     }
1836     default:
1837       llvm_unreachable("invalid operand size");
1838     }
1839 
1840     return;
1841   }
1842 
1843   // We got int literal token.
1844   // Only sign extend inline immediates.
1845   switch (OpTy) {
1846   case AMDGPU::OPERAND_REG_IMM_INT32:
1847   case AMDGPU::OPERAND_REG_IMM_FP32:
1848   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1849   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1850   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1851   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1852   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1853   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1854     if (isSafeTruncation(Val, 32) &&
1855         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1856                                      AsmParser->hasInv2PiInlineImm())) {
1857       Inst.addOperand(MCOperand::createImm(Val));
1858       return;
1859     }
1860 
1861     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1862     return;
1863 
1864   case AMDGPU::OPERAND_REG_IMM_INT64:
1865   case AMDGPU::OPERAND_REG_IMM_FP64:
1866   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1867   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1868     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1869       Inst.addOperand(MCOperand::createImm(Val));
1870       return;
1871     }
1872 
1873     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1874     return;
1875 
1876   case AMDGPU::OPERAND_REG_IMM_INT16:
1877   case AMDGPU::OPERAND_REG_IMM_FP16:
1878   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1879   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1882     if (isSafeTruncation(Val, 16) &&
1883         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1884                                      AsmParser->hasInv2PiInlineImm())) {
1885       Inst.addOperand(MCOperand::createImm(Val));
1886       return;
1887     }
1888 
1889     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1890     return;
1891 
1892   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1893   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1894   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1895   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1896     assert(isSafeTruncation(Val, 16));
1897     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1898                                         AsmParser->hasInv2PiInlineImm()));
1899 
1900     Inst.addOperand(MCOperand::createImm(Val));
1901     return;
1902   }
1903   default:
1904     llvm_unreachable("invalid operand size");
1905   }
1906 }
1907 
1908 template <unsigned Bitwidth>
1909 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1910   APInt Literal(64, Imm.Val);
1911 
1912   if (!Imm.IsFPImm) {
1913     // We got int literal token.
1914     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1915     return;
1916   }
1917 
1918   bool Lost;
1919   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1920   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1921                     APFloat::rmNearestTiesToEven, &Lost);
1922   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1923 }
1924 
1925 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1926   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1927 }
1928 
1929 static bool isInlineValue(unsigned Reg) {
1930   switch (Reg) {
1931   case AMDGPU::SRC_SHARED_BASE:
1932   case AMDGPU::SRC_SHARED_LIMIT:
1933   case AMDGPU::SRC_PRIVATE_BASE:
1934   case AMDGPU::SRC_PRIVATE_LIMIT:
1935   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1936     return true;
1937   case AMDGPU::SRC_VCCZ:
1938   case AMDGPU::SRC_EXECZ:
1939   case AMDGPU::SRC_SCC:
1940     return true;
1941   case AMDGPU::SGPR_NULL:
1942     return true;
1943   default:
1944     return false;
1945   }
1946 }
1947 
1948 bool AMDGPUOperand::isInlineValue() const {
1949   return isRegKind() && ::isInlineValue(getReg());
1950 }
1951 
1952 //===----------------------------------------------------------------------===//
1953 // AsmParser
1954 //===----------------------------------------------------------------------===//
1955 
1956 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1957   if (Is == IS_VGPR) {
1958     switch (RegWidth) {
1959       default: return -1;
1960       case 1: return AMDGPU::VGPR_32RegClassID;
1961       case 2: return AMDGPU::VReg_64RegClassID;
1962       case 3: return AMDGPU::VReg_96RegClassID;
1963       case 4: return AMDGPU::VReg_128RegClassID;
1964       case 5: return AMDGPU::VReg_160RegClassID;
1965       case 6: return AMDGPU::VReg_192RegClassID;
1966       case 8: return AMDGPU::VReg_256RegClassID;
1967       case 16: return AMDGPU::VReg_512RegClassID;
1968       case 32: return AMDGPU::VReg_1024RegClassID;
1969     }
1970   } else if (Is == IS_TTMP) {
1971     switch (RegWidth) {
1972       default: return -1;
1973       case 1: return AMDGPU::TTMP_32RegClassID;
1974       case 2: return AMDGPU::TTMP_64RegClassID;
1975       case 4: return AMDGPU::TTMP_128RegClassID;
1976       case 8: return AMDGPU::TTMP_256RegClassID;
1977       case 16: return AMDGPU::TTMP_512RegClassID;
1978     }
1979   } else if (Is == IS_SGPR) {
1980     switch (RegWidth) {
1981       default: return -1;
1982       case 1: return AMDGPU::SGPR_32RegClassID;
1983       case 2: return AMDGPU::SGPR_64RegClassID;
1984       case 3: return AMDGPU::SGPR_96RegClassID;
1985       case 4: return AMDGPU::SGPR_128RegClassID;
1986       case 5: return AMDGPU::SGPR_160RegClassID;
1987       case 6: return AMDGPU::SGPR_192RegClassID;
1988       case 8: return AMDGPU::SGPR_256RegClassID;
1989       case 16: return AMDGPU::SGPR_512RegClassID;
1990     }
1991   } else if (Is == IS_AGPR) {
1992     switch (RegWidth) {
1993       default: return -1;
1994       case 1: return AMDGPU::AGPR_32RegClassID;
1995       case 2: return AMDGPU::AReg_64RegClassID;
1996       case 3: return AMDGPU::AReg_96RegClassID;
1997       case 4: return AMDGPU::AReg_128RegClassID;
1998       case 5: return AMDGPU::AReg_160RegClassID;
1999       case 6: return AMDGPU::AReg_192RegClassID;
2000       case 8: return AMDGPU::AReg_256RegClassID;
2001       case 16: return AMDGPU::AReg_512RegClassID;
2002       case 32: return AMDGPU::AReg_1024RegClassID;
2003     }
2004   }
2005   return -1;
2006 }
2007 
2008 static unsigned getSpecialRegForName(StringRef RegName) {
2009   return StringSwitch<unsigned>(RegName)
2010     .Case("exec", AMDGPU::EXEC)
2011     .Case("vcc", AMDGPU::VCC)
2012     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2013     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2014     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2015     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2016     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2017     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2018     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2019     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2020     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2021     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2022     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2023     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2024     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2025     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2026     .Case("m0", AMDGPU::M0)
2027     .Case("vccz", AMDGPU::SRC_VCCZ)
2028     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2029     .Case("execz", AMDGPU::SRC_EXECZ)
2030     .Case("src_execz", AMDGPU::SRC_EXECZ)
2031     .Case("scc", AMDGPU::SRC_SCC)
2032     .Case("src_scc", AMDGPU::SRC_SCC)
2033     .Case("tba", AMDGPU::TBA)
2034     .Case("tma", AMDGPU::TMA)
2035     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2036     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2037     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2038     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2039     .Case("vcc_lo", AMDGPU::VCC_LO)
2040     .Case("vcc_hi", AMDGPU::VCC_HI)
2041     .Case("exec_lo", AMDGPU::EXEC_LO)
2042     .Case("exec_hi", AMDGPU::EXEC_HI)
2043     .Case("tma_lo", AMDGPU::TMA_LO)
2044     .Case("tma_hi", AMDGPU::TMA_HI)
2045     .Case("tba_lo", AMDGPU::TBA_LO)
2046     .Case("tba_hi", AMDGPU::TBA_HI)
2047     .Case("pc", AMDGPU::PC_REG)
2048     .Case("null", AMDGPU::SGPR_NULL)
2049     .Default(AMDGPU::NoRegister);
2050 }
2051 
2052 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2053                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2054   auto R = parseRegister();
2055   if (!R) return true;
2056   assert(R->isReg());
2057   RegNo = R->getReg();
2058   StartLoc = R->getStartLoc();
2059   EndLoc = R->getEndLoc();
2060   return false;
2061 }
2062 
2063 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2064                                     SMLoc &EndLoc) {
2065   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2066 }
2067 
2068 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2069                                                        SMLoc &StartLoc,
2070                                                        SMLoc &EndLoc) {
2071   bool Result =
2072       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2073   bool PendingErrors = getParser().hasPendingError();
2074   getParser().clearPendingErrors();
2075   if (PendingErrors)
2076     return MatchOperand_ParseFail;
2077   if (Result)
2078     return MatchOperand_NoMatch;
2079   return MatchOperand_Success;
2080 }
2081 
2082 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2083                                             RegisterKind RegKind, unsigned Reg1,
2084                                             SMLoc Loc) {
2085   switch (RegKind) {
2086   case IS_SPECIAL:
2087     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2088       Reg = AMDGPU::EXEC;
2089       RegWidth = 2;
2090       return true;
2091     }
2092     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2093       Reg = AMDGPU::FLAT_SCR;
2094       RegWidth = 2;
2095       return true;
2096     }
2097     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2098       Reg = AMDGPU::XNACK_MASK;
2099       RegWidth = 2;
2100       return true;
2101     }
2102     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2103       Reg = AMDGPU::VCC;
2104       RegWidth = 2;
2105       return true;
2106     }
2107     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2108       Reg = AMDGPU::TBA;
2109       RegWidth = 2;
2110       return true;
2111     }
2112     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2113       Reg = AMDGPU::TMA;
2114       RegWidth = 2;
2115       return true;
2116     }
2117     Error(Loc, "register does not fit in the list");
2118     return false;
2119   case IS_VGPR:
2120   case IS_SGPR:
2121   case IS_AGPR:
2122   case IS_TTMP:
2123     if (Reg1 != Reg + RegWidth) {
2124       Error(Loc, "registers in a list must have consecutive indices");
2125       return false;
2126     }
2127     RegWidth++;
2128     return true;
2129   default:
2130     llvm_unreachable("unexpected register kind");
2131   }
2132 }
2133 
2134 struct RegInfo {
2135   StringLiteral Name;
2136   RegisterKind Kind;
2137 };
2138 
2139 static constexpr RegInfo RegularRegisters[] = {
2140   {{"v"},    IS_VGPR},
2141   {{"s"},    IS_SGPR},
2142   {{"ttmp"}, IS_TTMP},
2143   {{"acc"},  IS_AGPR},
2144   {{"a"},    IS_AGPR},
2145 };
2146 
2147 static bool isRegularReg(RegisterKind Kind) {
2148   return Kind == IS_VGPR ||
2149          Kind == IS_SGPR ||
2150          Kind == IS_TTMP ||
2151          Kind == IS_AGPR;
2152 }
2153 
2154 static const RegInfo* getRegularRegInfo(StringRef Str) {
2155   for (const RegInfo &Reg : RegularRegisters)
2156     if (Str.startswith(Reg.Name))
2157       return &Reg;
2158   return nullptr;
2159 }
2160 
2161 static bool getRegNum(StringRef Str, unsigned& Num) {
2162   return !Str.getAsInteger(10, Num);
2163 }
2164 
2165 bool
2166 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2167                             const AsmToken &NextToken) const {
2168 
2169   // A list of consecutive registers: [s0,s1,s2,s3]
2170   if (Token.is(AsmToken::LBrac))
2171     return true;
2172 
2173   if (!Token.is(AsmToken::Identifier))
2174     return false;
2175 
2176   // A single register like s0 or a range of registers like s[0:1]
2177 
2178   StringRef Str = Token.getString();
2179   const RegInfo *Reg = getRegularRegInfo(Str);
2180   if (Reg) {
2181     StringRef RegName = Reg->Name;
2182     StringRef RegSuffix = Str.substr(RegName.size());
2183     if (!RegSuffix.empty()) {
2184       unsigned Num;
2185       // A single register with an index: rXX
2186       if (getRegNum(RegSuffix, Num))
2187         return true;
2188     } else {
2189       // A range of registers: r[XX:YY].
2190       if (NextToken.is(AsmToken::LBrac))
2191         return true;
2192     }
2193   }
2194 
2195   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2196 }
2197 
2198 bool
2199 AMDGPUAsmParser::isRegister()
2200 {
2201   return isRegister(getToken(), peekToken());
2202 }
2203 
2204 unsigned
2205 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2206                                unsigned RegNum,
2207                                unsigned RegWidth,
2208                                SMLoc Loc) {
2209 
2210   assert(isRegularReg(RegKind));
2211 
2212   unsigned AlignSize = 1;
2213   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2214     // SGPR and TTMP registers must be aligned.
2215     // Max required alignment is 4 dwords.
2216     AlignSize = std::min(RegWidth, 4u);
2217   }
2218 
2219   if (RegNum % AlignSize != 0) {
2220     Error(Loc, "invalid register alignment");
2221     return AMDGPU::NoRegister;
2222   }
2223 
2224   unsigned RegIdx = RegNum / AlignSize;
2225   int RCID = getRegClass(RegKind, RegWidth);
2226   if (RCID == -1) {
2227     Error(Loc, "invalid or unsupported register size");
2228     return AMDGPU::NoRegister;
2229   }
2230 
2231   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2232   const MCRegisterClass RC = TRI->getRegClass(RCID);
2233   if (RegIdx >= RC.getNumRegs()) {
2234     Error(Loc, "register index is out of range");
2235     return AMDGPU::NoRegister;
2236   }
2237 
2238   return RC.getRegister(RegIdx);
2239 }
2240 
2241 bool
2242 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2243   int64_t RegLo, RegHi;
2244   if (!skipToken(AsmToken::LBrac, "missing register index"))
2245     return false;
2246 
2247   SMLoc FirstIdxLoc = getLoc();
2248   SMLoc SecondIdxLoc;
2249 
2250   if (!parseExpr(RegLo))
2251     return false;
2252 
2253   if (trySkipToken(AsmToken::Colon)) {
2254     SecondIdxLoc = getLoc();
2255     if (!parseExpr(RegHi))
2256       return false;
2257   } else {
2258     RegHi = RegLo;
2259   }
2260 
2261   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2262     return false;
2263 
2264   if (!isUInt<32>(RegLo)) {
2265     Error(FirstIdxLoc, "invalid register index");
2266     return false;
2267   }
2268 
2269   if (!isUInt<32>(RegHi)) {
2270     Error(SecondIdxLoc, "invalid register index");
2271     return false;
2272   }
2273 
2274   if (RegLo > RegHi) {
2275     Error(FirstIdxLoc, "first register index should not exceed second index");
2276     return false;
2277   }
2278 
2279   Num = static_cast<unsigned>(RegLo);
2280   Width = (RegHi - RegLo) + 1;
2281   return true;
2282 }
2283 
2284 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2285                                           unsigned &RegNum, unsigned &RegWidth,
2286                                           SmallVectorImpl<AsmToken> &Tokens) {
2287   assert(isToken(AsmToken::Identifier));
2288   unsigned Reg = getSpecialRegForName(getTokenStr());
2289   if (Reg) {
2290     RegNum = 0;
2291     RegWidth = 1;
2292     RegKind = IS_SPECIAL;
2293     Tokens.push_back(getToken());
2294     lex(); // skip register name
2295   }
2296   return Reg;
2297 }
2298 
2299 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2300                                           unsigned &RegNum, unsigned &RegWidth,
2301                                           SmallVectorImpl<AsmToken> &Tokens) {
2302   assert(isToken(AsmToken::Identifier));
2303   StringRef RegName = getTokenStr();
2304   auto Loc = getLoc();
2305 
2306   const RegInfo *RI = getRegularRegInfo(RegName);
2307   if (!RI) {
2308     Error(Loc, "invalid register name");
2309     return AMDGPU::NoRegister;
2310   }
2311 
2312   Tokens.push_back(getToken());
2313   lex(); // skip register name
2314 
2315   RegKind = RI->Kind;
2316   StringRef RegSuffix = RegName.substr(RI->Name.size());
2317   if (!RegSuffix.empty()) {
2318     // Single 32-bit register: vXX.
2319     if (!getRegNum(RegSuffix, RegNum)) {
2320       Error(Loc, "invalid register index");
2321       return AMDGPU::NoRegister;
2322     }
2323     RegWidth = 1;
2324   } else {
2325     // Range of registers: v[XX:YY]. ":YY" is optional.
2326     if (!ParseRegRange(RegNum, RegWidth))
2327       return AMDGPU::NoRegister;
2328   }
2329 
2330   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2331 }
2332 
2333 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2334                                        unsigned &RegWidth,
2335                                        SmallVectorImpl<AsmToken> &Tokens) {
2336   unsigned Reg = AMDGPU::NoRegister;
2337   auto ListLoc = getLoc();
2338 
2339   if (!skipToken(AsmToken::LBrac,
2340                  "expected a register or a list of registers")) {
2341     return AMDGPU::NoRegister;
2342   }
2343 
2344   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2345 
2346   auto Loc = getLoc();
2347   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2348     return AMDGPU::NoRegister;
2349   if (RegWidth != 1) {
2350     Error(Loc, "expected a single 32-bit register");
2351     return AMDGPU::NoRegister;
2352   }
2353 
2354   for (; trySkipToken(AsmToken::Comma); ) {
2355     RegisterKind NextRegKind;
2356     unsigned NextReg, NextRegNum, NextRegWidth;
2357     Loc = getLoc();
2358 
2359     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2360                              NextRegNum, NextRegWidth,
2361                              Tokens)) {
2362       return AMDGPU::NoRegister;
2363     }
2364     if (NextRegWidth != 1) {
2365       Error(Loc, "expected a single 32-bit register");
2366       return AMDGPU::NoRegister;
2367     }
2368     if (NextRegKind != RegKind) {
2369       Error(Loc, "registers in a list must be of the same kind");
2370       return AMDGPU::NoRegister;
2371     }
2372     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2373       return AMDGPU::NoRegister;
2374   }
2375 
2376   if (!skipToken(AsmToken::RBrac,
2377                  "expected a comma or a closing square bracket")) {
2378     return AMDGPU::NoRegister;
2379   }
2380 
2381   if (isRegularReg(RegKind))
2382     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2383 
2384   return Reg;
2385 }
2386 
2387 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2388                                           unsigned &RegNum, unsigned &RegWidth,
2389                                           SmallVectorImpl<AsmToken> &Tokens) {
2390   auto Loc = getLoc();
2391   Reg = AMDGPU::NoRegister;
2392 
2393   if (isToken(AsmToken::Identifier)) {
2394     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2395     if (Reg == AMDGPU::NoRegister)
2396       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2397   } else {
2398     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2399   }
2400 
2401   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2402   if (Reg == AMDGPU::NoRegister) {
2403     assert(Parser.hasPendingError());
2404     return false;
2405   }
2406 
2407   if (!subtargetHasRegister(*TRI, Reg)) {
2408     if (Reg == AMDGPU::SGPR_NULL) {
2409       Error(Loc, "'null' operand is not supported on this GPU");
2410     } else {
2411       Error(Loc, "register not available on this GPU");
2412     }
2413     return false;
2414   }
2415 
2416   return true;
2417 }
2418 
2419 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2420                                           unsigned &RegNum, unsigned &RegWidth,
2421                                           bool RestoreOnFailure /*=false*/) {
2422   Reg = AMDGPU::NoRegister;
2423 
2424   SmallVector<AsmToken, 1> Tokens;
2425   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2426     if (RestoreOnFailure) {
2427       while (!Tokens.empty()) {
2428         getLexer().UnLex(Tokens.pop_back_val());
2429       }
2430     }
2431     return true;
2432   }
2433   return false;
2434 }
2435 
2436 Optional<StringRef>
2437 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2438   switch (RegKind) {
2439   case IS_VGPR:
2440     return StringRef(".amdgcn.next_free_vgpr");
2441   case IS_SGPR:
2442     return StringRef(".amdgcn.next_free_sgpr");
2443   default:
2444     return None;
2445   }
2446 }
2447 
2448 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2449   auto SymbolName = getGprCountSymbolName(RegKind);
2450   assert(SymbolName && "initializing invalid register kind");
2451   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2452   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2453 }
2454 
2455 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2456                                             unsigned DwordRegIndex,
2457                                             unsigned RegWidth) {
2458   // Symbols are only defined for GCN targets
2459   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2460     return true;
2461 
2462   auto SymbolName = getGprCountSymbolName(RegKind);
2463   if (!SymbolName)
2464     return true;
2465   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2466 
2467   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2468   int64_t OldCount;
2469 
2470   if (!Sym->isVariable())
2471     return !Error(getParser().getTok().getLoc(),
2472                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2473   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2474     return !Error(
2475         getParser().getTok().getLoc(),
2476         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2477 
2478   if (OldCount <= NewMax)
2479     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2480 
2481   return true;
2482 }
2483 
2484 std::unique_ptr<AMDGPUOperand>
2485 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2486   const auto &Tok = Parser.getTok();
2487   SMLoc StartLoc = Tok.getLoc();
2488   SMLoc EndLoc = Tok.getEndLoc();
2489   RegisterKind RegKind;
2490   unsigned Reg, RegNum, RegWidth;
2491 
2492   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2493     return nullptr;
2494   }
2495   if (isHsaAbiVersion3(&getSTI())) {
2496     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2497       return nullptr;
2498   } else
2499     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2500   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2501 }
2502 
2503 OperandMatchResultTy
2504 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2505   // TODO: add syntactic sugar for 1/(2*PI)
2506 
2507   assert(!isRegister());
2508   assert(!isModifier());
2509 
2510   const auto& Tok = getToken();
2511   const auto& NextTok = peekToken();
2512   bool IsReal = Tok.is(AsmToken::Real);
2513   SMLoc S = getLoc();
2514   bool Negate = false;
2515 
2516   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2517     lex();
2518     IsReal = true;
2519     Negate = true;
2520   }
2521 
2522   if (IsReal) {
2523     // Floating-point expressions are not supported.
2524     // Can only allow floating-point literals with an
2525     // optional sign.
2526 
2527     StringRef Num = getTokenStr();
2528     lex();
2529 
2530     APFloat RealVal(APFloat::IEEEdouble());
2531     auto roundMode = APFloat::rmNearestTiesToEven;
2532     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2533       return MatchOperand_ParseFail;
2534     }
2535     if (Negate)
2536       RealVal.changeSign();
2537 
2538     Operands.push_back(
2539       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2540                                AMDGPUOperand::ImmTyNone, true));
2541 
2542     return MatchOperand_Success;
2543 
2544   } else {
2545     int64_t IntVal;
2546     const MCExpr *Expr;
2547     SMLoc S = getLoc();
2548 
2549     if (HasSP3AbsModifier) {
2550       // This is a workaround for handling expressions
2551       // as arguments of SP3 'abs' modifier, for example:
2552       //     |1.0|
2553       //     |-1|
2554       //     |1+x|
2555       // This syntax is not compatible with syntax of standard
2556       // MC expressions (due to the trailing '|').
2557       SMLoc EndLoc;
2558       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2559         return MatchOperand_ParseFail;
2560     } else {
2561       if (Parser.parseExpression(Expr))
2562         return MatchOperand_ParseFail;
2563     }
2564 
2565     if (Expr->evaluateAsAbsolute(IntVal)) {
2566       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2567     } else {
2568       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2569     }
2570 
2571     return MatchOperand_Success;
2572   }
2573 
2574   return MatchOperand_NoMatch;
2575 }
2576 
2577 OperandMatchResultTy
2578 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2579   if (!isRegister())
2580     return MatchOperand_NoMatch;
2581 
2582   if (auto R = parseRegister()) {
2583     assert(R->isReg());
2584     Operands.push_back(std::move(R));
2585     return MatchOperand_Success;
2586   }
2587   return MatchOperand_ParseFail;
2588 }
2589 
2590 OperandMatchResultTy
2591 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2592   auto res = parseReg(Operands);
2593   if (res != MatchOperand_NoMatch) {
2594     return res;
2595   } else if (isModifier()) {
2596     return MatchOperand_NoMatch;
2597   } else {
2598     return parseImm(Operands, HasSP3AbsMod);
2599   }
2600 }
2601 
2602 bool
2603 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2604   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2605     const auto &str = Token.getString();
2606     return str == "abs" || str == "neg" || str == "sext";
2607   }
2608   return false;
2609 }
2610 
2611 bool
2612 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2613   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2614 }
2615 
2616 bool
2617 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2618   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2619 }
2620 
2621 bool
2622 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2623   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2624 }
2625 
2626 // Check if this is an operand modifier or an opcode modifier
2627 // which may look like an expression but it is not. We should
2628 // avoid parsing these modifiers as expressions. Currently
2629 // recognized sequences are:
2630 //   |...|
2631 //   abs(...)
2632 //   neg(...)
2633 //   sext(...)
2634 //   -reg
2635 //   -|...|
2636 //   -abs(...)
2637 //   name:...
2638 // Note that simple opcode modifiers like 'gds' may be parsed as
2639 // expressions; this is a special case. See getExpressionAsToken.
2640 //
2641 bool
2642 AMDGPUAsmParser::isModifier() {
2643 
2644   AsmToken Tok = getToken();
2645   AsmToken NextToken[2];
2646   peekTokens(NextToken);
2647 
2648   return isOperandModifier(Tok, NextToken[0]) ||
2649          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2650          isOpcodeModifierWithVal(Tok, NextToken[0]);
2651 }
2652 
2653 // Check if the current token is an SP3 'neg' modifier.
2654 // Currently this modifier is allowed in the following context:
2655 //
2656 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2657 // 2. Before an 'abs' modifier: -abs(...)
2658 // 3. Before an SP3 'abs' modifier: -|...|
2659 //
2660 // In all other cases "-" is handled as a part
2661 // of an expression that follows the sign.
2662 //
2663 // Note: When "-" is followed by an integer literal,
2664 // this is interpreted as integer negation rather
2665 // than a floating-point NEG modifier applied to N.
2666 // Beside being contr-intuitive, such use of floating-point
2667 // NEG modifier would have resulted in different meaning
2668 // of integer literals used with VOP1/2/C and VOP3,
2669 // for example:
2670 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2671 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2672 // Negative fp literals with preceding "-" are
2673 // handled likewise for unifomtity
2674 //
2675 bool
2676 AMDGPUAsmParser::parseSP3NegModifier() {
2677 
2678   AsmToken NextToken[2];
2679   peekTokens(NextToken);
2680 
2681   if (isToken(AsmToken::Minus) &&
2682       (isRegister(NextToken[0], NextToken[1]) ||
2683        NextToken[0].is(AsmToken::Pipe) ||
2684        isId(NextToken[0], "abs"))) {
2685     lex();
2686     return true;
2687   }
2688 
2689   return false;
2690 }
2691 
2692 OperandMatchResultTy
2693 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2694                                               bool AllowImm) {
2695   bool Neg, SP3Neg;
2696   bool Abs, SP3Abs;
2697   SMLoc Loc;
2698 
2699   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2700   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2701     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2702     return MatchOperand_ParseFail;
2703   }
2704 
2705   SP3Neg = parseSP3NegModifier();
2706 
2707   Loc = getLoc();
2708   Neg = trySkipId("neg");
2709   if (Neg && SP3Neg) {
2710     Error(Loc, "expected register or immediate");
2711     return MatchOperand_ParseFail;
2712   }
2713   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2714     return MatchOperand_ParseFail;
2715 
2716   Abs = trySkipId("abs");
2717   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2718     return MatchOperand_ParseFail;
2719 
2720   Loc = getLoc();
2721   SP3Abs = trySkipToken(AsmToken::Pipe);
2722   if (Abs && SP3Abs) {
2723     Error(Loc, "expected register or immediate");
2724     return MatchOperand_ParseFail;
2725   }
2726 
2727   OperandMatchResultTy Res;
2728   if (AllowImm) {
2729     Res = parseRegOrImm(Operands, SP3Abs);
2730   } else {
2731     Res = parseReg(Operands);
2732   }
2733   if (Res != MatchOperand_Success) {
2734     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2735   }
2736 
2737   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2738     return MatchOperand_ParseFail;
2739   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2740     return MatchOperand_ParseFail;
2741   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2742     return MatchOperand_ParseFail;
2743 
2744   AMDGPUOperand::Modifiers Mods;
2745   Mods.Abs = Abs || SP3Abs;
2746   Mods.Neg = Neg || SP3Neg;
2747 
2748   if (Mods.hasFPModifiers()) {
2749     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2750     if (Op.isExpr()) {
2751       Error(Op.getStartLoc(), "expected an absolute expression");
2752       return MatchOperand_ParseFail;
2753     }
2754     Op.setModifiers(Mods);
2755   }
2756   return MatchOperand_Success;
2757 }
2758 
2759 OperandMatchResultTy
2760 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2761                                                bool AllowImm) {
2762   bool Sext = trySkipId("sext");
2763   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2764     return MatchOperand_ParseFail;
2765 
2766   OperandMatchResultTy Res;
2767   if (AllowImm) {
2768     Res = parseRegOrImm(Operands);
2769   } else {
2770     Res = parseReg(Operands);
2771   }
2772   if (Res != MatchOperand_Success) {
2773     return Sext? MatchOperand_ParseFail : Res;
2774   }
2775 
2776   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2777     return MatchOperand_ParseFail;
2778 
2779   AMDGPUOperand::Modifiers Mods;
2780   Mods.Sext = Sext;
2781 
2782   if (Mods.hasIntModifiers()) {
2783     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2784     if (Op.isExpr()) {
2785       Error(Op.getStartLoc(), "expected an absolute expression");
2786       return MatchOperand_ParseFail;
2787     }
2788     Op.setModifiers(Mods);
2789   }
2790 
2791   return MatchOperand_Success;
2792 }
2793 
2794 OperandMatchResultTy
2795 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2796   return parseRegOrImmWithFPInputMods(Operands, false);
2797 }
2798 
2799 OperandMatchResultTy
2800 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2801   return parseRegOrImmWithIntInputMods(Operands, false);
2802 }
2803 
2804 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2805   auto Loc = getLoc();
2806   if (trySkipId("off")) {
2807     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2808                                                 AMDGPUOperand::ImmTyOff, false));
2809     return MatchOperand_Success;
2810   }
2811 
2812   if (!isRegister())
2813     return MatchOperand_NoMatch;
2814 
2815   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2816   if (Reg) {
2817     Operands.push_back(std::move(Reg));
2818     return MatchOperand_Success;
2819   }
2820 
2821   return MatchOperand_ParseFail;
2822 
2823 }
2824 
2825 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2826   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2827 
2828   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2829       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2830       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2831       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2832     return Match_InvalidOperand;
2833 
2834   if ((TSFlags & SIInstrFlags::VOP3) &&
2835       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2836       getForcedEncodingSize() != 64)
2837     return Match_PreferE32;
2838 
2839   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2840       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2841     // v_mac_f32/16 allow only dst_sel == DWORD;
2842     auto OpNum =
2843         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2844     const auto &Op = Inst.getOperand(OpNum);
2845     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2846       return Match_InvalidOperand;
2847     }
2848   }
2849 
2850   return Match_Success;
2851 }
2852 
2853 static ArrayRef<unsigned> getAllVariants() {
2854   static const unsigned Variants[] = {
2855     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2856     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2857   };
2858 
2859   return makeArrayRef(Variants);
2860 }
2861 
2862 // What asm variants we should check
2863 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2864   if (getForcedEncodingSize() == 32) {
2865     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2866     return makeArrayRef(Variants);
2867   }
2868 
2869   if (isForcedVOP3()) {
2870     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2871     return makeArrayRef(Variants);
2872   }
2873 
2874   if (isForcedSDWA()) {
2875     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2876                                         AMDGPUAsmVariants::SDWA9};
2877     return makeArrayRef(Variants);
2878   }
2879 
2880   if (isForcedDPP()) {
2881     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2882     return makeArrayRef(Variants);
2883   }
2884 
2885   return getAllVariants();
2886 }
2887 
2888 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2889   if (getForcedEncodingSize() == 32)
2890     return "e32";
2891 
2892   if (isForcedVOP3())
2893     return "e64";
2894 
2895   if (isForcedSDWA())
2896     return "sdwa";
2897 
2898   if (isForcedDPP())
2899     return "dpp";
2900 
2901   return "";
2902 }
2903 
2904 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2905   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2906   const unsigned Num = Desc.getNumImplicitUses();
2907   for (unsigned i = 0; i < Num; ++i) {
2908     unsigned Reg = Desc.ImplicitUses[i];
2909     switch (Reg) {
2910     case AMDGPU::FLAT_SCR:
2911     case AMDGPU::VCC:
2912     case AMDGPU::VCC_LO:
2913     case AMDGPU::VCC_HI:
2914     case AMDGPU::M0:
2915       return Reg;
2916     default:
2917       break;
2918     }
2919   }
2920   return AMDGPU::NoRegister;
2921 }
2922 
2923 // NB: This code is correct only when used to check constant
2924 // bus limitations because GFX7 support no f16 inline constants.
2925 // Note that there are no cases when a GFX7 opcode violates
2926 // constant bus limitations due to the use of an f16 constant.
2927 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2928                                        unsigned OpIdx) const {
2929   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2930 
2931   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2932     return false;
2933   }
2934 
2935   const MCOperand &MO = Inst.getOperand(OpIdx);
2936 
2937   int64_t Val = MO.getImm();
2938   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2939 
2940   switch (OpSize) { // expected operand size
2941   case 8:
2942     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2943   case 4:
2944     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2945   case 2: {
2946     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2947     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2948         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2949         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2950       return AMDGPU::isInlinableIntLiteral(Val);
2951 
2952     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2953         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2954         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2955       return AMDGPU::isInlinableIntLiteralV216(Val);
2956 
2957     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2958         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2959         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2960       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2961 
2962     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2963   }
2964   default:
2965     llvm_unreachable("invalid operand size");
2966   }
2967 }
2968 
2969 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2970   if (!isGFX10())
2971     return 1;
2972 
2973   switch (Opcode) {
2974   // 64-bit shift instructions can use only one scalar value input
2975   case AMDGPU::V_LSHLREV_B64:
2976   case AMDGPU::V_LSHLREV_B64_gfx10:
2977   case AMDGPU::V_LSHL_B64:
2978   case AMDGPU::V_LSHRREV_B64:
2979   case AMDGPU::V_LSHRREV_B64_gfx10:
2980   case AMDGPU::V_LSHR_B64:
2981   case AMDGPU::V_ASHRREV_I64:
2982   case AMDGPU::V_ASHRREV_I64_gfx10:
2983   case AMDGPU::V_ASHR_I64:
2984     return 1;
2985   default:
2986     return 2;
2987   }
2988 }
2989 
2990 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2991   const MCOperand &MO = Inst.getOperand(OpIdx);
2992   if (MO.isImm()) {
2993     return !isInlineConstant(Inst, OpIdx);
2994   } else if (MO.isReg()) {
2995     auto Reg = MO.getReg();
2996     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2997     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2998   } else {
2999     return true;
3000   }
3001 }
3002 
3003 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
3004   const unsigned Opcode = Inst.getOpcode();
3005   const MCInstrDesc &Desc = MII.get(Opcode);
3006   unsigned ConstantBusUseCount = 0;
3007   unsigned NumLiterals = 0;
3008   unsigned LiteralSize;
3009 
3010   if (Desc.TSFlags &
3011       (SIInstrFlags::VOPC |
3012        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3013        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3014        SIInstrFlags::SDWA)) {
3015     // Check special imm operands (used by madmk, etc)
3016     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3017       ++ConstantBusUseCount;
3018     }
3019 
3020     SmallDenseSet<unsigned> SGPRsUsed;
3021     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3022     if (SGPRUsed != AMDGPU::NoRegister) {
3023       SGPRsUsed.insert(SGPRUsed);
3024       ++ConstantBusUseCount;
3025     }
3026 
3027     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3028     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3029     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3030 
3031     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3032 
3033     for (int OpIdx : OpIndices) {
3034       if (OpIdx == -1) break;
3035 
3036       const MCOperand &MO = Inst.getOperand(OpIdx);
3037       if (usesConstantBus(Inst, OpIdx)) {
3038         if (MO.isReg()) {
3039           const unsigned Reg = mc2PseudoReg(MO.getReg());
3040           // Pairs of registers with a partial intersections like these
3041           //   s0, s[0:1]
3042           //   flat_scratch_lo, flat_scratch
3043           //   flat_scratch_lo, flat_scratch_hi
3044           // are theoretically valid but they are disabled anyway.
3045           // Note that this code mimics SIInstrInfo::verifyInstruction
3046           if (!SGPRsUsed.count(Reg)) {
3047             SGPRsUsed.insert(Reg);
3048             ++ConstantBusUseCount;
3049           }
3050         } else { // Expression or a literal
3051 
3052           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3053             continue; // special operand like VINTERP attr_chan
3054 
3055           // An instruction may use only one literal.
3056           // This has been validated on the previous step.
3057           // See validateVOP3Literal.
3058           // This literal may be used as more than one operand.
3059           // If all these operands are of the same size,
3060           // this literal counts as one scalar value.
3061           // Otherwise it counts as 2 scalar values.
3062           // See "GFX10 Shader Programming", section 3.6.2.3.
3063 
3064           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3065           if (Size < 4) Size = 4;
3066 
3067           if (NumLiterals == 0) {
3068             NumLiterals = 1;
3069             LiteralSize = Size;
3070           } else if (LiteralSize != Size) {
3071             NumLiterals = 2;
3072           }
3073         }
3074       }
3075     }
3076   }
3077   ConstantBusUseCount += NumLiterals;
3078 
3079   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3080 }
3081 
3082 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3083   const unsigned Opcode = Inst.getOpcode();
3084   const MCInstrDesc &Desc = MII.get(Opcode);
3085 
3086   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3087   if (DstIdx == -1 ||
3088       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3089     return true;
3090   }
3091 
3092   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3093 
3094   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3095   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3096   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3097 
3098   assert(DstIdx != -1);
3099   const MCOperand &Dst = Inst.getOperand(DstIdx);
3100   assert(Dst.isReg());
3101   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3102 
3103   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3104 
3105   for (int SrcIdx : SrcIndices) {
3106     if (SrcIdx == -1) break;
3107     const MCOperand &Src = Inst.getOperand(SrcIdx);
3108     if (Src.isReg()) {
3109       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3110       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3111         return false;
3112       }
3113     }
3114   }
3115 
3116   return true;
3117 }
3118 
3119 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3120 
3121   const unsigned Opc = Inst.getOpcode();
3122   const MCInstrDesc &Desc = MII.get(Opc);
3123 
3124   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3125     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3126     assert(ClampIdx != -1);
3127     return Inst.getOperand(ClampIdx).getImm() == 0;
3128   }
3129 
3130   return true;
3131 }
3132 
3133 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3134 
3135   const unsigned Opc = Inst.getOpcode();
3136   const MCInstrDesc &Desc = MII.get(Opc);
3137 
3138   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3139     return true;
3140 
3141   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3142   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3143   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3144 
3145   assert(VDataIdx != -1);
3146 
3147   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3148     return true;
3149 
3150   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3151   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3152   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3153   if (DMask == 0)
3154     DMask = 1;
3155 
3156   unsigned DataSize =
3157     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3158   if (hasPackedD16()) {
3159     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3160     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3161       DataSize = (DataSize + 1) / 2;
3162   }
3163 
3164   return (VDataSize / 4) == DataSize + TFESize;
3165 }
3166 
3167 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3168   const unsigned Opc = Inst.getOpcode();
3169   const MCInstrDesc &Desc = MII.get(Opc);
3170 
3171   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3172     return true;
3173 
3174   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3175 
3176   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3177       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3178   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3179   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3180   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3181 
3182   assert(VAddr0Idx != -1);
3183   assert(SrsrcIdx != -1);
3184   assert(SrsrcIdx > VAddr0Idx);
3185 
3186   if (DimIdx == -1)
3187     return true; // intersect_ray
3188 
3189   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3190   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3191   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3192   unsigned VAddrSize =
3193       IsNSA ? SrsrcIdx - VAddr0Idx
3194             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3195 
3196   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3197                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3198                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3199                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3200   if (!IsNSA) {
3201     if (AddrSize > 8)
3202       AddrSize = 16;
3203     else if (AddrSize > 4)
3204       AddrSize = 8;
3205   }
3206 
3207   return VAddrSize == AddrSize;
3208 }
3209 
3210 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3211 
3212   const unsigned Opc = Inst.getOpcode();
3213   const MCInstrDesc &Desc = MII.get(Opc);
3214 
3215   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3216     return true;
3217   if (!Desc.mayLoad() || !Desc.mayStore())
3218     return true; // Not atomic
3219 
3220   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3221   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3222 
3223   // This is an incomplete check because image_atomic_cmpswap
3224   // may only use 0x3 and 0xf while other atomic operations
3225   // may use 0x1 and 0x3. However these limitations are
3226   // verified when we check that dmask matches dst size.
3227   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3228 }
3229 
3230 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3231 
3232   const unsigned Opc = Inst.getOpcode();
3233   const MCInstrDesc &Desc = MII.get(Opc);
3234 
3235   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3236     return true;
3237 
3238   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3239   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3240 
3241   // GATHER4 instructions use dmask in a different fashion compared to
3242   // other MIMG instructions. The only useful DMASK values are
3243   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3244   // (red,red,red,red) etc.) The ISA document doesn't mention
3245   // this.
3246   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3247 }
3248 
3249 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3250 {
3251   switch (Opcode) {
3252   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3253   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3254   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3255     return true;
3256   default:
3257     return false;
3258   }
3259 }
3260 
3261 // movrels* opcodes should only allow VGPRS as src0.
3262 // This is specified in .td description for vop1/vop3,
3263 // but sdwa is handled differently. See isSDWAOperand.
3264 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3265 
3266   const unsigned Opc = Inst.getOpcode();
3267   const MCInstrDesc &Desc = MII.get(Opc);
3268 
3269   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3270     return true;
3271 
3272   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3273   assert(Src0Idx != -1);
3274 
3275   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3276   if (!Src0.isReg())
3277     return false;
3278 
3279   auto Reg = Src0.getReg();
3280   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3281   return !isSGPR(mc2PseudoReg(Reg), TRI);
3282 }
3283 
3284 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3285 
3286   const unsigned Opc = Inst.getOpcode();
3287 
3288   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3289     return true;
3290 
3291   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3292   assert(Src0Idx != -1);
3293 
3294   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3295   if (!Src0.isReg())
3296     return true;
3297 
3298   auto Reg = Src0.getReg();
3299   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3300   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3301     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3302     return false;
3303   }
3304 
3305   return true;
3306 }
3307 
3308 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3309   switch (Inst.getOpcode()) {
3310   default:
3311     return true;
3312   case V_DIV_SCALE_F32_gfx6_gfx7:
3313   case V_DIV_SCALE_F32_vi:
3314   case V_DIV_SCALE_F32_gfx10:
3315   case V_DIV_SCALE_F64_gfx6_gfx7:
3316   case V_DIV_SCALE_F64_vi:
3317   case V_DIV_SCALE_F64_gfx10:
3318     break;
3319   }
3320 
3321   // TODO: Check that src0 = src1 or src2.
3322 
3323   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3324                     AMDGPU::OpName::src2_modifiers,
3325                     AMDGPU::OpName::src2_modifiers}) {
3326     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3327             .getImm() &
3328         SISrcMods::ABS) {
3329       Error(getLoc(), "ABS not allowed in VOP3B instructions");
3330       return false;
3331     }
3332   }
3333 
3334   return true;
3335 }
3336 
3337 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3338 
3339   const unsigned Opc = Inst.getOpcode();
3340   const MCInstrDesc &Desc = MII.get(Opc);
3341 
3342   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3343     return true;
3344 
3345   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3346   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3347     if (isCI() || isSI())
3348       return false;
3349   }
3350 
3351   return true;
3352 }
3353 
3354 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3355   const unsigned Opc = Inst.getOpcode();
3356   const MCInstrDesc &Desc = MII.get(Opc);
3357 
3358   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3359     return true;
3360 
3361   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3362   if (DimIdx < 0)
3363     return true;
3364 
3365   long Imm = Inst.getOperand(DimIdx).getImm();
3366   if (Imm < 0 || Imm >= 8)
3367     return false;
3368 
3369   return true;
3370 }
3371 
3372 static bool IsRevOpcode(const unsigned Opcode)
3373 {
3374   switch (Opcode) {
3375   case AMDGPU::V_SUBREV_F32_e32:
3376   case AMDGPU::V_SUBREV_F32_e64:
3377   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3378   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3379   case AMDGPU::V_SUBREV_F32_e32_vi:
3380   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3381   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3382   case AMDGPU::V_SUBREV_F32_e64_vi:
3383 
3384   case AMDGPU::V_SUBREV_CO_U32_e32:
3385   case AMDGPU::V_SUBREV_CO_U32_e64:
3386   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3387   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3388 
3389   case AMDGPU::V_SUBBREV_U32_e32:
3390   case AMDGPU::V_SUBBREV_U32_e64:
3391   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3392   case AMDGPU::V_SUBBREV_U32_e32_vi:
3393   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3394   case AMDGPU::V_SUBBREV_U32_e64_vi:
3395 
3396   case AMDGPU::V_SUBREV_U32_e32:
3397   case AMDGPU::V_SUBREV_U32_e64:
3398   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3399   case AMDGPU::V_SUBREV_U32_e32_vi:
3400   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3401   case AMDGPU::V_SUBREV_U32_e64_vi:
3402 
3403   case AMDGPU::V_SUBREV_F16_e32:
3404   case AMDGPU::V_SUBREV_F16_e64:
3405   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3406   case AMDGPU::V_SUBREV_F16_e32_vi:
3407   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3408   case AMDGPU::V_SUBREV_F16_e64_vi:
3409 
3410   case AMDGPU::V_SUBREV_U16_e32:
3411   case AMDGPU::V_SUBREV_U16_e64:
3412   case AMDGPU::V_SUBREV_U16_e32_vi:
3413   case AMDGPU::V_SUBREV_U16_e64_vi:
3414 
3415   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3416   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3417   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3418 
3419   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3420   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3421 
3422   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3423   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3424 
3425   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3426   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3427 
3428   case AMDGPU::V_LSHRREV_B32_e32:
3429   case AMDGPU::V_LSHRREV_B32_e64:
3430   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3431   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3432   case AMDGPU::V_LSHRREV_B32_e32_vi:
3433   case AMDGPU::V_LSHRREV_B32_e64_vi:
3434   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3435   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3436 
3437   case AMDGPU::V_ASHRREV_I32_e32:
3438   case AMDGPU::V_ASHRREV_I32_e64:
3439   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3440   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3441   case AMDGPU::V_ASHRREV_I32_e32_vi:
3442   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3443   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3444   case AMDGPU::V_ASHRREV_I32_e64_vi:
3445 
3446   case AMDGPU::V_LSHLREV_B32_e32:
3447   case AMDGPU::V_LSHLREV_B32_e64:
3448   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3449   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3450   case AMDGPU::V_LSHLREV_B32_e32_vi:
3451   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3452   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3453   case AMDGPU::V_LSHLREV_B32_e64_vi:
3454 
3455   case AMDGPU::V_LSHLREV_B16_e32:
3456   case AMDGPU::V_LSHLREV_B16_e64:
3457   case AMDGPU::V_LSHLREV_B16_e32_vi:
3458   case AMDGPU::V_LSHLREV_B16_e64_vi:
3459   case AMDGPU::V_LSHLREV_B16_gfx10:
3460 
3461   case AMDGPU::V_LSHRREV_B16_e32:
3462   case AMDGPU::V_LSHRREV_B16_e64:
3463   case AMDGPU::V_LSHRREV_B16_e32_vi:
3464   case AMDGPU::V_LSHRREV_B16_e64_vi:
3465   case AMDGPU::V_LSHRREV_B16_gfx10:
3466 
3467   case AMDGPU::V_ASHRREV_I16_e32:
3468   case AMDGPU::V_ASHRREV_I16_e64:
3469   case AMDGPU::V_ASHRREV_I16_e32_vi:
3470   case AMDGPU::V_ASHRREV_I16_e64_vi:
3471   case AMDGPU::V_ASHRREV_I16_gfx10:
3472 
3473   case AMDGPU::V_LSHLREV_B64:
3474   case AMDGPU::V_LSHLREV_B64_gfx10:
3475   case AMDGPU::V_LSHLREV_B64_vi:
3476 
3477   case AMDGPU::V_LSHRREV_B64:
3478   case AMDGPU::V_LSHRREV_B64_gfx10:
3479   case AMDGPU::V_LSHRREV_B64_vi:
3480 
3481   case AMDGPU::V_ASHRREV_I64:
3482   case AMDGPU::V_ASHRREV_I64_gfx10:
3483   case AMDGPU::V_ASHRREV_I64_vi:
3484 
3485   case AMDGPU::V_PK_LSHLREV_B16:
3486   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3487   case AMDGPU::V_PK_LSHLREV_B16_vi:
3488 
3489   case AMDGPU::V_PK_LSHRREV_B16:
3490   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3491   case AMDGPU::V_PK_LSHRREV_B16_vi:
3492   case AMDGPU::V_PK_ASHRREV_I16:
3493   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3494   case AMDGPU::V_PK_ASHRREV_I16_vi:
3495     return true;
3496   default:
3497     return false;
3498   }
3499 }
3500 
3501 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3502 
3503   using namespace SIInstrFlags;
3504   const unsigned Opcode = Inst.getOpcode();
3505   const MCInstrDesc &Desc = MII.get(Opcode);
3506 
3507   // lds_direct register is defined so that it can be used
3508   // with 9-bit operands only. Ignore encodings which do not accept these.
3509   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3510     return true;
3511 
3512   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3513   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3514   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3515 
3516   const int SrcIndices[] = { Src1Idx, Src2Idx };
3517 
3518   // lds_direct cannot be specified as either src1 or src2.
3519   for (int SrcIdx : SrcIndices) {
3520     if (SrcIdx == -1) break;
3521     const MCOperand &Src = Inst.getOperand(SrcIdx);
3522     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3523       return false;
3524     }
3525   }
3526 
3527   if (Src0Idx == -1)
3528     return true;
3529 
3530   const MCOperand &Src = Inst.getOperand(Src0Idx);
3531   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3532     return true;
3533 
3534   // lds_direct is specified as src0. Check additional limitations.
3535   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3536 }
3537 
3538 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3539   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3540     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3541     if (Op.isFlatOffset())
3542       return Op.getStartLoc();
3543   }
3544   return getLoc();
3545 }
3546 
3547 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3548                                          const OperandVector &Operands) {
3549   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3550   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3551     return true;
3552 
3553   auto Opcode = Inst.getOpcode();
3554   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3555   assert(OpNum != -1);
3556 
3557   const auto &Op = Inst.getOperand(OpNum);
3558   if (!hasFlatOffsets() && Op.getImm() != 0) {
3559     Error(getFlatOffsetLoc(Operands),
3560           "flat offset modifier is not supported on this GPU");
3561     return false;
3562   }
3563 
3564   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3565   // For FLAT segment the offset must be positive;
3566   // MSB is ignored and forced to zero.
3567   unsigned OffsetSize = isGFX9() ? 13 : 12;
3568   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3569     if (!isIntN(OffsetSize, Op.getImm())) {
3570       Error(getFlatOffsetLoc(Operands),
3571             isGFX9() ? "expected a 13-bit signed offset" :
3572                        "expected a 12-bit signed offset");
3573       return false;
3574     }
3575   } else {
3576     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3577       Error(getFlatOffsetLoc(Operands),
3578             isGFX9() ? "expected a 12-bit unsigned offset" :
3579                        "expected an 11-bit unsigned offset");
3580       return false;
3581     }
3582   }
3583 
3584   return true;
3585 }
3586 
3587 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3588   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3589     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3590     if (Op.isSMEMOffset())
3591       return Op.getStartLoc();
3592   }
3593   return getLoc();
3594 }
3595 
3596 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3597                                          const OperandVector &Operands) {
3598   if (isCI() || isSI())
3599     return true;
3600 
3601   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3602   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3603     return true;
3604 
3605   auto Opcode = Inst.getOpcode();
3606   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3607   if (OpNum == -1)
3608     return true;
3609 
3610   const auto &Op = Inst.getOperand(OpNum);
3611   if (!Op.isImm())
3612     return true;
3613 
3614   uint64_t Offset = Op.getImm();
3615   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3616   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3617       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3618     return true;
3619 
3620   Error(getSMEMOffsetLoc(Operands),
3621         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3622                                "expected a 21-bit signed offset");
3623 
3624   return false;
3625 }
3626 
3627 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3628   unsigned Opcode = Inst.getOpcode();
3629   const MCInstrDesc &Desc = MII.get(Opcode);
3630   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3631     return true;
3632 
3633   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3634   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3635 
3636   const int OpIndices[] = { Src0Idx, Src1Idx };
3637 
3638   unsigned NumExprs = 0;
3639   unsigned NumLiterals = 0;
3640   uint32_t LiteralValue;
3641 
3642   for (int OpIdx : OpIndices) {
3643     if (OpIdx == -1) break;
3644 
3645     const MCOperand &MO = Inst.getOperand(OpIdx);
3646     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3647     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3648       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3649         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3650         if (NumLiterals == 0 || LiteralValue != Value) {
3651           LiteralValue = Value;
3652           ++NumLiterals;
3653         }
3654       } else if (MO.isExpr()) {
3655         ++NumExprs;
3656       }
3657     }
3658   }
3659 
3660   return NumLiterals + NumExprs <= 1;
3661 }
3662 
3663 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3664   const unsigned Opc = Inst.getOpcode();
3665   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3666       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3667     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3668     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3669 
3670     if (OpSel & ~3)
3671       return false;
3672   }
3673   return true;
3674 }
3675 
3676 // Check if VCC register matches wavefront size
3677 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3678   auto FB = getFeatureBits();
3679   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3680     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3681 }
3682 
3683 // VOP3 literal is only allowed in GFX10+ and only one can be used
3684 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3685   unsigned Opcode = Inst.getOpcode();
3686   const MCInstrDesc &Desc = MII.get(Opcode);
3687   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3688     return true;
3689 
3690   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3691   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3692   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3693 
3694   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3695 
3696   unsigned NumExprs = 0;
3697   unsigned NumLiterals = 0;
3698   uint32_t LiteralValue;
3699 
3700   for (int OpIdx : OpIndices) {
3701     if (OpIdx == -1) break;
3702 
3703     const MCOperand &MO = Inst.getOperand(OpIdx);
3704     if (!MO.isImm() && !MO.isExpr())
3705       continue;
3706     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3707       continue;
3708 
3709     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3710         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3711       return false;
3712 
3713     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3714       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3715       if (NumLiterals == 0 || LiteralValue != Value) {
3716         LiteralValue = Value;
3717         ++NumLiterals;
3718       }
3719     } else if (MO.isExpr()) {
3720       ++NumExprs;
3721     }
3722   }
3723   NumLiterals += NumExprs;
3724 
3725   return !NumLiterals ||
3726          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3727 }
3728 
3729 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3730                                           const SMLoc &IDLoc,
3731                                           const OperandVector &Operands) {
3732   if (!validateLdsDirect(Inst)) {
3733     Error(IDLoc,
3734       "invalid use of lds_direct");
3735     return false;
3736   }
3737   if (!validateSOPLiteral(Inst)) {
3738     Error(IDLoc,
3739       "only one literal operand is allowed");
3740     return false;
3741   }
3742   if (!validateVOP3Literal(Inst)) {
3743     Error(IDLoc,
3744       "invalid literal operand");
3745     return false;
3746   }
3747   if (!validateConstantBusLimitations(Inst)) {
3748     Error(IDLoc,
3749       "invalid operand (violates constant bus restrictions)");
3750     return false;
3751   }
3752   if (!validateEarlyClobberLimitations(Inst)) {
3753     Error(IDLoc,
3754       "destination must be different than all sources");
3755     return false;
3756   }
3757   if (!validateIntClampSupported(Inst)) {
3758     Error(IDLoc,
3759       "integer clamping is not supported on this GPU");
3760     return false;
3761   }
3762   if (!validateOpSel(Inst)) {
3763     Error(IDLoc,
3764       "invalid op_sel operand");
3765     return false;
3766   }
3767   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3768   if (!validateMIMGD16(Inst)) {
3769     Error(IDLoc,
3770       "d16 modifier is not supported on this GPU");
3771     return false;
3772   }
3773   if (!validateMIMGDim(Inst)) {
3774     Error(IDLoc, "dim modifier is required on this GPU");
3775     return false;
3776   }
3777   if (!validateMIMGDataSize(Inst)) {
3778     Error(IDLoc,
3779       "image data size does not match dmask and tfe");
3780     return false;
3781   }
3782   if (!validateMIMGAddrSize(Inst)) {
3783     Error(IDLoc,
3784       "image address size does not match dim and a16");
3785     return false;
3786   }
3787   if (!validateMIMGAtomicDMask(Inst)) {
3788     Error(IDLoc,
3789       "invalid atomic image dmask");
3790     return false;
3791   }
3792   if (!validateMIMGGatherDMask(Inst)) {
3793     Error(IDLoc,
3794       "invalid image_gather dmask: only one bit must be set");
3795     return false;
3796   }
3797   if (!validateMovrels(Inst)) {
3798     Error(IDLoc, "source operand must be a VGPR");
3799     return false;
3800   }
3801   if (!validateFlatOffset(Inst, Operands)) {
3802     return false;
3803   }
3804   if (!validateSMEMOffset(Inst, Operands)) {
3805     return false;
3806   }
3807   if (!validateMAIAccWrite(Inst)) {
3808     return false;
3809   }
3810   if (!validateDivScale(Inst)) {
3811     return false;
3812   }
3813 
3814   return true;
3815 }
3816 
3817 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3818                                             const FeatureBitset &FBS,
3819                                             unsigned VariantID = 0);
3820 
3821 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3822                                 const FeatureBitset &AvailableFeatures,
3823                                 unsigned VariantID);
3824 
3825 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3826                                        const FeatureBitset &FBS) {
3827   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3828 }
3829 
3830 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3831                                        const FeatureBitset &FBS,
3832                                        ArrayRef<unsigned> Variants) {
3833   for (auto Variant : Variants) {
3834     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3835       return true;
3836   }
3837 
3838   return false;
3839 }
3840 
3841 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3842                                                   const SMLoc &IDLoc) {
3843   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3844 
3845   // Check if requested instruction variant is supported.
3846   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3847     return false;
3848 
3849   // This instruction is not supported.
3850   // Clear any other pending errors because they are no longer relevant.
3851   getParser().clearPendingErrors();
3852 
3853   // Requested instruction variant is not supported.
3854   // Check if any other variants are supported.
3855   StringRef VariantName = getMatchedVariantName();
3856   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3857     return Error(IDLoc,
3858                  Twine(VariantName,
3859                        " variant of this instruction is not supported"));
3860   }
3861 
3862   // Finally check if this instruction is supported on any other GPU.
3863   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3864     return Error(IDLoc, "instruction not supported on this GPU");
3865   }
3866 
3867   // Instruction not supported on any GPU. Probably a typo.
3868   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3869   return Error(IDLoc, "invalid instruction" + Suggestion);
3870 }
3871 
3872 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3873                                               OperandVector &Operands,
3874                                               MCStreamer &Out,
3875                                               uint64_t &ErrorInfo,
3876                                               bool MatchingInlineAsm) {
3877   MCInst Inst;
3878   unsigned Result = Match_Success;
3879   for (auto Variant : getMatchedVariants()) {
3880     uint64_t EI;
3881     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3882                                   Variant);
3883     // We order match statuses from least to most specific. We use most specific
3884     // status as resulting
3885     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3886     if ((R == Match_Success) ||
3887         (R == Match_PreferE32) ||
3888         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3889         (R == Match_InvalidOperand && Result != Match_MissingFeature
3890                                    && Result != Match_PreferE32) ||
3891         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3892                                    && Result != Match_MissingFeature
3893                                    && Result != Match_PreferE32)) {
3894       Result = R;
3895       ErrorInfo = EI;
3896     }
3897     if (R == Match_Success)
3898       break;
3899   }
3900 
3901   if (Result == Match_Success) {
3902     if (!validateInstruction(Inst, IDLoc, Operands)) {
3903       return true;
3904     }
3905     Inst.setLoc(IDLoc);
3906     Out.emitInstruction(Inst, getSTI());
3907     return false;
3908   }
3909 
3910   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3911   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3912     return true;
3913   }
3914 
3915   switch (Result) {
3916   default: break;
3917   case Match_MissingFeature:
3918     // It has been verified that the specified instruction
3919     // mnemonic is valid. A match was found but it requires
3920     // features which are not supported on this GPU.
3921     return Error(IDLoc, "operands are not valid for this GPU or mode");
3922 
3923   case Match_InvalidOperand: {
3924     SMLoc ErrorLoc = IDLoc;
3925     if (ErrorInfo != ~0ULL) {
3926       if (ErrorInfo >= Operands.size()) {
3927         return Error(IDLoc, "too few operands for instruction");
3928       }
3929       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3930       if (ErrorLoc == SMLoc())
3931         ErrorLoc = IDLoc;
3932     }
3933     return Error(ErrorLoc, "invalid operand for instruction");
3934   }
3935 
3936   case Match_PreferE32:
3937     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3938                         "should be encoded as e32");
3939   case Match_MnemonicFail:
3940     llvm_unreachable("Invalid instructions should have been handled already");
3941   }
3942   llvm_unreachable("Implement any new match types added!");
3943 }
3944 
3945 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3946   int64_t Tmp = -1;
3947   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3948     return true;
3949   }
3950   if (getParser().parseAbsoluteExpression(Tmp)) {
3951     return true;
3952   }
3953   Ret = static_cast<uint32_t>(Tmp);
3954   return false;
3955 }
3956 
3957 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3958                                                uint32_t &Minor) {
3959   if (ParseAsAbsoluteExpression(Major))
3960     return TokError("invalid major version");
3961 
3962   if (getLexer().isNot(AsmToken::Comma))
3963     return TokError("minor version number required, comma expected");
3964   Lex();
3965 
3966   if (ParseAsAbsoluteExpression(Minor))
3967     return TokError("invalid minor version");
3968 
3969   return false;
3970 }
3971 
3972 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3973   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3974     return TokError("directive only supported for amdgcn architecture");
3975 
3976   std::string Target;
3977 
3978   SMLoc TargetStart = getTok().getLoc();
3979   if (getParser().parseEscapedString(Target))
3980     return true;
3981   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3982 
3983   std::string ExpectedTarget;
3984   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3985   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3986 
3987   if (Target != ExpectedTargetOS.str())
3988     return getParser().Error(TargetRange.Start, "target must match options",
3989                              TargetRange);
3990 
3991   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3992   return false;
3993 }
3994 
3995 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3996   return getParser().Error(Range.Start, "value out of range", Range);
3997 }
3998 
3999 bool AMDGPUAsmParser::calculateGPRBlocks(
4000     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4001     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4002     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4003     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4004   // TODO(scott.linder): These calculations are duplicated from
4005   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4006   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4007 
4008   unsigned NumVGPRs = NextFreeVGPR;
4009   unsigned NumSGPRs = NextFreeSGPR;
4010 
4011   if (Version.Major >= 10)
4012     NumSGPRs = 0;
4013   else {
4014     unsigned MaxAddressableNumSGPRs =
4015         IsaInfo::getAddressableNumSGPRs(&getSTI());
4016 
4017     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4018         NumSGPRs > MaxAddressableNumSGPRs)
4019       return OutOfRangeError(SGPRRange);
4020 
4021     NumSGPRs +=
4022         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4023 
4024     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4025         NumSGPRs > MaxAddressableNumSGPRs)
4026       return OutOfRangeError(SGPRRange);
4027 
4028     if (Features.test(FeatureSGPRInitBug))
4029       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4030   }
4031 
4032   VGPRBlocks =
4033       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4034   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4035 
4036   return false;
4037 }
4038 
4039 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4040   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4041     return TokError("directive only supported for amdgcn architecture");
4042 
4043   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4044     return TokError("directive only supported for amdhsa OS");
4045 
4046   StringRef KernelName;
4047   if (getParser().parseIdentifier(KernelName))
4048     return true;
4049 
4050   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4051 
4052   StringSet<> Seen;
4053 
4054   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4055 
4056   SMRange VGPRRange;
4057   uint64_t NextFreeVGPR = 0;
4058   SMRange SGPRRange;
4059   uint64_t NextFreeSGPR = 0;
4060   unsigned UserSGPRCount = 0;
4061   bool ReserveVCC = true;
4062   bool ReserveFlatScr = true;
4063   bool ReserveXNACK = hasXNACK();
4064   Optional<bool> EnableWavefrontSize32;
4065 
4066   while (true) {
4067     while (getLexer().is(AsmToken::EndOfStatement))
4068       Lex();
4069 
4070     if (getLexer().isNot(AsmToken::Identifier))
4071       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4072 
4073     StringRef ID = getTok().getIdentifier();
4074     SMRange IDRange = getTok().getLocRange();
4075     Lex();
4076 
4077     if (ID == ".end_amdhsa_kernel")
4078       break;
4079 
4080     if (Seen.find(ID) != Seen.end())
4081       return TokError(".amdhsa_ directives cannot be repeated");
4082     Seen.insert(ID);
4083 
4084     SMLoc ValStart = getTok().getLoc();
4085     int64_t IVal;
4086     if (getParser().parseAbsoluteExpression(IVal))
4087       return true;
4088     SMLoc ValEnd = getTok().getLoc();
4089     SMRange ValRange = SMRange(ValStart, ValEnd);
4090 
4091     if (IVal < 0)
4092       return OutOfRangeError(ValRange);
4093 
4094     uint64_t Val = IVal;
4095 
4096 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4097   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4098     return OutOfRangeError(RANGE);                                             \
4099   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4100 
4101     if (ID == ".amdhsa_group_segment_fixed_size") {
4102       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4103         return OutOfRangeError(ValRange);
4104       KD.group_segment_fixed_size = Val;
4105     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4106       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4107         return OutOfRangeError(ValRange);
4108       KD.private_segment_fixed_size = Val;
4109     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4110       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4111                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4112                        Val, ValRange);
4113       if (Val)
4114         UserSGPRCount += 4;
4115     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4116       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4117                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4118                        ValRange);
4119       if (Val)
4120         UserSGPRCount += 2;
4121     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4122       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4123                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4124                        ValRange);
4125       if (Val)
4126         UserSGPRCount += 2;
4127     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4128       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4129                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4130                        Val, ValRange);
4131       if (Val)
4132         UserSGPRCount += 2;
4133     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4134       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4135                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4136                        ValRange);
4137       if (Val)
4138         UserSGPRCount += 2;
4139     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4140       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4141                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4142                        ValRange);
4143       if (Val)
4144         UserSGPRCount += 2;
4145     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4146       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4147                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4148                        Val, ValRange);
4149       if (Val)
4150         UserSGPRCount += 1;
4151     } else if (ID == ".amdhsa_wavefront_size32") {
4152       if (IVersion.Major < 10)
4153         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4154                                  IDRange);
4155       EnableWavefrontSize32 = Val;
4156       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4157                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4158                        Val, ValRange);
4159     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4160       PARSE_BITS_ENTRY(
4161           KD.compute_pgm_rsrc2,
4162           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4163           ValRange);
4164     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4165       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4166                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4167                        ValRange);
4168     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4169       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4170                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4171                        ValRange);
4172     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4173       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4174                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4175                        ValRange);
4176     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4177       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4178                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4179                        ValRange);
4180     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4181       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4182                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4183                        ValRange);
4184     } else if (ID == ".amdhsa_next_free_vgpr") {
4185       VGPRRange = ValRange;
4186       NextFreeVGPR = Val;
4187     } else if (ID == ".amdhsa_next_free_sgpr") {
4188       SGPRRange = ValRange;
4189       NextFreeSGPR = Val;
4190     } else if (ID == ".amdhsa_reserve_vcc") {
4191       if (!isUInt<1>(Val))
4192         return OutOfRangeError(ValRange);
4193       ReserveVCC = Val;
4194     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4195       if (IVersion.Major < 7)
4196         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4197                                  IDRange);
4198       if (!isUInt<1>(Val))
4199         return OutOfRangeError(ValRange);
4200       ReserveFlatScr = Val;
4201     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4202       if (IVersion.Major < 8)
4203         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4204                                  IDRange);
4205       if (!isUInt<1>(Val))
4206         return OutOfRangeError(ValRange);
4207       ReserveXNACK = Val;
4208     } else if (ID == ".amdhsa_float_round_mode_32") {
4209       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4210                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4211     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4212       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4213                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4214     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4215       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4216                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4217     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4218       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4219                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4220                        ValRange);
4221     } else if (ID == ".amdhsa_dx10_clamp") {
4222       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4223                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4224     } else if (ID == ".amdhsa_ieee_mode") {
4225       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4226                        Val, ValRange);
4227     } else if (ID == ".amdhsa_fp16_overflow") {
4228       if (IVersion.Major < 9)
4229         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4230                                  IDRange);
4231       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4232                        ValRange);
4233     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4234       if (IVersion.Major < 10)
4235         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4236                                  IDRange);
4237       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4238                        ValRange);
4239     } else if (ID == ".amdhsa_memory_ordered") {
4240       if (IVersion.Major < 10)
4241         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4242                                  IDRange);
4243       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4244                        ValRange);
4245     } else if (ID == ".amdhsa_forward_progress") {
4246       if (IVersion.Major < 10)
4247         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4248                                  IDRange);
4249       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4250                        ValRange);
4251     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4252       PARSE_BITS_ENTRY(
4253           KD.compute_pgm_rsrc2,
4254           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4255           ValRange);
4256     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4257       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4258                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4259                        Val, ValRange);
4260     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4261       PARSE_BITS_ENTRY(
4262           KD.compute_pgm_rsrc2,
4263           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4264           ValRange);
4265     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4266       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4267                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4268                        Val, ValRange);
4269     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4270       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4271                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4272                        Val, ValRange);
4273     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4274       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4275                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4276                        Val, ValRange);
4277     } else if (ID == ".amdhsa_exception_int_div_zero") {
4278       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4279                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4280                        Val, ValRange);
4281     } else {
4282       return getParser().Error(IDRange.Start,
4283                                "unknown .amdhsa_kernel directive", IDRange);
4284     }
4285 
4286 #undef PARSE_BITS_ENTRY
4287   }
4288 
4289   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4290     return TokError(".amdhsa_next_free_vgpr directive is required");
4291 
4292   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4293     return TokError(".amdhsa_next_free_sgpr directive is required");
4294 
4295   unsigned VGPRBlocks;
4296   unsigned SGPRBlocks;
4297   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4298                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4299                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4300                          SGPRBlocks))
4301     return true;
4302 
4303   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4304           VGPRBlocks))
4305     return OutOfRangeError(VGPRRange);
4306   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4307                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4308 
4309   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4310           SGPRBlocks))
4311     return OutOfRangeError(SGPRRange);
4312   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4313                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4314                   SGPRBlocks);
4315 
4316   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4317     return TokError("too many user SGPRs enabled");
4318   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4319                   UserSGPRCount);
4320 
4321   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4322       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4323       ReserveFlatScr, ReserveXNACK);
4324   return false;
4325 }
4326 
4327 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4328   uint32_t Major;
4329   uint32_t Minor;
4330 
4331   if (ParseDirectiveMajorMinor(Major, Minor))
4332     return true;
4333 
4334   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4335   return false;
4336 }
4337 
4338 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4339   uint32_t Major;
4340   uint32_t Minor;
4341   uint32_t Stepping;
4342   StringRef VendorName;
4343   StringRef ArchName;
4344 
4345   // If this directive has no arguments, then use the ISA version for the
4346   // targeted GPU.
4347   if (getLexer().is(AsmToken::EndOfStatement)) {
4348     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4349     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4350                                                       ISA.Stepping,
4351                                                       "AMD", "AMDGPU");
4352     return false;
4353   }
4354 
4355   if (ParseDirectiveMajorMinor(Major, Minor))
4356     return true;
4357 
4358   if (getLexer().isNot(AsmToken::Comma))
4359     return TokError("stepping version number required, comma expected");
4360   Lex();
4361 
4362   if (ParseAsAbsoluteExpression(Stepping))
4363     return TokError("invalid stepping version");
4364 
4365   if (getLexer().isNot(AsmToken::Comma))
4366     return TokError("vendor name required, comma expected");
4367   Lex();
4368 
4369   if (getLexer().isNot(AsmToken::String))
4370     return TokError("invalid vendor name");
4371 
4372   VendorName = getLexer().getTok().getStringContents();
4373   Lex();
4374 
4375   if (getLexer().isNot(AsmToken::Comma))
4376     return TokError("arch name required, comma expected");
4377   Lex();
4378 
4379   if (getLexer().isNot(AsmToken::String))
4380     return TokError("invalid arch name");
4381 
4382   ArchName = getLexer().getTok().getStringContents();
4383   Lex();
4384 
4385   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4386                                                     VendorName, ArchName);
4387   return false;
4388 }
4389 
4390 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4391                                                amd_kernel_code_t &Header) {
4392   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4393   // assembly for backwards compatibility.
4394   if (ID == "max_scratch_backing_memory_byte_size") {
4395     Parser.eatToEndOfStatement();
4396     return false;
4397   }
4398 
4399   SmallString<40> ErrStr;
4400   raw_svector_ostream Err(ErrStr);
4401   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4402     return TokError(Err.str());
4403   }
4404   Lex();
4405 
4406   if (ID == "enable_wavefront_size32") {
4407     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4408       if (!isGFX10())
4409         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4410       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4411         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4412     } else {
4413       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4414         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4415     }
4416   }
4417 
4418   if (ID == "wavefront_size") {
4419     if (Header.wavefront_size == 5) {
4420       if (!isGFX10())
4421         return TokError("wavefront_size=5 is only allowed on GFX10+");
4422       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4423         return TokError("wavefront_size=5 requires +WavefrontSize32");
4424     } else if (Header.wavefront_size == 6) {
4425       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4426         return TokError("wavefront_size=6 requires +WavefrontSize64");
4427     }
4428   }
4429 
4430   if (ID == "enable_wgp_mode") {
4431     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4432       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4433   }
4434 
4435   if (ID == "enable_mem_ordered") {
4436     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4437       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4438   }
4439 
4440   if (ID == "enable_fwd_progress") {
4441     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4442       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4443   }
4444 
4445   return false;
4446 }
4447 
4448 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4449   amd_kernel_code_t Header;
4450   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4451 
4452   while (true) {
4453     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4454     // will set the current token to EndOfStatement.
4455     while(getLexer().is(AsmToken::EndOfStatement))
4456       Lex();
4457 
4458     if (getLexer().isNot(AsmToken::Identifier))
4459       return TokError("expected value identifier or .end_amd_kernel_code_t");
4460 
4461     StringRef ID = getLexer().getTok().getIdentifier();
4462     Lex();
4463 
4464     if (ID == ".end_amd_kernel_code_t")
4465       break;
4466 
4467     if (ParseAMDKernelCodeTValue(ID, Header))
4468       return true;
4469   }
4470 
4471   getTargetStreamer().EmitAMDKernelCodeT(Header);
4472 
4473   return false;
4474 }
4475 
4476 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4477   if (getLexer().isNot(AsmToken::Identifier))
4478     return TokError("expected symbol name");
4479 
4480   StringRef KernelName = Parser.getTok().getString();
4481 
4482   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4483                                            ELF::STT_AMDGPU_HSA_KERNEL);
4484   Lex();
4485 
4486   KernelScope.initialize(getContext());
4487   return false;
4488 }
4489 
4490 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4491   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4492     return Error(getParser().getTok().getLoc(),
4493                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4494                  "architectures");
4495   }
4496 
4497   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4498 
4499   std::string ISAVersionStringFromSTI;
4500   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4501   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4502 
4503   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4504     return Error(getParser().getTok().getLoc(),
4505                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4506                  "arguments specified through the command line");
4507   }
4508 
4509   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4510   Lex();
4511 
4512   return false;
4513 }
4514 
4515 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4516   const char *AssemblerDirectiveBegin;
4517   const char *AssemblerDirectiveEnd;
4518   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4519       isHsaAbiVersion3(&getSTI())
4520           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4521                             HSAMD::V3::AssemblerDirectiveEnd)
4522           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4523                             HSAMD::AssemblerDirectiveEnd);
4524 
4525   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4526     return Error(getParser().getTok().getLoc(),
4527                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4528                  "not available on non-amdhsa OSes")).str());
4529   }
4530 
4531   std::string HSAMetadataString;
4532   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4533                           HSAMetadataString))
4534     return true;
4535 
4536   if (isHsaAbiVersion3(&getSTI())) {
4537     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4538       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4539   } else {
4540     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4541       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4542   }
4543 
4544   return false;
4545 }
4546 
4547 /// Common code to parse out a block of text (typically YAML) between start and
4548 /// end directives.
4549 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4550                                           const char *AssemblerDirectiveEnd,
4551                                           std::string &CollectString) {
4552 
4553   raw_string_ostream CollectStream(CollectString);
4554 
4555   getLexer().setSkipSpace(false);
4556 
4557   bool FoundEnd = false;
4558   while (!getLexer().is(AsmToken::Eof)) {
4559     while (getLexer().is(AsmToken::Space)) {
4560       CollectStream << getLexer().getTok().getString();
4561       Lex();
4562     }
4563 
4564     if (getLexer().is(AsmToken::Identifier)) {
4565       StringRef ID = getLexer().getTok().getIdentifier();
4566       if (ID == AssemblerDirectiveEnd) {
4567         Lex();
4568         FoundEnd = true;
4569         break;
4570       }
4571     }
4572 
4573     CollectStream << Parser.parseStringToEndOfStatement()
4574                   << getContext().getAsmInfo()->getSeparatorString();
4575 
4576     Parser.eatToEndOfStatement();
4577   }
4578 
4579   getLexer().setSkipSpace(true);
4580 
4581   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4582     return TokError(Twine("expected directive ") +
4583                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4584   }
4585 
4586   CollectStream.flush();
4587   return false;
4588 }
4589 
4590 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4591 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4592   std::string String;
4593   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4594                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4595     return true;
4596 
4597   auto PALMetadata = getTargetStreamer().getPALMetadata();
4598   if (!PALMetadata->setFromString(String))
4599     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4600   return false;
4601 }
4602 
4603 /// Parse the assembler directive for old linear-format PAL metadata.
4604 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4605   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4606     return Error(getParser().getTok().getLoc(),
4607                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4608                  "not available on non-amdpal OSes")).str());
4609   }
4610 
4611   auto PALMetadata = getTargetStreamer().getPALMetadata();
4612   PALMetadata->setLegacy();
4613   for (;;) {
4614     uint32_t Key, Value;
4615     if (ParseAsAbsoluteExpression(Key)) {
4616       return TokError(Twine("invalid value in ") +
4617                       Twine(PALMD::AssemblerDirective));
4618     }
4619     if (getLexer().isNot(AsmToken::Comma)) {
4620       return TokError(Twine("expected an even number of values in ") +
4621                       Twine(PALMD::AssemblerDirective));
4622     }
4623     Lex();
4624     if (ParseAsAbsoluteExpression(Value)) {
4625       return TokError(Twine("invalid value in ") +
4626                       Twine(PALMD::AssemblerDirective));
4627     }
4628     PALMetadata->setRegister(Key, Value);
4629     if (getLexer().isNot(AsmToken::Comma))
4630       break;
4631     Lex();
4632   }
4633   return false;
4634 }
4635 
4636 /// ParseDirectiveAMDGPULDS
4637 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4638 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4639   if (getParser().checkForValidSection())
4640     return true;
4641 
4642   StringRef Name;
4643   SMLoc NameLoc = getLexer().getLoc();
4644   if (getParser().parseIdentifier(Name))
4645     return TokError("expected identifier in directive");
4646 
4647   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4648   if (parseToken(AsmToken::Comma, "expected ','"))
4649     return true;
4650 
4651   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4652 
4653   int64_t Size;
4654   SMLoc SizeLoc = getLexer().getLoc();
4655   if (getParser().parseAbsoluteExpression(Size))
4656     return true;
4657   if (Size < 0)
4658     return Error(SizeLoc, "size must be non-negative");
4659   if (Size > LocalMemorySize)
4660     return Error(SizeLoc, "size is too large");
4661 
4662   int64_t Alignment = 4;
4663   if (getLexer().is(AsmToken::Comma)) {
4664     Lex();
4665     SMLoc AlignLoc = getLexer().getLoc();
4666     if (getParser().parseAbsoluteExpression(Alignment))
4667       return true;
4668     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4669       return Error(AlignLoc, "alignment must be a power of two");
4670 
4671     // Alignment larger than the size of LDS is possible in theory, as long
4672     // as the linker manages to place to symbol at address 0, but we do want
4673     // to make sure the alignment fits nicely into a 32-bit integer.
4674     if (Alignment >= 1u << 31)
4675       return Error(AlignLoc, "alignment is too large");
4676   }
4677 
4678   if (parseToken(AsmToken::EndOfStatement,
4679                  "unexpected token in '.amdgpu_lds' directive"))
4680     return true;
4681 
4682   Symbol->redefineIfPossible();
4683   if (!Symbol->isUndefined())
4684     return Error(NameLoc, "invalid symbol redefinition");
4685 
4686   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4687   return false;
4688 }
4689 
4690 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4691   StringRef IDVal = DirectiveID.getString();
4692 
4693   if (isHsaAbiVersion3(&getSTI())) {
4694     if (IDVal == ".amdgcn_target")
4695       return ParseDirectiveAMDGCNTarget();
4696 
4697     if (IDVal == ".amdhsa_kernel")
4698       return ParseDirectiveAMDHSAKernel();
4699 
4700     // TODO: Restructure/combine with PAL metadata directive.
4701     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4702       return ParseDirectiveHSAMetadata();
4703   } else {
4704     if (IDVal == ".hsa_code_object_version")
4705       return ParseDirectiveHSACodeObjectVersion();
4706 
4707     if (IDVal == ".hsa_code_object_isa")
4708       return ParseDirectiveHSACodeObjectISA();
4709 
4710     if (IDVal == ".amd_kernel_code_t")
4711       return ParseDirectiveAMDKernelCodeT();
4712 
4713     if (IDVal == ".amdgpu_hsa_kernel")
4714       return ParseDirectiveAMDGPUHsaKernel();
4715 
4716     if (IDVal == ".amd_amdgpu_isa")
4717       return ParseDirectiveISAVersion();
4718 
4719     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4720       return ParseDirectiveHSAMetadata();
4721   }
4722 
4723   if (IDVal == ".amdgpu_lds")
4724     return ParseDirectiveAMDGPULDS();
4725 
4726   if (IDVal == PALMD::AssemblerDirectiveBegin)
4727     return ParseDirectivePALMetadataBegin();
4728 
4729   if (IDVal == PALMD::AssemblerDirective)
4730     return ParseDirectivePALMetadata();
4731 
4732   return true;
4733 }
4734 
4735 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4736                                            unsigned RegNo) const {
4737 
4738   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4739        R.isValid(); ++R) {
4740     if (*R == RegNo)
4741       return isGFX9Plus();
4742   }
4743 
4744   // GFX10 has 2 more SGPRs 104 and 105.
4745   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4746        R.isValid(); ++R) {
4747     if (*R == RegNo)
4748       return hasSGPR104_SGPR105();
4749   }
4750 
4751   switch (RegNo) {
4752   case AMDGPU::SRC_SHARED_BASE:
4753   case AMDGPU::SRC_SHARED_LIMIT:
4754   case AMDGPU::SRC_PRIVATE_BASE:
4755   case AMDGPU::SRC_PRIVATE_LIMIT:
4756   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4757     return !isCI() && !isSI() && !isVI();
4758   case AMDGPU::TBA:
4759   case AMDGPU::TBA_LO:
4760   case AMDGPU::TBA_HI:
4761   case AMDGPU::TMA:
4762   case AMDGPU::TMA_LO:
4763   case AMDGPU::TMA_HI:
4764     return !isGFX9() && !isGFX10();
4765   case AMDGPU::XNACK_MASK:
4766   case AMDGPU::XNACK_MASK_LO:
4767   case AMDGPU::XNACK_MASK_HI:
4768     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4769   case AMDGPU::SGPR_NULL:
4770     return isGFX10();
4771   default:
4772     break;
4773   }
4774 
4775   if (isCI())
4776     return true;
4777 
4778   if (isSI() || isGFX10()) {
4779     // No flat_scr on SI.
4780     // On GFX10 flat scratch is not a valid register operand and can only be
4781     // accessed with s_setreg/s_getreg.
4782     switch (RegNo) {
4783     case AMDGPU::FLAT_SCR:
4784     case AMDGPU::FLAT_SCR_LO:
4785     case AMDGPU::FLAT_SCR_HI:
4786       return false;
4787     default:
4788       return true;
4789     }
4790   }
4791 
4792   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4793   // SI/CI have.
4794   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4795        R.isValid(); ++R) {
4796     if (*R == RegNo)
4797       return hasSGPR102_SGPR103();
4798   }
4799 
4800   return true;
4801 }
4802 
4803 OperandMatchResultTy
4804 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4805                               OperandMode Mode) {
4806   // Try to parse with a custom parser
4807   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4808 
4809   // If we successfully parsed the operand or if there as an error parsing,
4810   // we are done.
4811   //
4812   // If we are parsing after we reach EndOfStatement then this means we
4813   // are appending default values to the Operands list.  This is only done
4814   // by custom parser, so we shouldn't continue on to the generic parsing.
4815   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4816       getLexer().is(AsmToken::EndOfStatement))
4817     return ResTy;
4818 
4819   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4820     unsigned Prefix = Operands.size();
4821     SMLoc LBraceLoc = getTok().getLoc();
4822     Parser.Lex(); // eat the '['
4823 
4824     for (;;) {
4825       ResTy = parseReg(Operands);
4826       if (ResTy != MatchOperand_Success)
4827         return ResTy;
4828 
4829       if (getLexer().is(AsmToken::RBrac))
4830         break;
4831 
4832       if (getLexer().isNot(AsmToken::Comma))
4833         return MatchOperand_ParseFail;
4834       Parser.Lex();
4835     }
4836 
4837     if (Operands.size() - Prefix > 1) {
4838       Operands.insert(Operands.begin() + Prefix,
4839                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4840       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4841                                                     getTok().getLoc()));
4842     }
4843 
4844     Parser.Lex(); // eat the ']'
4845     return MatchOperand_Success;
4846   }
4847 
4848   return parseRegOrImm(Operands);
4849 }
4850 
4851 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4852   // Clear any forced encodings from the previous instruction.
4853   setForcedEncodingSize(0);
4854   setForcedDPP(false);
4855   setForcedSDWA(false);
4856 
4857   if (Name.endswith("_e64")) {
4858     setForcedEncodingSize(64);
4859     return Name.substr(0, Name.size() - 4);
4860   } else if (Name.endswith("_e32")) {
4861     setForcedEncodingSize(32);
4862     return Name.substr(0, Name.size() - 4);
4863   } else if (Name.endswith("_dpp")) {
4864     setForcedDPP(true);
4865     return Name.substr(0, Name.size() - 4);
4866   } else if (Name.endswith("_sdwa")) {
4867     setForcedSDWA(true);
4868     return Name.substr(0, Name.size() - 5);
4869   }
4870   return Name;
4871 }
4872 
4873 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4874                                        StringRef Name,
4875                                        SMLoc NameLoc, OperandVector &Operands) {
4876   // Add the instruction mnemonic
4877   Name = parseMnemonicSuffix(Name);
4878   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4879 
4880   bool IsMIMG = Name.startswith("image_");
4881 
4882   while (!getLexer().is(AsmToken::EndOfStatement)) {
4883     OperandMode Mode = OperandMode_Default;
4884     if (IsMIMG && isGFX10() && Operands.size() == 2)
4885       Mode = OperandMode_NSA;
4886     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4887 
4888     // Eat the comma or space if there is one.
4889     if (getLexer().is(AsmToken::Comma))
4890       Parser.Lex();
4891 
4892     if (Res != MatchOperand_Success) {
4893       checkUnsupportedInstruction(Name, NameLoc);
4894       if (!Parser.hasPendingError()) {
4895         // FIXME: use real operand location rather than the current location.
4896         StringRef Msg =
4897           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4898                                             "not a valid operand.";
4899         Error(getLexer().getLoc(), Msg);
4900       }
4901       while (!getLexer().is(AsmToken::EndOfStatement)) {
4902         Parser.Lex();
4903       }
4904       return true;
4905     }
4906   }
4907 
4908   return false;
4909 }
4910 
4911 //===----------------------------------------------------------------------===//
4912 // Utility functions
4913 //===----------------------------------------------------------------------===//
4914 
4915 OperandMatchResultTy
4916 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4917 
4918   if (!trySkipId(Prefix, AsmToken::Colon))
4919     return MatchOperand_NoMatch;
4920 
4921   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4922 }
4923 
4924 OperandMatchResultTy
4925 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4926                                     AMDGPUOperand::ImmTy ImmTy,
4927                                     bool (*ConvertResult)(int64_t&)) {
4928   SMLoc S = getLoc();
4929   int64_t Value = 0;
4930 
4931   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4932   if (Res != MatchOperand_Success)
4933     return Res;
4934 
4935   if (ConvertResult && !ConvertResult(Value)) {
4936     Error(S, "invalid " + StringRef(Prefix) + " value.");
4937   }
4938 
4939   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4940   return MatchOperand_Success;
4941 }
4942 
4943 OperandMatchResultTy
4944 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4945                                              OperandVector &Operands,
4946                                              AMDGPUOperand::ImmTy ImmTy,
4947                                              bool (*ConvertResult)(int64_t&)) {
4948   SMLoc S = getLoc();
4949   if (!trySkipId(Prefix, AsmToken::Colon))
4950     return MatchOperand_NoMatch;
4951 
4952   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4953     return MatchOperand_ParseFail;
4954 
4955   unsigned Val = 0;
4956   const unsigned MaxSize = 4;
4957 
4958   // FIXME: How to verify the number of elements matches the number of src
4959   // operands?
4960   for (int I = 0; ; ++I) {
4961     int64_t Op;
4962     SMLoc Loc = getLoc();
4963     if (!parseExpr(Op))
4964       return MatchOperand_ParseFail;
4965 
4966     if (Op != 0 && Op != 1) {
4967       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4968       return MatchOperand_ParseFail;
4969     }
4970 
4971     Val |= (Op << I);
4972 
4973     if (trySkipToken(AsmToken::RBrac))
4974       break;
4975 
4976     if (I + 1 == MaxSize) {
4977       Error(getLoc(), "expected a closing square bracket");
4978       return MatchOperand_ParseFail;
4979     }
4980 
4981     if (!skipToken(AsmToken::Comma, "expected a comma"))
4982       return MatchOperand_ParseFail;
4983   }
4984 
4985   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4986   return MatchOperand_Success;
4987 }
4988 
4989 OperandMatchResultTy
4990 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4991                                AMDGPUOperand::ImmTy ImmTy) {
4992   int64_t Bit = 0;
4993   SMLoc S = Parser.getTok().getLoc();
4994 
4995   // We are at the end of the statement, and this is a default argument, so
4996   // use a default value.
4997   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4998     switch(getLexer().getKind()) {
4999       case AsmToken::Identifier: {
5000         StringRef Tok = Parser.getTok().getString();
5001         if (Tok == Name) {
5002           if (Tok == "r128" && !hasMIMG_R128())
5003             Error(S, "r128 modifier is not supported on this GPU");
5004           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5005             Error(S, "a16 modifier is not supported on this GPU");
5006           Bit = 1;
5007           Parser.Lex();
5008         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5009           Bit = 0;
5010           Parser.Lex();
5011         } else {
5012           return MatchOperand_NoMatch;
5013         }
5014         break;
5015       }
5016       default:
5017         return MatchOperand_NoMatch;
5018     }
5019   }
5020 
5021   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
5022     return MatchOperand_ParseFail;
5023 
5024   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5025     ImmTy = AMDGPUOperand::ImmTyR128A16;
5026 
5027   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5028   return MatchOperand_Success;
5029 }
5030 
5031 static void addOptionalImmOperand(
5032   MCInst& Inst, const OperandVector& Operands,
5033   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5034   AMDGPUOperand::ImmTy ImmT,
5035   int64_t Default = 0) {
5036   auto i = OptionalIdx.find(ImmT);
5037   if (i != OptionalIdx.end()) {
5038     unsigned Idx = i->second;
5039     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5040   } else {
5041     Inst.addOperand(MCOperand::createImm(Default));
5042   }
5043 }
5044 
5045 OperandMatchResultTy
5046 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5047   if (getLexer().isNot(AsmToken::Identifier)) {
5048     return MatchOperand_NoMatch;
5049   }
5050   StringRef Tok = Parser.getTok().getString();
5051   if (Tok != Prefix) {
5052     return MatchOperand_NoMatch;
5053   }
5054 
5055   Parser.Lex();
5056   if (getLexer().isNot(AsmToken::Colon)) {
5057     return MatchOperand_ParseFail;
5058   }
5059 
5060   Parser.Lex();
5061   if (getLexer().isNot(AsmToken::Identifier)) {
5062     return MatchOperand_ParseFail;
5063   }
5064 
5065   Value = Parser.getTok().getString();
5066   return MatchOperand_Success;
5067 }
5068 
5069 //===----------------------------------------------------------------------===//
5070 // MTBUF format
5071 //===----------------------------------------------------------------------===//
5072 
5073 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5074                                   int64_t MaxVal,
5075                                   int64_t &Fmt) {
5076   int64_t Val;
5077   SMLoc Loc = getLoc();
5078 
5079   auto Res = parseIntWithPrefix(Pref, Val);
5080   if (Res == MatchOperand_ParseFail)
5081     return false;
5082   if (Res == MatchOperand_NoMatch)
5083     return true;
5084 
5085   if (Val < 0 || Val > MaxVal) {
5086     Error(Loc, Twine("out of range ", StringRef(Pref)));
5087     return false;
5088   }
5089 
5090   Fmt = Val;
5091   return true;
5092 }
5093 
5094 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5095 // values to live in a joint format operand in the MCInst encoding.
5096 OperandMatchResultTy
5097 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5098   using namespace llvm::AMDGPU::MTBUFFormat;
5099 
5100   int64_t Dfmt = DFMT_UNDEF;
5101   int64_t Nfmt = NFMT_UNDEF;
5102 
5103   // dfmt and nfmt can appear in either order, and each is optional.
5104   for (int I = 0; I < 2; ++I) {
5105     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5106       return MatchOperand_ParseFail;
5107 
5108     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5109       return MatchOperand_ParseFail;
5110     }
5111     // Skip optional comma between dfmt/nfmt
5112     // but guard against 2 commas following each other.
5113     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5114         !peekToken().is(AsmToken::Comma)) {
5115       trySkipToken(AsmToken::Comma);
5116     }
5117   }
5118 
5119   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5120     return MatchOperand_NoMatch;
5121 
5122   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5123   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5124 
5125   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5126   return MatchOperand_Success;
5127 }
5128 
5129 OperandMatchResultTy
5130 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5131   using namespace llvm::AMDGPU::MTBUFFormat;
5132 
5133   int64_t Fmt = UFMT_UNDEF;
5134 
5135   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5136     return MatchOperand_ParseFail;
5137 
5138   if (Fmt == UFMT_UNDEF)
5139     return MatchOperand_NoMatch;
5140 
5141   Format = Fmt;
5142   return MatchOperand_Success;
5143 }
5144 
5145 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5146                                     int64_t &Nfmt,
5147                                     StringRef FormatStr,
5148                                     SMLoc Loc) {
5149   using namespace llvm::AMDGPU::MTBUFFormat;
5150   int64_t Format;
5151 
5152   Format = getDfmt(FormatStr);
5153   if (Format != DFMT_UNDEF) {
5154     Dfmt = Format;
5155     return true;
5156   }
5157 
5158   Format = getNfmt(FormatStr, getSTI());
5159   if (Format != NFMT_UNDEF) {
5160     Nfmt = Format;
5161     return true;
5162   }
5163 
5164   Error(Loc, "unsupported format");
5165   return false;
5166 }
5167 
5168 OperandMatchResultTy
5169 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5170                                           SMLoc FormatLoc,
5171                                           int64_t &Format) {
5172   using namespace llvm::AMDGPU::MTBUFFormat;
5173 
5174   int64_t Dfmt = DFMT_UNDEF;
5175   int64_t Nfmt = NFMT_UNDEF;
5176   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5177     return MatchOperand_ParseFail;
5178 
5179   if (trySkipToken(AsmToken::Comma)) {
5180     StringRef Str;
5181     SMLoc Loc = getLoc();
5182     if (!parseId(Str, "expected a format string") ||
5183         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5184       return MatchOperand_ParseFail;
5185     }
5186     if (Dfmt == DFMT_UNDEF) {
5187       Error(Loc, "duplicate numeric format");
5188       return MatchOperand_ParseFail;
5189     } else if (Nfmt == NFMT_UNDEF) {
5190       Error(Loc, "duplicate data format");
5191       return MatchOperand_ParseFail;
5192     }
5193   }
5194 
5195   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5196   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5197 
5198   if (isGFX10()) {
5199     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5200     if (Ufmt == UFMT_UNDEF) {
5201       Error(FormatLoc, "unsupported format");
5202       return MatchOperand_ParseFail;
5203     }
5204     Format = Ufmt;
5205   } else {
5206     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5207   }
5208 
5209   return MatchOperand_Success;
5210 }
5211 
5212 OperandMatchResultTy
5213 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5214                                             SMLoc Loc,
5215                                             int64_t &Format) {
5216   using namespace llvm::AMDGPU::MTBUFFormat;
5217 
5218   auto Id = getUnifiedFormat(FormatStr);
5219   if (Id == UFMT_UNDEF)
5220     return MatchOperand_NoMatch;
5221 
5222   if (!isGFX10()) {
5223     Error(Loc, "unified format is not supported on this GPU");
5224     return MatchOperand_ParseFail;
5225   }
5226 
5227   Format = Id;
5228   return MatchOperand_Success;
5229 }
5230 
5231 OperandMatchResultTy
5232 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5233   using namespace llvm::AMDGPU::MTBUFFormat;
5234   SMLoc Loc = getLoc();
5235 
5236   if (!parseExpr(Format))
5237     return MatchOperand_ParseFail;
5238   if (!isValidFormatEncoding(Format, getSTI())) {
5239     Error(Loc, "out of range format");
5240     return MatchOperand_ParseFail;
5241   }
5242 
5243   return MatchOperand_Success;
5244 }
5245 
5246 OperandMatchResultTy
5247 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5248   using namespace llvm::AMDGPU::MTBUFFormat;
5249 
5250   if (!trySkipId("format", AsmToken::Colon))
5251     return MatchOperand_NoMatch;
5252 
5253   if (trySkipToken(AsmToken::LBrac)) {
5254     StringRef FormatStr;
5255     SMLoc Loc = getLoc();
5256     if (!parseId(FormatStr, "expected a format string"))
5257       return MatchOperand_ParseFail;
5258 
5259     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5260     if (Res == MatchOperand_NoMatch)
5261       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5262     if (Res != MatchOperand_Success)
5263       return Res;
5264 
5265     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5266       return MatchOperand_ParseFail;
5267 
5268     return MatchOperand_Success;
5269   }
5270 
5271   return parseNumericFormat(Format);
5272 }
5273 
5274 OperandMatchResultTy
5275 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5276   using namespace llvm::AMDGPU::MTBUFFormat;
5277 
5278   int64_t Format = getDefaultFormatEncoding(getSTI());
5279   OperandMatchResultTy Res;
5280   SMLoc Loc = getLoc();
5281 
5282   // Parse legacy format syntax.
5283   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5284   if (Res == MatchOperand_ParseFail)
5285     return Res;
5286 
5287   bool FormatFound = (Res == MatchOperand_Success);
5288 
5289   Operands.push_back(
5290     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5291 
5292   if (FormatFound)
5293     trySkipToken(AsmToken::Comma);
5294 
5295   if (isToken(AsmToken::EndOfStatement)) {
5296     // We are expecting an soffset operand,
5297     // but let matcher handle the error.
5298     return MatchOperand_Success;
5299   }
5300 
5301   // Parse soffset.
5302   Res = parseRegOrImm(Operands);
5303   if (Res != MatchOperand_Success)
5304     return Res;
5305 
5306   trySkipToken(AsmToken::Comma);
5307 
5308   if (!FormatFound) {
5309     Res = parseSymbolicOrNumericFormat(Format);
5310     if (Res == MatchOperand_ParseFail)
5311       return Res;
5312     if (Res == MatchOperand_Success) {
5313       auto Size = Operands.size();
5314       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5315       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5316       Op.setImm(Format);
5317     }
5318     return MatchOperand_Success;
5319   }
5320 
5321   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5322     Error(getLoc(), "duplicate format");
5323     return MatchOperand_ParseFail;
5324   }
5325   return MatchOperand_Success;
5326 }
5327 
5328 //===----------------------------------------------------------------------===//
5329 // ds
5330 //===----------------------------------------------------------------------===//
5331 
5332 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5333                                     const OperandVector &Operands) {
5334   OptionalImmIndexMap OptionalIdx;
5335 
5336   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5337     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5338 
5339     // Add the register arguments
5340     if (Op.isReg()) {
5341       Op.addRegOperands(Inst, 1);
5342       continue;
5343     }
5344 
5345     // Handle optional arguments
5346     OptionalIdx[Op.getImmTy()] = i;
5347   }
5348 
5349   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5350   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5351   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5352 
5353   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5354 }
5355 
5356 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5357                                 bool IsGdsHardcoded) {
5358   OptionalImmIndexMap OptionalIdx;
5359 
5360   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5361     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5362 
5363     // Add the register arguments
5364     if (Op.isReg()) {
5365       Op.addRegOperands(Inst, 1);
5366       continue;
5367     }
5368 
5369     if (Op.isToken() && Op.getToken() == "gds") {
5370       IsGdsHardcoded = true;
5371       continue;
5372     }
5373 
5374     // Handle optional arguments
5375     OptionalIdx[Op.getImmTy()] = i;
5376   }
5377 
5378   AMDGPUOperand::ImmTy OffsetType =
5379     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5380      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5381      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5382                                                       AMDGPUOperand::ImmTyOffset;
5383 
5384   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5385 
5386   if (!IsGdsHardcoded) {
5387     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5388   }
5389   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5390 }
5391 
5392 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5393   OptionalImmIndexMap OptionalIdx;
5394 
5395   unsigned OperandIdx[4];
5396   unsigned EnMask = 0;
5397   int SrcIdx = 0;
5398 
5399   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5400     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5401 
5402     // Add the register arguments
5403     if (Op.isReg()) {
5404       assert(SrcIdx < 4);
5405       OperandIdx[SrcIdx] = Inst.size();
5406       Op.addRegOperands(Inst, 1);
5407       ++SrcIdx;
5408       continue;
5409     }
5410 
5411     if (Op.isOff()) {
5412       assert(SrcIdx < 4);
5413       OperandIdx[SrcIdx] = Inst.size();
5414       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5415       ++SrcIdx;
5416       continue;
5417     }
5418 
5419     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5420       Op.addImmOperands(Inst, 1);
5421       continue;
5422     }
5423 
5424     if (Op.isToken() && Op.getToken() == "done")
5425       continue;
5426 
5427     // Handle optional arguments
5428     OptionalIdx[Op.getImmTy()] = i;
5429   }
5430 
5431   assert(SrcIdx == 4);
5432 
5433   bool Compr = false;
5434   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5435     Compr = true;
5436     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5437     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5438     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5439   }
5440 
5441   for (auto i = 0; i < SrcIdx; ++i) {
5442     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5443       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5444     }
5445   }
5446 
5447   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5448   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5449 
5450   Inst.addOperand(MCOperand::createImm(EnMask));
5451 }
5452 
5453 //===----------------------------------------------------------------------===//
5454 // s_waitcnt
5455 //===----------------------------------------------------------------------===//
5456 
5457 static bool
5458 encodeCnt(
5459   const AMDGPU::IsaVersion ISA,
5460   int64_t &IntVal,
5461   int64_t CntVal,
5462   bool Saturate,
5463   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5464   unsigned (*decode)(const IsaVersion &Version, unsigned))
5465 {
5466   bool Failed = false;
5467 
5468   IntVal = encode(ISA, IntVal, CntVal);
5469   if (CntVal != decode(ISA, IntVal)) {
5470     if (Saturate) {
5471       IntVal = encode(ISA, IntVal, -1);
5472     } else {
5473       Failed = true;
5474     }
5475   }
5476   return Failed;
5477 }
5478 
5479 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5480 
5481   SMLoc CntLoc = getLoc();
5482   StringRef CntName = getTokenStr();
5483 
5484   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5485       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5486     return false;
5487 
5488   int64_t CntVal;
5489   SMLoc ValLoc = getLoc();
5490   if (!parseExpr(CntVal))
5491     return false;
5492 
5493   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5494 
5495   bool Failed = true;
5496   bool Sat = CntName.endswith("_sat");
5497 
5498   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5499     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5500   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5501     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5502   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5503     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5504   } else {
5505     Error(CntLoc, "invalid counter name " + CntName);
5506     return false;
5507   }
5508 
5509   if (Failed) {
5510     Error(ValLoc, "too large value for " + CntName);
5511     return false;
5512   }
5513 
5514   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5515     return false;
5516 
5517   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5518     if (isToken(AsmToken::EndOfStatement)) {
5519       Error(getLoc(), "expected a counter name");
5520       return false;
5521     }
5522   }
5523 
5524   return true;
5525 }
5526 
5527 OperandMatchResultTy
5528 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5529   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5530   int64_t Waitcnt = getWaitcntBitMask(ISA);
5531   SMLoc S = getLoc();
5532 
5533   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5534     while (!isToken(AsmToken::EndOfStatement)) {
5535       if (!parseCnt(Waitcnt))
5536         return MatchOperand_ParseFail;
5537     }
5538   } else {
5539     if (!parseExpr(Waitcnt))
5540       return MatchOperand_ParseFail;
5541   }
5542 
5543   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5544   return MatchOperand_Success;
5545 }
5546 
5547 bool
5548 AMDGPUOperand::isSWaitCnt() const {
5549   return isImm();
5550 }
5551 
5552 //===----------------------------------------------------------------------===//
5553 // hwreg
5554 //===----------------------------------------------------------------------===//
5555 
5556 bool
5557 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5558                                 int64_t &Offset,
5559                                 int64_t &Width) {
5560   using namespace llvm::AMDGPU::Hwreg;
5561 
5562   // The register may be specified by name or using a numeric code
5563   if (isToken(AsmToken::Identifier) &&
5564       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5565     HwReg.IsSymbolic = true;
5566     lex(); // skip message name
5567   } else if (!parseExpr(HwReg.Id)) {
5568     return false;
5569   }
5570 
5571   if (trySkipToken(AsmToken::RParen))
5572     return true;
5573 
5574   // parse optional params
5575   return
5576     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5577     parseExpr(Offset) &&
5578     skipToken(AsmToken::Comma, "expected a comma") &&
5579     parseExpr(Width) &&
5580     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5581 }
5582 
5583 bool
5584 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5585                                const int64_t Offset,
5586                                const int64_t Width,
5587                                const SMLoc Loc) {
5588 
5589   using namespace llvm::AMDGPU::Hwreg;
5590 
5591   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5592     Error(Loc, "specified hardware register is not supported on this GPU");
5593     return false;
5594   } else if (!isValidHwreg(HwReg.Id)) {
5595     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5596     return false;
5597   } else if (!isValidHwregOffset(Offset)) {
5598     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5599     return false;
5600   } else if (!isValidHwregWidth(Width)) {
5601     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5602     return false;
5603   }
5604   return true;
5605 }
5606 
5607 OperandMatchResultTy
5608 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5609   using namespace llvm::AMDGPU::Hwreg;
5610 
5611   int64_t ImmVal = 0;
5612   SMLoc Loc = getLoc();
5613 
5614   if (trySkipId("hwreg", AsmToken::LParen)) {
5615     OperandInfoTy HwReg(ID_UNKNOWN_);
5616     int64_t Offset = OFFSET_DEFAULT_;
5617     int64_t Width = WIDTH_DEFAULT_;
5618     if (parseHwregBody(HwReg, Offset, Width) &&
5619         validateHwreg(HwReg, Offset, Width, Loc)) {
5620       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5621     } else {
5622       return MatchOperand_ParseFail;
5623     }
5624   } else if (parseExpr(ImmVal)) {
5625     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5626       Error(Loc, "invalid immediate: only 16-bit values are legal");
5627       return MatchOperand_ParseFail;
5628     }
5629   } else {
5630     return MatchOperand_ParseFail;
5631   }
5632 
5633   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5634   return MatchOperand_Success;
5635 }
5636 
5637 bool AMDGPUOperand::isHwreg() const {
5638   return isImmTy(ImmTyHwreg);
5639 }
5640 
5641 //===----------------------------------------------------------------------===//
5642 // sendmsg
5643 //===----------------------------------------------------------------------===//
5644 
5645 bool
5646 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5647                                   OperandInfoTy &Op,
5648                                   OperandInfoTy &Stream) {
5649   using namespace llvm::AMDGPU::SendMsg;
5650 
5651   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5652     Msg.IsSymbolic = true;
5653     lex(); // skip message name
5654   } else if (!parseExpr(Msg.Id)) {
5655     return false;
5656   }
5657 
5658   if (trySkipToken(AsmToken::Comma)) {
5659     Op.IsDefined = true;
5660     if (isToken(AsmToken::Identifier) &&
5661         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5662       lex(); // skip operation name
5663     } else if (!parseExpr(Op.Id)) {
5664       return false;
5665     }
5666 
5667     if (trySkipToken(AsmToken::Comma)) {
5668       Stream.IsDefined = true;
5669       if (!parseExpr(Stream.Id))
5670         return false;
5671     }
5672   }
5673 
5674   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5675 }
5676 
5677 bool
5678 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5679                                  const OperandInfoTy &Op,
5680                                  const OperandInfoTy &Stream,
5681                                  const SMLoc S) {
5682   using namespace llvm::AMDGPU::SendMsg;
5683 
5684   // Validation strictness depends on whether message is specified
5685   // in a symbolc or in a numeric form. In the latter case
5686   // only encoding possibility is checked.
5687   bool Strict = Msg.IsSymbolic;
5688 
5689   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5690     Error(S, "invalid message id");
5691     return false;
5692   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5693     Error(S, Op.IsDefined ?
5694              "message does not support operations" :
5695              "missing message operation");
5696     return false;
5697   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5698     Error(S, "invalid operation id");
5699     return false;
5700   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5701     Error(S, "message operation does not support streams");
5702     return false;
5703   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5704     Error(S, "invalid message stream id");
5705     return false;
5706   }
5707   return true;
5708 }
5709 
5710 OperandMatchResultTy
5711 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5712   using namespace llvm::AMDGPU::SendMsg;
5713 
5714   int64_t ImmVal = 0;
5715   SMLoc Loc = getLoc();
5716 
5717   if (trySkipId("sendmsg", AsmToken::LParen)) {
5718     OperandInfoTy Msg(ID_UNKNOWN_);
5719     OperandInfoTy Op(OP_NONE_);
5720     OperandInfoTy Stream(STREAM_ID_NONE_);
5721     if (parseSendMsgBody(Msg, Op, Stream) &&
5722         validateSendMsg(Msg, Op, Stream, Loc)) {
5723       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5724     } else {
5725       return MatchOperand_ParseFail;
5726     }
5727   } else if (parseExpr(ImmVal)) {
5728     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5729       Error(Loc, "invalid immediate: only 16-bit values are legal");
5730       return MatchOperand_ParseFail;
5731     }
5732   } else {
5733     return MatchOperand_ParseFail;
5734   }
5735 
5736   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5737   return MatchOperand_Success;
5738 }
5739 
5740 bool AMDGPUOperand::isSendMsg() const {
5741   return isImmTy(ImmTySendMsg);
5742 }
5743 
5744 //===----------------------------------------------------------------------===//
5745 // v_interp
5746 //===----------------------------------------------------------------------===//
5747 
5748 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5749   if (getLexer().getKind() != AsmToken::Identifier)
5750     return MatchOperand_NoMatch;
5751 
5752   StringRef Str = Parser.getTok().getString();
5753   int Slot = StringSwitch<int>(Str)
5754     .Case("p10", 0)
5755     .Case("p20", 1)
5756     .Case("p0", 2)
5757     .Default(-1);
5758 
5759   SMLoc S = Parser.getTok().getLoc();
5760   if (Slot == -1)
5761     return MatchOperand_ParseFail;
5762 
5763   Parser.Lex();
5764   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5765                                               AMDGPUOperand::ImmTyInterpSlot));
5766   return MatchOperand_Success;
5767 }
5768 
5769 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5770   if (getLexer().getKind() != AsmToken::Identifier)
5771     return MatchOperand_NoMatch;
5772 
5773   StringRef Str = Parser.getTok().getString();
5774   if (!Str.startswith("attr"))
5775     return MatchOperand_NoMatch;
5776 
5777   StringRef Chan = Str.take_back(2);
5778   int AttrChan = StringSwitch<int>(Chan)
5779     .Case(".x", 0)
5780     .Case(".y", 1)
5781     .Case(".z", 2)
5782     .Case(".w", 3)
5783     .Default(-1);
5784   if (AttrChan == -1)
5785     return MatchOperand_ParseFail;
5786 
5787   Str = Str.drop_back(2).drop_front(4);
5788 
5789   uint8_t Attr;
5790   if (Str.getAsInteger(10, Attr))
5791     return MatchOperand_ParseFail;
5792 
5793   SMLoc S = Parser.getTok().getLoc();
5794   Parser.Lex();
5795   if (Attr > 63) {
5796     Error(S, "out of bounds attr");
5797     return MatchOperand_ParseFail;
5798   }
5799 
5800   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5801 
5802   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5803                                               AMDGPUOperand::ImmTyInterpAttr));
5804   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5805                                               AMDGPUOperand::ImmTyAttrChan));
5806   return MatchOperand_Success;
5807 }
5808 
5809 //===----------------------------------------------------------------------===//
5810 // exp
5811 //===----------------------------------------------------------------------===//
5812 
5813 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5814                                                       uint8_t &Val) {
5815   if (Str == "null") {
5816     Val = 9;
5817     return MatchOperand_Success;
5818   }
5819 
5820   if (Str.startswith("mrt")) {
5821     Str = Str.drop_front(3);
5822     if (Str == "z") { // == mrtz
5823       Val = 8;
5824       return MatchOperand_Success;
5825     }
5826 
5827     if (Str.getAsInteger(10, Val))
5828       return MatchOperand_ParseFail;
5829 
5830     if (Val > 7) {
5831       Error(getLoc(), "invalid exp target");
5832       return MatchOperand_ParseFail;
5833     }
5834 
5835     return MatchOperand_Success;
5836   }
5837 
5838   if (Str.startswith("pos")) {
5839     Str = Str.drop_front(3);
5840     if (Str.getAsInteger(10, Val))
5841       return MatchOperand_ParseFail;
5842 
5843     if (Val > 4 || (Val == 4 && !isGFX10())) {
5844       Error(getLoc(), "invalid exp target");
5845       return MatchOperand_ParseFail;
5846     }
5847 
5848     Val += 12;
5849     return MatchOperand_Success;
5850   }
5851 
5852   if (isGFX10() && Str == "prim") {
5853     Val = 20;
5854     return MatchOperand_Success;
5855   }
5856 
5857   if (Str.startswith("param")) {
5858     Str = Str.drop_front(5);
5859     if (Str.getAsInteger(10, Val))
5860       return MatchOperand_ParseFail;
5861 
5862     if (Val >= 32) {
5863       Error(getLoc(), "invalid exp target");
5864       return MatchOperand_ParseFail;
5865     }
5866 
5867     Val += 32;
5868     return MatchOperand_Success;
5869   }
5870 
5871   if (Str.startswith("invalid_target_")) {
5872     Str = Str.drop_front(15);
5873     if (Str.getAsInteger(10, Val))
5874       return MatchOperand_ParseFail;
5875 
5876     Error(getLoc(), "invalid exp target");
5877     return MatchOperand_ParseFail;
5878   }
5879 
5880   return MatchOperand_NoMatch;
5881 }
5882 
5883 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5884   uint8_t Val;
5885   StringRef Str = Parser.getTok().getString();
5886 
5887   auto Res = parseExpTgtImpl(Str, Val);
5888   if (Res != MatchOperand_Success)
5889     return Res;
5890 
5891   SMLoc S = Parser.getTok().getLoc();
5892   Parser.Lex();
5893 
5894   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5895                                               AMDGPUOperand::ImmTyExpTgt));
5896   return MatchOperand_Success;
5897 }
5898 
5899 //===----------------------------------------------------------------------===//
5900 // parser helpers
5901 //===----------------------------------------------------------------------===//
5902 
5903 bool
5904 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5905   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5906 }
5907 
5908 bool
5909 AMDGPUAsmParser::isId(const StringRef Id) const {
5910   return isId(getToken(), Id);
5911 }
5912 
5913 bool
5914 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5915   return getTokenKind() == Kind;
5916 }
5917 
5918 bool
5919 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5920   if (isId(Id)) {
5921     lex();
5922     return true;
5923   }
5924   return false;
5925 }
5926 
5927 bool
5928 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5929   if (isId(Id) && peekToken().is(Kind)) {
5930     lex();
5931     lex();
5932     return true;
5933   }
5934   return false;
5935 }
5936 
5937 bool
5938 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5939   if (isToken(Kind)) {
5940     lex();
5941     return true;
5942   }
5943   return false;
5944 }
5945 
5946 bool
5947 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5948                            const StringRef ErrMsg) {
5949   if (!trySkipToken(Kind)) {
5950     Error(getLoc(), ErrMsg);
5951     return false;
5952   }
5953   return true;
5954 }
5955 
5956 bool
5957 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5958   return !getParser().parseAbsoluteExpression(Imm);
5959 }
5960 
5961 bool
5962 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5963   SMLoc S = getLoc();
5964 
5965   const MCExpr *Expr;
5966   if (Parser.parseExpression(Expr))
5967     return false;
5968 
5969   int64_t IntVal;
5970   if (Expr->evaluateAsAbsolute(IntVal)) {
5971     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5972   } else {
5973     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5974   }
5975   return true;
5976 }
5977 
5978 bool
5979 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5980   if (isToken(AsmToken::String)) {
5981     Val = getToken().getStringContents();
5982     lex();
5983     return true;
5984   } else {
5985     Error(getLoc(), ErrMsg);
5986     return false;
5987   }
5988 }
5989 
5990 bool
5991 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5992   if (isToken(AsmToken::Identifier)) {
5993     Val = getTokenStr();
5994     lex();
5995     return true;
5996   } else {
5997     Error(getLoc(), ErrMsg);
5998     return false;
5999   }
6000 }
6001 
6002 AsmToken
6003 AMDGPUAsmParser::getToken() const {
6004   return Parser.getTok();
6005 }
6006 
6007 AsmToken
6008 AMDGPUAsmParser::peekToken() {
6009   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6010 }
6011 
6012 void
6013 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6014   auto TokCount = getLexer().peekTokens(Tokens);
6015 
6016   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6017     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6018 }
6019 
6020 AsmToken::TokenKind
6021 AMDGPUAsmParser::getTokenKind() const {
6022   return getLexer().getKind();
6023 }
6024 
6025 SMLoc
6026 AMDGPUAsmParser::getLoc() const {
6027   return getToken().getLoc();
6028 }
6029 
6030 StringRef
6031 AMDGPUAsmParser::getTokenStr() const {
6032   return getToken().getString();
6033 }
6034 
6035 void
6036 AMDGPUAsmParser::lex() {
6037   Parser.Lex();
6038 }
6039 
6040 //===----------------------------------------------------------------------===//
6041 // swizzle
6042 //===----------------------------------------------------------------------===//
6043 
6044 LLVM_READNONE
6045 static unsigned
6046 encodeBitmaskPerm(const unsigned AndMask,
6047                   const unsigned OrMask,
6048                   const unsigned XorMask) {
6049   using namespace llvm::AMDGPU::Swizzle;
6050 
6051   return BITMASK_PERM_ENC |
6052          (AndMask << BITMASK_AND_SHIFT) |
6053          (OrMask  << BITMASK_OR_SHIFT)  |
6054          (XorMask << BITMASK_XOR_SHIFT);
6055 }
6056 
6057 bool
6058 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6059                                       const unsigned MinVal,
6060                                       const unsigned MaxVal,
6061                                       const StringRef ErrMsg) {
6062   for (unsigned i = 0; i < OpNum; ++i) {
6063     if (!skipToken(AsmToken::Comma, "expected a comma")){
6064       return false;
6065     }
6066     SMLoc ExprLoc = Parser.getTok().getLoc();
6067     if (!parseExpr(Op[i])) {
6068       return false;
6069     }
6070     if (Op[i] < MinVal || Op[i] > MaxVal) {
6071       Error(ExprLoc, ErrMsg);
6072       return false;
6073     }
6074   }
6075 
6076   return true;
6077 }
6078 
6079 bool
6080 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6081   using namespace llvm::AMDGPU::Swizzle;
6082 
6083   int64_t Lane[LANE_NUM];
6084   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6085                            "expected a 2-bit lane id")) {
6086     Imm = QUAD_PERM_ENC;
6087     for (unsigned I = 0; I < LANE_NUM; ++I) {
6088       Imm |= Lane[I] << (LANE_SHIFT * I);
6089     }
6090     return true;
6091   }
6092   return false;
6093 }
6094 
6095 bool
6096 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6097   using namespace llvm::AMDGPU::Swizzle;
6098 
6099   SMLoc S = Parser.getTok().getLoc();
6100   int64_t GroupSize;
6101   int64_t LaneIdx;
6102 
6103   if (!parseSwizzleOperands(1, &GroupSize,
6104                             2, 32,
6105                             "group size must be in the interval [2,32]")) {
6106     return false;
6107   }
6108   if (!isPowerOf2_64(GroupSize)) {
6109     Error(S, "group size must be a power of two");
6110     return false;
6111   }
6112   if (parseSwizzleOperands(1, &LaneIdx,
6113                            0, GroupSize - 1,
6114                            "lane id must be in the interval [0,group size - 1]")) {
6115     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6116     return true;
6117   }
6118   return false;
6119 }
6120 
6121 bool
6122 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6123   using namespace llvm::AMDGPU::Swizzle;
6124 
6125   SMLoc S = Parser.getTok().getLoc();
6126   int64_t GroupSize;
6127 
6128   if (!parseSwizzleOperands(1, &GroupSize,
6129       2, 32, "group size must be in the interval [2,32]")) {
6130     return false;
6131   }
6132   if (!isPowerOf2_64(GroupSize)) {
6133     Error(S, "group size must be a power of two");
6134     return false;
6135   }
6136 
6137   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6138   return true;
6139 }
6140 
6141 bool
6142 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6143   using namespace llvm::AMDGPU::Swizzle;
6144 
6145   SMLoc S = Parser.getTok().getLoc();
6146   int64_t GroupSize;
6147 
6148   if (!parseSwizzleOperands(1, &GroupSize,
6149       1, 16, "group size must be in the interval [1,16]")) {
6150     return false;
6151   }
6152   if (!isPowerOf2_64(GroupSize)) {
6153     Error(S, "group size must be a power of two");
6154     return false;
6155   }
6156 
6157   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6158   return true;
6159 }
6160 
6161 bool
6162 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6163   using namespace llvm::AMDGPU::Swizzle;
6164 
6165   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6166     return false;
6167   }
6168 
6169   StringRef Ctl;
6170   SMLoc StrLoc = Parser.getTok().getLoc();
6171   if (!parseString(Ctl)) {
6172     return false;
6173   }
6174   if (Ctl.size() != BITMASK_WIDTH) {
6175     Error(StrLoc, "expected a 5-character mask");
6176     return false;
6177   }
6178 
6179   unsigned AndMask = 0;
6180   unsigned OrMask = 0;
6181   unsigned XorMask = 0;
6182 
6183   for (size_t i = 0; i < Ctl.size(); ++i) {
6184     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6185     switch(Ctl[i]) {
6186     default:
6187       Error(StrLoc, "invalid mask");
6188       return false;
6189     case '0':
6190       break;
6191     case '1':
6192       OrMask |= Mask;
6193       break;
6194     case 'p':
6195       AndMask |= Mask;
6196       break;
6197     case 'i':
6198       AndMask |= Mask;
6199       XorMask |= Mask;
6200       break;
6201     }
6202   }
6203 
6204   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6205   return true;
6206 }
6207 
6208 bool
6209 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6210 
6211   SMLoc OffsetLoc = Parser.getTok().getLoc();
6212 
6213   if (!parseExpr(Imm)) {
6214     return false;
6215   }
6216   if (!isUInt<16>(Imm)) {
6217     Error(OffsetLoc, "expected a 16-bit offset");
6218     return false;
6219   }
6220   return true;
6221 }
6222 
6223 bool
6224 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6225   using namespace llvm::AMDGPU::Swizzle;
6226 
6227   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6228 
6229     SMLoc ModeLoc = Parser.getTok().getLoc();
6230     bool Ok = false;
6231 
6232     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6233       Ok = parseSwizzleQuadPerm(Imm);
6234     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6235       Ok = parseSwizzleBitmaskPerm(Imm);
6236     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6237       Ok = parseSwizzleBroadcast(Imm);
6238     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6239       Ok = parseSwizzleSwap(Imm);
6240     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6241       Ok = parseSwizzleReverse(Imm);
6242     } else {
6243       Error(ModeLoc, "expected a swizzle mode");
6244     }
6245 
6246     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6247   }
6248 
6249   return false;
6250 }
6251 
6252 OperandMatchResultTy
6253 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6254   SMLoc S = Parser.getTok().getLoc();
6255   int64_t Imm = 0;
6256 
6257   if (trySkipId("offset")) {
6258 
6259     bool Ok = false;
6260     if (skipToken(AsmToken::Colon, "expected a colon")) {
6261       if (trySkipId("swizzle")) {
6262         Ok = parseSwizzleMacro(Imm);
6263       } else {
6264         Ok = parseSwizzleOffset(Imm);
6265       }
6266     }
6267 
6268     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6269 
6270     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6271   } else {
6272     // Swizzle "offset" operand is optional.
6273     // If it is omitted, try parsing other optional operands.
6274     return parseOptionalOpr(Operands);
6275   }
6276 }
6277 
6278 bool
6279 AMDGPUOperand::isSwizzle() const {
6280   return isImmTy(ImmTySwizzle);
6281 }
6282 
6283 //===----------------------------------------------------------------------===//
6284 // VGPR Index Mode
6285 //===----------------------------------------------------------------------===//
6286 
6287 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6288 
6289   using namespace llvm::AMDGPU::VGPRIndexMode;
6290 
6291   if (trySkipToken(AsmToken::RParen)) {
6292     return OFF;
6293   }
6294 
6295   int64_t Imm = 0;
6296 
6297   while (true) {
6298     unsigned Mode = 0;
6299     SMLoc S = Parser.getTok().getLoc();
6300 
6301     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6302       if (trySkipId(IdSymbolic[ModeId])) {
6303         Mode = 1 << ModeId;
6304         break;
6305       }
6306     }
6307 
6308     if (Mode == 0) {
6309       Error(S, (Imm == 0)?
6310                "expected a VGPR index mode or a closing parenthesis" :
6311                "expected a VGPR index mode");
6312       return UNDEF;
6313     }
6314 
6315     if (Imm & Mode) {
6316       Error(S, "duplicate VGPR index mode");
6317       return UNDEF;
6318     }
6319     Imm |= Mode;
6320 
6321     if (trySkipToken(AsmToken::RParen))
6322       break;
6323     if (!skipToken(AsmToken::Comma,
6324                    "expected a comma or a closing parenthesis"))
6325       return UNDEF;
6326   }
6327 
6328   return Imm;
6329 }
6330 
6331 OperandMatchResultTy
6332 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6333 
6334   using namespace llvm::AMDGPU::VGPRIndexMode;
6335 
6336   int64_t Imm = 0;
6337   SMLoc S = Parser.getTok().getLoc();
6338 
6339   if (getLexer().getKind() == AsmToken::Identifier &&
6340       Parser.getTok().getString() == "gpr_idx" &&
6341       getLexer().peekTok().is(AsmToken::LParen)) {
6342 
6343     Parser.Lex();
6344     Parser.Lex();
6345 
6346     Imm = parseGPRIdxMacro();
6347     if (Imm == UNDEF)
6348       return MatchOperand_ParseFail;
6349 
6350   } else {
6351     if (getParser().parseAbsoluteExpression(Imm))
6352       return MatchOperand_ParseFail;
6353     if (Imm < 0 || !isUInt<4>(Imm)) {
6354       Error(S, "invalid immediate: only 4-bit values are legal");
6355       return MatchOperand_ParseFail;
6356     }
6357   }
6358 
6359   Operands.push_back(
6360       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6361   return MatchOperand_Success;
6362 }
6363 
6364 bool AMDGPUOperand::isGPRIdxMode() const {
6365   return isImmTy(ImmTyGprIdxMode);
6366 }
6367 
6368 //===----------------------------------------------------------------------===//
6369 // sopp branch targets
6370 //===----------------------------------------------------------------------===//
6371 
6372 OperandMatchResultTy
6373 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6374 
6375   // Make sure we are not parsing something
6376   // that looks like a label or an expression but is not.
6377   // This will improve error messages.
6378   if (isRegister() || isModifier())
6379     return MatchOperand_NoMatch;
6380 
6381   if (!parseExpr(Operands))
6382     return MatchOperand_ParseFail;
6383 
6384   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6385   assert(Opr.isImm() || Opr.isExpr());
6386   SMLoc Loc = Opr.getStartLoc();
6387 
6388   // Currently we do not support arbitrary expressions as branch targets.
6389   // Only labels and absolute expressions are accepted.
6390   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6391     Error(Loc, "expected an absolute expression or a label");
6392   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6393     Error(Loc, "expected a 16-bit signed jump offset");
6394   }
6395 
6396   return MatchOperand_Success;
6397 }
6398 
6399 //===----------------------------------------------------------------------===//
6400 // Boolean holding registers
6401 //===----------------------------------------------------------------------===//
6402 
6403 OperandMatchResultTy
6404 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6405   return parseReg(Operands);
6406 }
6407 
6408 //===----------------------------------------------------------------------===//
6409 // mubuf
6410 //===----------------------------------------------------------------------===//
6411 
6412 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6413   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6414 }
6415 
6416 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6417   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6418 }
6419 
6420 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6421   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6422 }
6423 
6424 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6425                                const OperandVector &Operands,
6426                                bool IsAtomic,
6427                                bool IsAtomicReturn,
6428                                bool IsLds) {
6429   bool IsLdsOpcode = IsLds;
6430   bool HasLdsModifier = false;
6431   OptionalImmIndexMap OptionalIdx;
6432   assert(IsAtomicReturn ? IsAtomic : true);
6433   unsigned FirstOperandIdx = 1;
6434 
6435   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6436     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6437 
6438     // Add the register arguments
6439     if (Op.isReg()) {
6440       Op.addRegOperands(Inst, 1);
6441       // Insert a tied src for atomic return dst.
6442       // This cannot be postponed as subsequent calls to
6443       // addImmOperands rely on correct number of MC operands.
6444       if (IsAtomicReturn && i == FirstOperandIdx)
6445         Op.addRegOperands(Inst, 1);
6446       continue;
6447     }
6448 
6449     // Handle the case where soffset is an immediate
6450     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6451       Op.addImmOperands(Inst, 1);
6452       continue;
6453     }
6454 
6455     HasLdsModifier |= Op.isLDS();
6456 
6457     // Handle tokens like 'offen' which are sometimes hard-coded into the
6458     // asm string.  There are no MCInst operands for these.
6459     if (Op.isToken()) {
6460       continue;
6461     }
6462     assert(Op.isImm());
6463 
6464     // Handle optional arguments
6465     OptionalIdx[Op.getImmTy()] = i;
6466   }
6467 
6468   // This is a workaround for an llvm quirk which may result in an
6469   // incorrect instruction selection. Lds and non-lds versions of
6470   // MUBUF instructions are identical except that lds versions
6471   // have mandatory 'lds' modifier. However this modifier follows
6472   // optional modifiers and llvm asm matcher regards this 'lds'
6473   // modifier as an optional one. As a result, an lds version
6474   // of opcode may be selected even if it has no 'lds' modifier.
6475   if (IsLdsOpcode && !HasLdsModifier) {
6476     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6477     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6478       Inst.setOpcode(NoLdsOpcode);
6479       IsLdsOpcode = false;
6480     }
6481   }
6482 
6483   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6484   if (!IsAtomic) { // glc is hard-coded.
6485     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6486   }
6487   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6488 
6489   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6490     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6491   }
6492 
6493   if (isGFX10())
6494     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6495 }
6496 
6497 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6498   OptionalImmIndexMap OptionalIdx;
6499 
6500   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6501     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6502 
6503     // Add the register arguments
6504     if (Op.isReg()) {
6505       Op.addRegOperands(Inst, 1);
6506       continue;
6507     }
6508 
6509     // Handle the case where soffset is an immediate
6510     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6511       Op.addImmOperands(Inst, 1);
6512       continue;
6513     }
6514 
6515     // Handle tokens like 'offen' which are sometimes hard-coded into the
6516     // asm string.  There are no MCInst operands for these.
6517     if (Op.isToken()) {
6518       continue;
6519     }
6520     assert(Op.isImm());
6521 
6522     // Handle optional arguments
6523     OptionalIdx[Op.getImmTy()] = i;
6524   }
6525 
6526   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6527                         AMDGPUOperand::ImmTyOffset);
6528   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6529   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6530   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6531   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6532 
6533   if (isGFX10())
6534     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6535 }
6536 
6537 //===----------------------------------------------------------------------===//
6538 // mimg
6539 //===----------------------------------------------------------------------===//
6540 
6541 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6542                               bool IsAtomic) {
6543   unsigned I = 1;
6544   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6545   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6546     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6547   }
6548 
6549   if (IsAtomic) {
6550     // Add src, same as dst
6551     assert(Desc.getNumDefs() == 1);
6552     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6553   }
6554 
6555   OptionalImmIndexMap OptionalIdx;
6556 
6557   for (unsigned E = Operands.size(); I != E; ++I) {
6558     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6559 
6560     // Add the register arguments
6561     if (Op.isReg()) {
6562       Op.addRegOperands(Inst, 1);
6563     } else if (Op.isImmModifier()) {
6564       OptionalIdx[Op.getImmTy()] = I;
6565     } else if (!Op.isToken()) {
6566       llvm_unreachable("unexpected operand type");
6567     }
6568   }
6569 
6570   bool IsGFX10 = isGFX10();
6571 
6572   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6573   if (IsGFX10)
6574     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6575   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6576   if (IsGFX10)
6577     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6578   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6579   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6580   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6581   if (IsGFX10)
6582     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6583   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6584   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6585   if (!IsGFX10)
6586     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6587   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6588 }
6589 
6590 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6591   cvtMIMG(Inst, Operands, true);
6592 }
6593 
6594 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6595                                       const OperandVector &Operands) {
6596   for (unsigned I = 1; I < Operands.size(); ++I) {
6597     auto &Operand = (AMDGPUOperand &)*Operands[I];
6598     if (Operand.isReg())
6599       Operand.addRegOperands(Inst, 1);
6600   }
6601 
6602   Inst.addOperand(MCOperand::createImm(1)); // a16
6603 }
6604 
6605 //===----------------------------------------------------------------------===//
6606 // smrd
6607 //===----------------------------------------------------------------------===//
6608 
6609 bool AMDGPUOperand::isSMRDOffset8() const {
6610   return isImm() && isUInt<8>(getImm());
6611 }
6612 
6613 bool AMDGPUOperand::isSMEMOffset() const {
6614   return isImm(); // Offset range is checked later by validator.
6615 }
6616 
6617 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6618   // 32-bit literals are only supported on CI and we only want to use them
6619   // when the offset is > 8-bits.
6620   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6621 }
6622 
6623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6624   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6625 }
6626 
6627 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6628   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6629 }
6630 
6631 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6632   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6633 }
6634 
6635 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6636   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6637 }
6638 
6639 //===----------------------------------------------------------------------===//
6640 // vop3
6641 //===----------------------------------------------------------------------===//
6642 
6643 static bool ConvertOmodMul(int64_t &Mul) {
6644   if (Mul != 1 && Mul != 2 && Mul != 4)
6645     return false;
6646 
6647   Mul >>= 1;
6648   return true;
6649 }
6650 
6651 static bool ConvertOmodDiv(int64_t &Div) {
6652   if (Div == 1) {
6653     Div = 0;
6654     return true;
6655   }
6656 
6657   if (Div == 2) {
6658     Div = 3;
6659     return true;
6660   }
6661 
6662   return false;
6663 }
6664 
6665 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6666   if (BoundCtrl == 0) {
6667     BoundCtrl = 1;
6668     return true;
6669   }
6670 
6671   if (BoundCtrl == -1) {
6672     BoundCtrl = 0;
6673     return true;
6674   }
6675 
6676   return false;
6677 }
6678 
6679 // Note: the order in this table matches the order of operands in AsmString.
6680 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6681   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6682   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6683   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6684   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6685   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6686   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6687   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6688   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6689   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6690   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6691   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6692   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6693   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6694   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6695   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6696   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6697   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6698   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6699   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6700   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6701   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6702   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6703   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6704   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6705   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6706   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6707   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6708   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6709   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6710   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6711   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6712   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6713   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6714   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6715   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6716   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6717   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6718   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6719   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6720   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6721   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6722   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6723   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6724 };
6725 
6726 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6727 
6728   OperandMatchResultTy res = parseOptionalOpr(Operands);
6729 
6730   // This is a hack to enable hardcoded mandatory operands which follow
6731   // optional operands.
6732   //
6733   // Current design assumes that all operands after the first optional operand
6734   // are also optional. However implementation of some instructions violates
6735   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6736   //
6737   // To alleviate this problem, we have to (implicitly) parse extra operands
6738   // to make sure autogenerated parser of custom operands never hit hardcoded
6739   // mandatory operands.
6740 
6741   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6742     if (res != MatchOperand_Success ||
6743         isToken(AsmToken::EndOfStatement))
6744       break;
6745 
6746     trySkipToken(AsmToken::Comma);
6747     res = parseOptionalOpr(Operands);
6748   }
6749 
6750   return res;
6751 }
6752 
6753 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6754   OperandMatchResultTy res;
6755   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6756     // try to parse any optional operand here
6757     if (Op.IsBit) {
6758       res = parseNamedBit(Op.Name, Operands, Op.Type);
6759     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6760       res = parseOModOperand(Operands);
6761     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6762                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6763                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6764       res = parseSDWASel(Operands, Op.Name, Op.Type);
6765     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6766       res = parseSDWADstUnused(Operands);
6767     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6768                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6769                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6770                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6771       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6772                                         Op.ConvertResult);
6773     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6774       res = parseDim(Operands);
6775     } else {
6776       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6777     }
6778     if (res != MatchOperand_NoMatch) {
6779       return res;
6780     }
6781   }
6782   return MatchOperand_NoMatch;
6783 }
6784 
6785 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6786   StringRef Name = Parser.getTok().getString();
6787   if (Name == "mul") {
6788     return parseIntWithPrefix("mul", Operands,
6789                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6790   }
6791 
6792   if (Name == "div") {
6793     return parseIntWithPrefix("div", Operands,
6794                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6795   }
6796 
6797   return MatchOperand_NoMatch;
6798 }
6799 
6800 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6801   cvtVOP3P(Inst, Operands);
6802 
6803   int Opc = Inst.getOpcode();
6804 
6805   int SrcNum;
6806   const int Ops[] = { AMDGPU::OpName::src0,
6807                       AMDGPU::OpName::src1,
6808                       AMDGPU::OpName::src2 };
6809   for (SrcNum = 0;
6810        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6811        ++SrcNum);
6812   assert(SrcNum > 0);
6813 
6814   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6815   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6816 
6817   if ((OpSel & (1 << SrcNum)) != 0) {
6818     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6819     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6820     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6821   }
6822 }
6823 
6824 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6825       // 1. This operand is input modifiers
6826   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6827       // 2. This is not last operand
6828       && Desc.NumOperands > (OpNum + 1)
6829       // 3. Next operand is register class
6830       && Desc.OpInfo[OpNum + 1].RegClass != -1
6831       // 4. Next register is not tied to any other operand
6832       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6833 }
6834 
6835 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6836 {
6837   OptionalImmIndexMap OptionalIdx;
6838   unsigned Opc = Inst.getOpcode();
6839 
6840   unsigned I = 1;
6841   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6842   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6843     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6844   }
6845 
6846   for (unsigned E = Operands.size(); I != E; ++I) {
6847     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6848     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6849       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6850     } else if (Op.isInterpSlot() ||
6851                Op.isInterpAttr() ||
6852                Op.isAttrChan()) {
6853       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6854     } else if (Op.isImmModifier()) {
6855       OptionalIdx[Op.getImmTy()] = I;
6856     } else {
6857       llvm_unreachable("unhandled operand type");
6858     }
6859   }
6860 
6861   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6862     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6863   }
6864 
6865   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6866     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6867   }
6868 
6869   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6870     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6871   }
6872 }
6873 
6874 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6875                               OptionalImmIndexMap &OptionalIdx) {
6876   unsigned Opc = Inst.getOpcode();
6877 
6878   unsigned I = 1;
6879   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6880   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6881     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6882   }
6883 
6884   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6885     // This instruction has src modifiers
6886     for (unsigned E = Operands.size(); I != E; ++I) {
6887       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6888       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6889         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6890       } else if (Op.isImmModifier()) {
6891         OptionalIdx[Op.getImmTy()] = I;
6892       } else if (Op.isRegOrImm()) {
6893         Op.addRegOrImmOperands(Inst, 1);
6894       } else {
6895         llvm_unreachable("unhandled operand type");
6896       }
6897     }
6898   } else {
6899     // No src modifiers
6900     for (unsigned E = Operands.size(); I != E; ++I) {
6901       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6902       if (Op.isMod()) {
6903         OptionalIdx[Op.getImmTy()] = I;
6904       } else {
6905         Op.addRegOrImmOperands(Inst, 1);
6906       }
6907     }
6908   }
6909 
6910   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6911     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6912   }
6913 
6914   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6915     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6916   }
6917 
6918   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6919   // it has src2 register operand that is tied to dst operand
6920   // we don't allow modifiers for this operand in assembler so src2_modifiers
6921   // should be 0.
6922   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6923       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6924       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6925       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
6926       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
6927       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6928       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6929       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6930       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
6931       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6932     auto it = Inst.begin();
6933     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6934     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6935     ++it;
6936     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6937   }
6938 }
6939 
6940 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6941   OptionalImmIndexMap OptionalIdx;
6942   cvtVOP3(Inst, Operands, OptionalIdx);
6943 }
6944 
6945 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6946                                const OperandVector &Operands) {
6947   OptionalImmIndexMap OptIdx;
6948   const int Opc = Inst.getOpcode();
6949   const MCInstrDesc &Desc = MII.get(Opc);
6950 
6951   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6952 
6953   cvtVOP3(Inst, Operands, OptIdx);
6954 
6955   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6956     assert(!IsPacked);
6957     Inst.addOperand(Inst.getOperand(0));
6958   }
6959 
6960   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6961   // instruction, and then figure out where to actually put the modifiers
6962 
6963   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6964 
6965   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6966   if (OpSelHiIdx != -1) {
6967     int DefaultVal = IsPacked ? -1 : 0;
6968     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6969                           DefaultVal);
6970   }
6971 
6972   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6973   if (NegLoIdx != -1) {
6974     assert(IsPacked);
6975     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6976     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6977   }
6978 
6979   const int Ops[] = { AMDGPU::OpName::src0,
6980                       AMDGPU::OpName::src1,
6981                       AMDGPU::OpName::src2 };
6982   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6983                          AMDGPU::OpName::src1_modifiers,
6984                          AMDGPU::OpName::src2_modifiers };
6985 
6986   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6987 
6988   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6989   unsigned OpSelHi = 0;
6990   unsigned NegLo = 0;
6991   unsigned NegHi = 0;
6992 
6993   if (OpSelHiIdx != -1) {
6994     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6995   }
6996 
6997   if (NegLoIdx != -1) {
6998     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6999     NegLo = Inst.getOperand(NegLoIdx).getImm();
7000     NegHi = Inst.getOperand(NegHiIdx).getImm();
7001   }
7002 
7003   for (int J = 0; J < 3; ++J) {
7004     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7005     if (OpIdx == -1)
7006       break;
7007 
7008     uint32_t ModVal = 0;
7009 
7010     if ((OpSel & (1 << J)) != 0)
7011       ModVal |= SISrcMods::OP_SEL_0;
7012 
7013     if ((OpSelHi & (1 << J)) != 0)
7014       ModVal |= SISrcMods::OP_SEL_1;
7015 
7016     if ((NegLo & (1 << J)) != 0)
7017       ModVal |= SISrcMods::NEG;
7018 
7019     if ((NegHi & (1 << J)) != 0)
7020       ModVal |= SISrcMods::NEG_HI;
7021 
7022     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7023 
7024     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7025   }
7026 }
7027 
7028 //===----------------------------------------------------------------------===//
7029 // dpp
7030 //===----------------------------------------------------------------------===//
7031 
7032 bool AMDGPUOperand::isDPP8() const {
7033   return isImmTy(ImmTyDPP8);
7034 }
7035 
7036 bool AMDGPUOperand::isDPPCtrl() const {
7037   using namespace AMDGPU::DPP;
7038 
7039   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7040   if (result) {
7041     int64_t Imm = getImm();
7042     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7043            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7044            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7045            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7046            (Imm == DppCtrl::WAVE_SHL1) ||
7047            (Imm == DppCtrl::WAVE_ROL1) ||
7048            (Imm == DppCtrl::WAVE_SHR1) ||
7049            (Imm == DppCtrl::WAVE_ROR1) ||
7050            (Imm == DppCtrl::ROW_MIRROR) ||
7051            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7052            (Imm == DppCtrl::BCAST15) ||
7053            (Imm == DppCtrl::BCAST31) ||
7054            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7055            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7056   }
7057   return false;
7058 }
7059 
7060 //===----------------------------------------------------------------------===//
7061 // mAI
7062 //===----------------------------------------------------------------------===//
7063 
7064 bool AMDGPUOperand::isBLGP() const {
7065   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7066 }
7067 
7068 bool AMDGPUOperand::isCBSZ() const {
7069   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7070 }
7071 
7072 bool AMDGPUOperand::isABID() const {
7073   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7074 }
7075 
7076 bool AMDGPUOperand::isS16Imm() const {
7077   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7078 }
7079 
7080 bool AMDGPUOperand::isU16Imm() const {
7081   return isImm() && isUInt<16>(getImm());
7082 }
7083 
7084 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7085   if (!isGFX10())
7086     return MatchOperand_NoMatch;
7087 
7088   SMLoc S = Parser.getTok().getLoc();
7089 
7090   if (getLexer().isNot(AsmToken::Identifier))
7091     return MatchOperand_NoMatch;
7092   if (getLexer().getTok().getString() != "dim")
7093     return MatchOperand_NoMatch;
7094 
7095   Parser.Lex();
7096   if (getLexer().isNot(AsmToken::Colon))
7097     return MatchOperand_ParseFail;
7098 
7099   Parser.Lex();
7100 
7101   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7102   // integer.
7103   std::string Token;
7104   if (getLexer().is(AsmToken::Integer)) {
7105     SMLoc Loc = getLexer().getTok().getEndLoc();
7106     Token = std::string(getLexer().getTok().getString());
7107     Parser.Lex();
7108     if (getLexer().getTok().getLoc() != Loc)
7109       return MatchOperand_ParseFail;
7110   }
7111   if (getLexer().isNot(AsmToken::Identifier))
7112     return MatchOperand_ParseFail;
7113   Token += getLexer().getTok().getString();
7114 
7115   StringRef DimId = Token;
7116   if (DimId.startswith("SQ_RSRC_IMG_"))
7117     DimId = DimId.substr(12);
7118 
7119   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7120   if (!DimInfo)
7121     return MatchOperand_ParseFail;
7122 
7123   Parser.Lex();
7124 
7125   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7126                                               AMDGPUOperand::ImmTyDim));
7127   return MatchOperand_Success;
7128 }
7129 
7130 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7131   SMLoc S = Parser.getTok().getLoc();
7132   StringRef Prefix;
7133 
7134   if (getLexer().getKind() == AsmToken::Identifier) {
7135     Prefix = Parser.getTok().getString();
7136   } else {
7137     return MatchOperand_NoMatch;
7138   }
7139 
7140   if (Prefix != "dpp8")
7141     return parseDPPCtrl(Operands);
7142   if (!isGFX10())
7143     return MatchOperand_NoMatch;
7144 
7145   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7146 
7147   int64_t Sels[8];
7148 
7149   Parser.Lex();
7150   if (getLexer().isNot(AsmToken::Colon))
7151     return MatchOperand_ParseFail;
7152 
7153   Parser.Lex();
7154   if (getLexer().isNot(AsmToken::LBrac))
7155     return MatchOperand_ParseFail;
7156 
7157   Parser.Lex();
7158   if (getParser().parseAbsoluteExpression(Sels[0]))
7159     return MatchOperand_ParseFail;
7160   if (0 > Sels[0] || 7 < Sels[0])
7161     return MatchOperand_ParseFail;
7162 
7163   for (size_t i = 1; i < 8; ++i) {
7164     if (getLexer().isNot(AsmToken::Comma))
7165       return MatchOperand_ParseFail;
7166 
7167     Parser.Lex();
7168     if (getParser().parseAbsoluteExpression(Sels[i]))
7169       return MatchOperand_ParseFail;
7170     if (0 > Sels[i] || 7 < Sels[i])
7171       return MatchOperand_ParseFail;
7172   }
7173 
7174   if (getLexer().isNot(AsmToken::RBrac))
7175     return MatchOperand_ParseFail;
7176   Parser.Lex();
7177 
7178   unsigned DPP8 = 0;
7179   for (size_t i = 0; i < 8; ++i)
7180     DPP8 |= (Sels[i] << (i * 3));
7181 
7182   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7183   return MatchOperand_Success;
7184 }
7185 
7186 OperandMatchResultTy
7187 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7188   using namespace AMDGPU::DPP;
7189 
7190   SMLoc S = Parser.getTok().getLoc();
7191   StringRef Prefix;
7192   int64_t Int;
7193 
7194   if (getLexer().getKind() == AsmToken::Identifier) {
7195     Prefix = Parser.getTok().getString();
7196   } else {
7197     return MatchOperand_NoMatch;
7198   }
7199 
7200   if (Prefix == "row_mirror") {
7201     Int = DppCtrl::ROW_MIRROR;
7202     Parser.Lex();
7203   } else if (Prefix == "row_half_mirror") {
7204     Int = DppCtrl::ROW_HALF_MIRROR;
7205     Parser.Lex();
7206   } else {
7207     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7208     if (Prefix != "quad_perm"
7209         && Prefix != "row_shl"
7210         && Prefix != "row_shr"
7211         && Prefix != "row_ror"
7212         && Prefix != "wave_shl"
7213         && Prefix != "wave_rol"
7214         && Prefix != "wave_shr"
7215         && Prefix != "wave_ror"
7216         && Prefix != "row_bcast"
7217         && Prefix != "row_share"
7218         && Prefix != "row_xmask") {
7219       return MatchOperand_NoMatch;
7220     }
7221 
7222     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7223       return MatchOperand_NoMatch;
7224 
7225     if (!isVI() && !isGFX9() &&
7226         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7227          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7228          Prefix == "row_bcast"))
7229       return MatchOperand_NoMatch;
7230 
7231     Parser.Lex();
7232     if (getLexer().isNot(AsmToken::Colon))
7233       return MatchOperand_ParseFail;
7234 
7235     if (Prefix == "quad_perm") {
7236       // quad_perm:[%d,%d,%d,%d]
7237       Parser.Lex();
7238       if (getLexer().isNot(AsmToken::LBrac))
7239         return MatchOperand_ParseFail;
7240       Parser.Lex();
7241 
7242       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7243         return MatchOperand_ParseFail;
7244 
7245       for (int i = 0; i < 3; ++i) {
7246         if (getLexer().isNot(AsmToken::Comma))
7247           return MatchOperand_ParseFail;
7248         Parser.Lex();
7249 
7250         int64_t Temp;
7251         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7252           return MatchOperand_ParseFail;
7253         const int shift = i*2 + 2;
7254         Int += (Temp << shift);
7255       }
7256 
7257       if (getLexer().isNot(AsmToken::RBrac))
7258         return MatchOperand_ParseFail;
7259       Parser.Lex();
7260     } else {
7261       // sel:%d
7262       Parser.Lex();
7263       if (getParser().parseAbsoluteExpression(Int))
7264         return MatchOperand_ParseFail;
7265 
7266       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7267         Int |= DppCtrl::ROW_SHL0;
7268       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7269         Int |= DppCtrl::ROW_SHR0;
7270       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7271         Int |= DppCtrl::ROW_ROR0;
7272       } else if (Prefix == "wave_shl" && 1 == Int) {
7273         Int = DppCtrl::WAVE_SHL1;
7274       } else if (Prefix == "wave_rol" && 1 == Int) {
7275         Int = DppCtrl::WAVE_ROL1;
7276       } else if (Prefix == "wave_shr" && 1 == Int) {
7277         Int = DppCtrl::WAVE_SHR1;
7278       } else if (Prefix == "wave_ror" && 1 == Int) {
7279         Int = DppCtrl::WAVE_ROR1;
7280       } else if (Prefix == "row_bcast") {
7281         if (Int == 15) {
7282           Int = DppCtrl::BCAST15;
7283         } else if (Int == 31) {
7284           Int = DppCtrl::BCAST31;
7285         } else {
7286           return MatchOperand_ParseFail;
7287         }
7288       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7289         Int |= DppCtrl::ROW_SHARE_FIRST;
7290       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7291         Int |= DppCtrl::ROW_XMASK_FIRST;
7292       } else {
7293         return MatchOperand_ParseFail;
7294       }
7295     }
7296   }
7297 
7298   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7299   return MatchOperand_Success;
7300 }
7301 
7302 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7303   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7304 }
7305 
7306 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7307   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7308 }
7309 
7310 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7311   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7312 }
7313 
7314 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7315   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7316 }
7317 
7318 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7319   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7320 }
7321 
7322 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7323   OptionalImmIndexMap OptionalIdx;
7324 
7325   unsigned I = 1;
7326   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7327   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7328     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7329   }
7330 
7331   int Fi = 0;
7332   for (unsigned E = Operands.size(); I != E; ++I) {
7333     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7334                                             MCOI::TIED_TO);
7335     if (TiedTo != -1) {
7336       assert((unsigned)TiedTo < Inst.getNumOperands());
7337       // handle tied old or src2 for MAC instructions
7338       Inst.addOperand(Inst.getOperand(TiedTo));
7339     }
7340     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7341     // Add the register arguments
7342     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7343       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7344       // Skip it.
7345       continue;
7346     }
7347 
7348     if (IsDPP8) {
7349       if (Op.isDPP8()) {
7350         Op.addImmOperands(Inst, 1);
7351       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7352         Op.addRegWithFPInputModsOperands(Inst, 2);
7353       } else if (Op.isFI()) {
7354         Fi = Op.getImm();
7355       } else if (Op.isReg()) {
7356         Op.addRegOperands(Inst, 1);
7357       } else {
7358         llvm_unreachable("Invalid operand type");
7359       }
7360     } else {
7361       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7362         Op.addRegWithFPInputModsOperands(Inst, 2);
7363       } else if (Op.isDPPCtrl()) {
7364         Op.addImmOperands(Inst, 1);
7365       } else if (Op.isImm()) {
7366         // Handle optional arguments
7367         OptionalIdx[Op.getImmTy()] = I;
7368       } else {
7369         llvm_unreachable("Invalid operand type");
7370       }
7371     }
7372   }
7373 
7374   if (IsDPP8) {
7375     using namespace llvm::AMDGPU::DPP;
7376     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7377   } else {
7378     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7379     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7380     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7381     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7382       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7383     }
7384   }
7385 }
7386 
7387 //===----------------------------------------------------------------------===//
7388 // sdwa
7389 //===----------------------------------------------------------------------===//
7390 
7391 OperandMatchResultTy
7392 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7393                               AMDGPUOperand::ImmTy Type) {
7394   using namespace llvm::AMDGPU::SDWA;
7395 
7396   SMLoc S = Parser.getTok().getLoc();
7397   StringRef Value;
7398   OperandMatchResultTy res;
7399 
7400   res = parseStringWithPrefix(Prefix, Value);
7401   if (res != MatchOperand_Success) {
7402     return res;
7403   }
7404 
7405   int64_t Int;
7406   Int = StringSwitch<int64_t>(Value)
7407         .Case("BYTE_0", SdwaSel::BYTE_0)
7408         .Case("BYTE_1", SdwaSel::BYTE_1)
7409         .Case("BYTE_2", SdwaSel::BYTE_2)
7410         .Case("BYTE_3", SdwaSel::BYTE_3)
7411         .Case("WORD_0", SdwaSel::WORD_0)
7412         .Case("WORD_1", SdwaSel::WORD_1)
7413         .Case("DWORD", SdwaSel::DWORD)
7414         .Default(0xffffffff);
7415   Parser.Lex(); // eat last token
7416 
7417   if (Int == 0xffffffff) {
7418     return MatchOperand_ParseFail;
7419   }
7420 
7421   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7422   return MatchOperand_Success;
7423 }
7424 
7425 OperandMatchResultTy
7426 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7427   using namespace llvm::AMDGPU::SDWA;
7428 
7429   SMLoc S = Parser.getTok().getLoc();
7430   StringRef Value;
7431   OperandMatchResultTy res;
7432 
7433   res = parseStringWithPrefix("dst_unused", Value);
7434   if (res != MatchOperand_Success) {
7435     return res;
7436   }
7437 
7438   int64_t Int;
7439   Int = StringSwitch<int64_t>(Value)
7440         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7441         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7442         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7443         .Default(0xffffffff);
7444   Parser.Lex(); // eat last token
7445 
7446   if (Int == 0xffffffff) {
7447     return MatchOperand_ParseFail;
7448   }
7449 
7450   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7451   return MatchOperand_Success;
7452 }
7453 
7454 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7455   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7456 }
7457 
7458 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7459   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7460 }
7461 
7462 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7463   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7464 }
7465 
7466 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7467   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7468 }
7469 
7470 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7471   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7472 }
7473 
7474 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7475                               uint64_t BasicInstType,
7476                               bool SkipDstVcc,
7477                               bool SkipSrcVcc) {
7478   using namespace llvm::AMDGPU::SDWA;
7479 
7480   OptionalImmIndexMap OptionalIdx;
7481   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7482   bool SkippedVcc = false;
7483 
7484   unsigned I = 1;
7485   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7486   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7487     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7488   }
7489 
7490   for (unsigned E = Operands.size(); I != E; ++I) {
7491     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7492     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7493         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7494       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7495       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7496       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7497       // Skip VCC only if we didn't skip it on previous iteration.
7498       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7499       if (BasicInstType == SIInstrFlags::VOP2 &&
7500           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7501            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7502         SkippedVcc = true;
7503         continue;
7504       } else if (BasicInstType == SIInstrFlags::VOPC &&
7505                  Inst.getNumOperands() == 0) {
7506         SkippedVcc = true;
7507         continue;
7508       }
7509     }
7510     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7511       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7512     } else if (Op.isImm()) {
7513       // Handle optional arguments
7514       OptionalIdx[Op.getImmTy()] = I;
7515     } else {
7516       llvm_unreachable("Invalid operand type");
7517     }
7518     SkippedVcc = false;
7519   }
7520 
7521   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7522       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7523       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7524     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7525     switch (BasicInstType) {
7526     case SIInstrFlags::VOP1:
7527       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7528       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7529         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7530       }
7531       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7532       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7533       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7534       break;
7535 
7536     case SIInstrFlags::VOP2:
7537       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7538       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7539         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7540       }
7541       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7542       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7543       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7544       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7545       break;
7546 
7547     case SIInstrFlags::VOPC:
7548       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7549         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7550       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7551       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7552       break;
7553 
7554     default:
7555       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7556     }
7557   }
7558 
7559   // special case v_mac_{f16, f32}:
7560   // it has src2 register operand that is tied to dst operand
7561   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7562       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7563     auto it = Inst.begin();
7564     std::advance(
7565       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7566     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7567   }
7568 }
7569 
7570 //===----------------------------------------------------------------------===//
7571 // mAI
7572 //===----------------------------------------------------------------------===//
7573 
7574 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7575   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7576 }
7577 
7578 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7579   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7580 }
7581 
7582 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7583   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7584 }
7585 
7586 /// Force static initialization.
7587 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7588   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7589   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7590 }
7591 
7592 #define GET_REGISTER_MATCHER
7593 #define GET_MATCHER_IMPLEMENTATION
7594 #define GET_MNEMONIC_SPELL_CHECKER
7595 #define GET_MNEMONIC_CHECKER
7596 #include "AMDGPUGenAsmMatcher.inc"
7597 
7598 // This fuction should be defined after auto-generated include so that we have
7599 // MatchClassKind enum defined
7600 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7601                                                      unsigned Kind) {
7602   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7603   // But MatchInstructionImpl() expects to meet token and fails to validate
7604   // operand. This method checks if we are given immediate operand but expect to
7605   // get corresponding token.
7606   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7607   switch (Kind) {
7608   case MCK_addr64:
7609     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7610   case MCK_gds:
7611     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7612   case MCK_lds:
7613     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7614   case MCK_glc:
7615     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7616   case MCK_idxen:
7617     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7618   case MCK_offen:
7619     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7620   case MCK_SSrcB32:
7621     // When operands have expression values, they will return true for isToken,
7622     // because it is not possible to distinguish between a token and an
7623     // expression at parse time. MatchInstructionImpl() will always try to
7624     // match an operand as a token, when isToken returns true, and when the
7625     // name of the expression is not a valid token, the match will fail,
7626     // so we need to handle it here.
7627     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7628   case MCK_SSrcF32:
7629     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7630   case MCK_SoppBrTarget:
7631     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7632   case MCK_VReg32OrOff:
7633     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7634   case MCK_InterpSlot:
7635     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7636   case MCK_Attr:
7637     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7638   case MCK_AttrChan:
7639     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7640   case MCK_ImmSMEMOffset:
7641     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7642   case MCK_SReg_64:
7643   case MCK_SReg_64_XEXEC:
7644     // Null is defined as a 32-bit register but
7645     // it should also be enabled with 64-bit operands.
7646     // The following code enables it for SReg_64 operands
7647     // used as source and destination. Remaining source
7648     // operands are handled in isInlinableImm.
7649     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7650   default:
7651     return Match_InvalidOperand;
7652   }
7653 }
7654 
7655 //===----------------------------------------------------------------------===//
7656 // endpgm
7657 //===----------------------------------------------------------------------===//
7658 
7659 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7660   SMLoc S = Parser.getTok().getLoc();
7661   int64_t Imm = 0;
7662 
7663   if (!parseExpr(Imm)) {
7664     // The operand is optional, if not present default to 0
7665     Imm = 0;
7666   }
7667 
7668   if (!isUInt<16>(Imm)) {
7669     Error(S, "expected a 16-bit value");
7670     return MatchOperand_ParseFail;
7671   }
7672 
7673   Operands.push_back(
7674       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7675   return MatchOperand_Success;
7676 }
7677 
7678 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7679