1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
/// Register categories distinguished by the parser. Enumerator values are
/// implicit and start at 0 in declaration order.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
961 // .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth,
1092                          SMLoc Loc);
1093 
1094   bool isRegister();
1095   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1096   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1097   void initializeGprCountSymbol(RegisterKind RegKind);
1098   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1099                              unsigned RegWidth);
1100   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1101                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1102   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1103                  bool IsGdsHardcoded);
1104 
1105 public:
1106   enum AMDGPUMatchResultTy {
1107     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1108   };
1109   enum OperandMode {
1110     OperandMode_Default,
1111     OperandMode_NSA,
1112   };
1113 
1114   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1115 
1116   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1117                const MCInstrInfo &MII,
1118                const MCTargetOptions &Options)
1119       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1120     MCAsmParserExtension::Initialize(Parser);
1121 
1122     if (getFeatureBits().none()) {
1123       // Set default features.
1124       copySTI().ToggleFeature("southern-islands");
1125     }
1126 
1127     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1128 
1129     {
1130       // TODO: make those pre-defined variables read-only.
1131       // Currently there is none suitable machinery in the core llvm-mc for this.
1132       // MCSymbol::isRedefinable is intended for another purpose, and
1133       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1134       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1135       MCContext &Ctx = getContext();
1136       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1137         MCSymbol *Sym =
1138             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1144       } else {
1145         MCSymbol *Sym =
1146             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1147         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1148         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1149         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1150         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1151         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1152       }
1153       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1154         initializeGprCountSymbol(IS_VGPR);
1155         initializeGprCountSymbol(IS_SGPR);
1156       } else
1157         KernelScope.initialize(getContext());
1158     }
1159   }
1160 
1161   bool hasXNACK() const {
1162     return AMDGPU::hasXNACK(getSTI());
1163   }
1164 
1165   bool hasMIMG_R128() const {
1166     return AMDGPU::hasMIMG_R128(getSTI());
1167   }
1168 
1169   bool hasPackedD16() const {
1170     return AMDGPU::hasPackedD16(getSTI());
1171   }
1172 
1173   bool hasGFX10A16() const {
1174     return AMDGPU::hasGFX10A16(getSTI());
1175   }
1176 
1177   bool isSI() const {
1178     return AMDGPU::isSI(getSTI());
1179   }
1180 
1181   bool isCI() const {
1182     return AMDGPU::isCI(getSTI());
1183   }
1184 
1185   bool isVI() const {
1186     return AMDGPU::isVI(getSTI());
1187   }
1188 
1189   bool isGFX9() const {
1190     return AMDGPU::isGFX9(getSTI());
1191   }
1192 
1193   bool isGFX9Plus() const {
1194     return AMDGPU::isGFX9Plus(getSTI());
1195   }
1196 
1197   bool isGFX10() const {
1198     return AMDGPU::isGFX10(getSTI());
1199   }
1200 
1201   bool isGFX10_BEncoding() const {
1202     return AMDGPU::isGFX10_BEncoding(getSTI());
1203   }
1204 
1205   bool hasInv2PiInlineImm() const {
1206     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1207   }
1208 
1209   bool hasFlatOffsets() const {
1210     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1211   }
1212 
1213   bool hasSGPR102_SGPR103() const {
1214     return !isVI() && !isGFX9();
1215   }
1216 
1217   bool hasSGPR104_SGPR105() const {
1218     return isGFX10();
1219   }
1220 
1221   bool hasIntClamp() const {
1222     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1223   }
1224 
1225   AMDGPUTargetStreamer &getTargetStreamer() {
1226     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1227     return static_cast<AMDGPUTargetStreamer &>(TS);
1228   }
1229 
1230   const MCRegisterInfo *getMRI() const {
1231     // We need this const_cast because for some reason getContext() is not const
1232     // in MCAsmParser.
1233     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1234   }
1235 
1236   const MCInstrInfo *getMII() const {
1237     return &MII;
1238   }
1239 
1240   const FeatureBitset &getFeatureBits() const {
1241     return getSTI().getFeatureBits();
1242   }
1243 
1244   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1245   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1246   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1247 
1248   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1249   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1250   bool isForcedDPP() const { return ForcedDPP; }
1251   bool isForcedSDWA() const { return ForcedSDWA; }
1252   ArrayRef<unsigned> getMatchedVariants() const;
1253   StringRef getMatchedVariantName() const;
1254 
1255   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1256   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1257                      bool RestoreOnFailure);
1258   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1259   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1260                                         SMLoc &EndLoc) override;
1261   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1262   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1263                                       unsigned Kind) override;
1264   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1265                                OperandVector &Operands, MCStreamer &Out,
1266                                uint64_t &ErrorInfo,
1267                                bool MatchingInlineAsm) override;
1268   bool ParseDirective(AsmToken DirectiveID) override;
1269   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1270                                     OperandMode Mode = OperandMode_Default);
1271   StringRef parseMnemonicSuffix(StringRef Name);
1272   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1273                         SMLoc NameLoc, OperandVector &Operands) override;
1274   //bool ProcessInstruction(MCInst &Inst);
1275 
1276   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1277 
1278   OperandMatchResultTy
1279   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1280                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1281                      bool (*ConvertResult)(int64_t &) = nullptr);
1282 
1283   OperandMatchResultTy
1284   parseOperandArrayWithPrefix(const char *Prefix,
1285                               OperandVector &Operands,
1286                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1287                               bool (*ConvertResult)(int64_t&) = nullptr);
1288 
1289   OperandMatchResultTy
1290   parseNamedBit(const char *Name, OperandVector &Operands,
1291                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1292   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1293                                              StringRef &Value);
1294 
1295   bool isModifier();
1296   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1297   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1298   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1299   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1300   bool parseSP3NegModifier();
1301   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1302   OperandMatchResultTy parseReg(OperandVector &Operands);
1303   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1304   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1305   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1306   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1307   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1308   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1309   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1310   OperandMatchResultTy parseUfmt(int64_t &Format);
1311   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1312   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1313   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1314   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1315   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1316   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1317   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1318 
1319   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1320   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1321   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1322   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1323 
1324   bool parseCnt(int64_t &IntVal);
1325   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1326   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1327 
1328 private:
1329   struct OperandInfoTy {
1330     int64_t Id;
1331     bool IsSymbolic = false;
1332     bool IsDefined = false;
1333 
1334     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1335   };
1336 
1337   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1338   bool validateSendMsg(const OperandInfoTy &Msg,
1339                        const OperandInfoTy &Op,
1340                        const OperandInfoTy &Stream,
1341                        const SMLoc Loc);
1342 
1343   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1344   bool validateHwreg(const OperandInfoTy &HwReg,
1345                      const int64_t Offset,
1346                      const int64_t Width,
1347                      const SMLoc Loc);
1348 
1349   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1350   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1351   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1352 
1353   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1354   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1355   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1356   bool validateSOPLiteral(const MCInst &Inst) const;
1357   bool validateConstantBusLimitations(const MCInst &Inst);
1358   bool validateEarlyClobberLimitations(const MCInst &Inst);
1359   bool validateIntClampSupported(const MCInst &Inst);
1360   bool validateMIMGAtomicDMask(const MCInst &Inst);
1361   bool validateMIMGGatherDMask(const MCInst &Inst);
1362   bool validateMovrels(const MCInst &Inst);
1363   bool validateMIMGDataSize(const MCInst &Inst);
1364   bool validateMIMGAddrSize(const MCInst &Inst);
1365   bool validateMIMGD16(const MCInst &Inst);
1366   bool validateMIMGDim(const MCInst &Inst);
1367   bool validateLdsDirect(const MCInst &Inst);
1368   bool validateOpSel(const MCInst &Inst);
1369   bool validateVccOperand(unsigned Reg) const;
1370   bool validateVOP3Literal(const MCInst &Inst) const;
1371   bool validateMAIAccWrite(const MCInst &Inst);
1372   unsigned getConstantBusLimit(unsigned Opcode) const;
1373   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1374   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1375   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1376 
1377   bool isSupportedMnemo(StringRef Mnemo,
1378                         const FeatureBitset &FBS);
1379   bool isSupportedMnemo(StringRef Mnemo,
1380                         const FeatureBitset &FBS,
1381                         ArrayRef<unsigned> Variants);
1382   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1383 
1384   bool isId(const StringRef Id) const;
1385   bool isId(const AsmToken &Token, const StringRef Id) const;
1386   bool isToken(const AsmToken::TokenKind Kind) const;
1387   bool trySkipId(const StringRef Id);
1388   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1389   bool trySkipToken(const AsmToken::TokenKind Kind);
1390   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1391   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1392   bool parseId(StringRef &Val, const StringRef ErrMsg);
1393 
1394   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1395   AsmToken::TokenKind getTokenKind() const;
1396   bool parseExpr(int64_t &Imm);
1397   bool parseExpr(OperandVector &Operands);
1398   StringRef getTokenStr() const;
1399   AsmToken peekToken();
1400   AsmToken getToken() const;
1401   SMLoc getLoc() const;
1402   void lex();
1403 
1404 public:
1405   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1406   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1407 
1408   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1409   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1410   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1411   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1412   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1413   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1414 
1415   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1416                             const unsigned MinVal,
1417                             const unsigned MaxVal,
1418                             const StringRef ErrMsg);
1419   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1420   bool parseSwizzleOffset(int64_t &Imm);
1421   bool parseSwizzleMacro(int64_t &Imm);
1422   bool parseSwizzleQuadPerm(int64_t &Imm);
1423   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1424   bool parseSwizzleBroadcast(int64_t &Imm);
1425   bool parseSwizzleSwap(int64_t &Imm);
1426   bool parseSwizzleReverse(int64_t &Imm);
1427 
1428   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1429   int64_t parseGPRIdxMacro();
1430 
1431   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1432   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1433   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1434   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1435   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1436 
1437   AMDGPUOperand::Ptr defaultDLC() const;
1438   AMDGPUOperand::Ptr defaultGLC() const;
1439   AMDGPUOperand::Ptr defaultSLC() const;
1440 
1441   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1442   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1443   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1444   AMDGPUOperand::Ptr defaultFlatOffset() const;
1445 
1446   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1447 
1448   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1449                OptionalImmIndexMap &OptionalIdx);
1450   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1451   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1452   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1453 
1454   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1455 
1456   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1457                bool IsAtomic = false);
1458   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1459   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1460 
1461   OperandMatchResultTy parseDim(OperandVector &Operands);
1462   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1463   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1464   AMDGPUOperand::Ptr defaultRowMask() const;
1465   AMDGPUOperand::Ptr defaultBankMask() const;
1466   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1467   AMDGPUOperand::Ptr defaultFI() const;
1468   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1469   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1470 
1471   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1472                                     AMDGPUOperand::ImmTy Type);
1473   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1474   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1475   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1476   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1477   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1478   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1479   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1480                uint64_t BasicInstType,
1481                bool SkipDstVcc = false,
1482                bool SkipSrcVcc = false);
1483 
1484   AMDGPUOperand::Ptr defaultBLGP() const;
1485   AMDGPUOperand::Ptr defaultCBSZ() const;
1486   AMDGPUOperand::Ptr defaultABID() const;
1487 
1488   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1489   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1490 };
1491 
1492 struct OptionalOperand {
1493   const char *Name;
1494   AMDGPUOperand::ImmTy Type;
1495   bool IsBit;
1496   bool (*ConvertResult)(int64_t&);
1497 };
1498 
1499 } // end anonymous namespace
1500 
1501 // May be called with integer type with equivalent bitwidth.
1502 static const fltSemantics *getFltSemantics(unsigned Size) {
1503   switch (Size) {
1504   case 4:
1505     return &APFloat::IEEEsingle();
1506   case 8:
1507     return &APFloat::IEEEdouble();
1508   case 2:
1509     return &APFloat::IEEEhalf();
1510   default:
1511     llvm_unreachable("unsupported fp type");
1512   }
1513 }
1514 
1515 static const fltSemantics *getFltSemantics(MVT VT) {
1516   return getFltSemantics(VT.getSizeInBits() / 8);
1517 }
1518 
1519 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1520   switch (OperandType) {
1521   case AMDGPU::OPERAND_REG_IMM_INT32:
1522   case AMDGPU::OPERAND_REG_IMM_FP32:
1523   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1524   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1525   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1526   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1527     return &APFloat::IEEEsingle();
1528   case AMDGPU::OPERAND_REG_IMM_INT64:
1529   case AMDGPU::OPERAND_REG_IMM_FP64:
1530   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1531   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1532     return &APFloat::IEEEdouble();
1533   case AMDGPU::OPERAND_REG_IMM_INT16:
1534   case AMDGPU::OPERAND_REG_IMM_FP16:
1535   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1536   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1537   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1538   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1539   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1540   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1541   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1542   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1543   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1544   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1545     return &APFloat::IEEEhalf();
1546   default:
1547     llvm_unreachable("unsupported fp type");
1548   }
1549 }
1550 
1551 //===----------------------------------------------------------------------===//
1552 // Operand
1553 //===----------------------------------------------------------------------===//
1554 
1555 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1556   bool Lost;
1557 
1558   // Convert literal to single precision
1559   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1560                                                APFloat::rmNearestTiesToEven,
1561                                                &Lost);
1562   // We allow precision lost but not overflow or underflow
1563   if (Status != APFloat::opOK &&
1564       Lost &&
1565       ((Status & APFloat::opOverflow)  != 0 ||
1566        (Status & APFloat::opUnderflow) != 0)) {
1567     return false;
1568   }
1569 
1570   return true;
1571 }
1572 
1573 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1574   return isUIntN(Size, Val) || isIntN(Size, Val);
1575 }
1576 
1577 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1578   if (VT.getScalarType() == MVT::i16) {
1579     // FP immediate values are broken.
1580     return isInlinableIntLiteral(Val);
1581   }
1582 
1583   // f16/v2f16 operands work correctly for all values.
1584   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1585 }
1586 
1587 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1588 
1589   // This is a hack to enable named inline values like
1590   // shared_base with both 32-bit and 64-bit operands.
1591   // Note that these values are defined as
1592   // 32-bit operands only.
1593   if (isInlineValue()) {
1594     return true;
1595   }
1596 
1597   if (!isImmTy(ImmTyNone)) {
1598     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1599     return false;
1600   }
1601   // TODO: We should avoid using host float here. It would be better to
1602   // check the float bit values which is what a few other places do.
1603   // We've had bot failures before due to weird NaN support on mips hosts.
1604 
1605   APInt Literal(64, Imm.Val);
1606 
1607   if (Imm.IsFPImm) { // We got fp literal token
1608     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1609       return AMDGPU::isInlinableLiteral64(Imm.Val,
1610                                           AsmParser->hasInv2PiInlineImm());
1611     }
1612 
1613     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1614     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1615       return false;
1616 
1617     if (type.getScalarSizeInBits() == 16) {
1618       return isInlineableLiteralOp16(
1619         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1620         type, AsmParser->hasInv2PiInlineImm());
1621     }
1622 
1623     // Check if single precision literal is inlinable
1624     return AMDGPU::isInlinableLiteral32(
1625       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1626       AsmParser->hasInv2PiInlineImm());
1627   }
1628 
1629   // We got int literal token.
1630   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1631     return AMDGPU::isInlinableLiteral64(Imm.Val,
1632                                         AsmParser->hasInv2PiInlineImm());
1633   }
1634 
1635   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1636     return false;
1637   }
1638 
1639   if (type.getScalarSizeInBits() == 16) {
1640     return isInlineableLiteralOp16(
1641       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1642       type, AsmParser->hasInv2PiInlineImm());
1643   }
1644 
1645   return AMDGPU::isInlinableLiteral32(
1646     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1647     AsmParser->hasInv2PiInlineImm());
1648 }
1649 
1650 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1651   // Check that this immediate can be added as literal
1652   if (!isImmTy(ImmTyNone)) {
1653     return false;
1654   }
1655 
1656   if (!Imm.IsFPImm) {
1657     // We got int literal token.
1658 
1659     if (type == MVT::f64 && hasFPModifiers()) {
1660       // Cannot apply fp modifiers to int literals preserving the same semantics
1661       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1662       // disable these cases.
1663       return false;
1664     }
1665 
1666     unsigned Size = type.getSizeInBits();
1667     if (Size == 64)
1668       Size = 32;
1669 
1670     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1671     // types.
1672     return isSafeTruncation(Imm.Val, Size);
1673   }
1674 
1675   // We got fp literal token
1676   if (type == MVT::f64) { // Expected 64-bit fp operand
1677     // We would set low 64-bits of literal to zeroes but we accept this literals
1678     return true;
1679   }
1680 
1681   if (type == MVT::i64) { // Expected 64-bit int operand
1682     // We don't allow fp literals in 64-bit integer instructions. It is
1683     // unclear how we should encode them.
1684     return false;
1685   }
1686 
1687   // We allow fp literals with f16x2 operands assuming that the specified
1688   // literal goes into the lower half and the upper half is zero. We also
1689   // require that the literal may be losslesly converted to f16.
1690   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1691                      (type == MVT::v2i16)? MVT::i16 : type;
1692 
1693   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1694   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1695 }
1696 
1697 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1698   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1699 }
1700 
1701 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1702   if (AsmParser->isVI())
1703     return isVReg32();
1704   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1705     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1706   else
1707     return false;
1708 }
1709 
1710 bool AMDGPUOperand::isSDWAFP16Operand() const {
1711   return isSDWAOperand(MVT::f16);
1712 }
1713 
1714 bool AMDGPUOperand::isSDWAFP32Operand() const {
1715   return isSDWAOperand(MVT::f32);
1716 }
1717 
1718 bool AMDGPUOperand::isSDWAInt16Operand() const {
1719   return isSDWAOperand(MVT::i16);
1720 }
1721 
1722 bool AMDGPUOperand::isSDWAInt32Operand() const {
1723   return isSDWAOperand(MVT::i32);
1724 }
1725 
1726 bool AMDGPUOperand::isBoolReg() const {
1727   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1728          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1729 }
1730 
1731 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1732 {
1733   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1734   assert(Size == 2 || Size == 4 || Size == 8);
1735 
1736   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1737 
1738   if (Imm.Mods.Abs) {
1739     Val &= ~FpSignMask;
1740   }
1741   if (Imm.Mods.Neg) {
1742     Val ^= FpSignMask;
1743   }
1744 
1745   return Val;
1746 }
1747 
1748 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1749   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1750                              Inst.getNumOperands())) {
1751     addLiteralImmOperand(Inst, Imm.Val,
1752                          ApplyModifiers &
1753                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1754   } else {
1755     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1756     Inst.addOperand(MCOperand::createImm(Imm.Val));
1757   }
1758 }
1759 
/// Encodes the literal \p Val for the operand slot about to be filled in
/// \p Inst (index Inst.getNumOperands()) and appends it as an immediate.
///
/// The encoding depends on two things: whether the parsed token was a
/// floating-point literal (Imm.IsFPImm; \p Val then holds the bits of an IEEE
/// double) and the OperandType recorded for the slot in the instruction
/// description.
///
/// \param Inst            Instruction being built.
/// \param Val             Raw literal bits.
/// \param ApplyModifiers  When true, the operand's abs/neg FP modifiers are
///                        folded into \p Val before encoding.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // An fp literal token is still held as a double at this point, so the
    // sign bit is that of a 64-bit value; otherwise use the operand's size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      // Inlinable 64-bit values are emitted with all 64 bits intact.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the double are emitted.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the double to the FP format that getOpFltSemantics() selects
      // for this operand type.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    // Inlinable values keep their (possibly sign-extended) 64-bit form.
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Everything else is emitted as its low 32 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Non-inlinable integer literal: only the low 32 bits are emitted.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Everything else is emitted as its low 16 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // These operand types only reach this point with a 16-bit inlinable
    // value; that is asserted rather than handled.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1906 
1907 template <unsigned Bitwidth>
1908 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1909   APInt Literal(64, Imm.Val);
1910 
1911   if (!Imm.IsFPImm) {
1912     // We got int literal token.
1913     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1914     return;
1915   }
1916 
1917   bool Lost;
1918   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1919   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1920                     APFloat::rmNearestTiesToEven, &Lost);
1921   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1922 }
1923 
1924 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1925   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1926 }
1927 
1928 static bool isInlineValue(unsigned Reg) {
1929   switch (Reg) {
1930   case AMDGPU::SRC_SHARED_BASE:
1931   case AMDGPU::SRC_SHARED_LIMIT:
1932   case AMDGPU::SRC_PRIVATE_BASE:
1933   case AMDGPU::SRC_PRIVATE_LIMIT:
1934   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1935     return true;
1936   case AMDGPU::SRC_VCCZ:
1937   case AMDGPU::SRC_EXECZ:
1938   case AMDGPU::SRC_SCC:
1939     return true;
1940   case AMDGPU::SGPR_NULL:
1941     return true;
1942   default:
1943     return false;
1944   }
1945 }
1946 
1947 bool AMDGPUOperand::isInlineValue() const {
1948   return isRegKind() && ::isInlineValue(getReg());
1949 }
1950 
1951 //===----------------------------------------------------------------------===//
1952 // AsmParser
1953 //===----------------------------------------------------------------------===//
1954 
1955 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1956   if (Is == IS_VGPR) {
1957     switch (RegWidth) {
1958       default: return -1;
1959       case 1: return AMDGPU::VGPR_32RegClassID;
1960       case 2: return AMDGPU::VReg_64RegClassID;
1961       case 3: return AMDGPU::VReg_96RegClassID;
1962       case 4: return AMDGPU::VReg_128RegClassID;
1963       case 5: return AMDGPU::VReg_160RegClassID;
1964       case 6: return AMDGPU::VReg_192RegClassID;
1965       case 8: return AMDGPU::VReg_256RegClassID;
1966       case 16: return AMDGPU::VReg_512RegClassID;
1967       case 32: return AMDGPU::VReg_1024RegClassID;
1968     }
1969   } else if (Is == IS_TTMP) {
1970     switch (RegWidth) {
1971       default: return -1;
1972       case 1: return AMDGPU::TTMP_32RegClassID;
1973       case 2: return AMDGPU::TTMP_64RegClassID;
1974       case 4: return AMDGPU::TTMP_128RegClassID;
1975       case 8: return AMDGPU::TTMP_256RegClassID;
1976       case 16: return AMDGPU::TTMP_512RegClassID;
1977     }
1978   } else if (Is == IS_SGPR) {
1979     switch (RegWidth) {
1980       default: return -1;
1981       case 1: return AMDGPU::SGPR_32RegClassID;
1982       case 2: return AMDGPU::SGPR_64RegClassID;
1983       case 3: return AMDGPU::SGPR_96RegClassID;
1984       case 4: return AMDGPU::SGPR_128RegClassID;
1985       case 5: return AMDGPU::SGPR_160RegClassID;
1986       case 6: return AMDGPU::SGPR_192RegClassID;
1987       case 8: return AMDGPU::SGPR_256RegClassID;
1988       case 16: return AMDGPU::SGPR_512RegClassID;
1989     }
1990   } else if (Is == IS_AGPR) {
1991     switch (RegWidth) {
1992       default: return -1;
1993       case 1: return AMDGPU::AGPR_32RegClassID;
1994       case 2: return AMDGPU::AReg_64RegClassID;
1995       case 3: return AMDGPU::AReg_96RegClassID;
1996       case 4: return AMDGPU::AReg_128RegClassID;
1997       case 5: return AMDGPU::AReg_160RegClassID;
1998       case 6: return AMDGPU::AReg_192RegClassID;
1999       case 8: return AMDGPU::AReg_256RegClassID;
2000       case 16: return AMDGPU::AReg_512RegClassID;
2001       case 32: return AMDGPU::AReg_1024RegClassID;
2002     }
2003   }
2004   return -1;
2005 }
2006 
2007 static unsigned getSpecialRegForName(StringRef RegName) {
2008   return StringSwitch<unsigned>(RegName)
2009     .Case("exec", AMDGPU::EXEC)
2010     .Case("vcc", AMDGPU::VCC)
2011     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2012     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2013     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2014     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2015     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2016     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2017     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2018     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2019     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2020     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2021     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2022     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2023     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2024     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2025     .Case("m0", AMDGPU::M0)
2026     .Case("vccz", AMDGPU::SRC_VCCZ)
2027     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2028     .Case("execz", AMDGPU::SRC_EXECZ)
2029     .Case("src_execz", AMDGPU::SRC_EXECZ)
2030     .Case("scc", AMDGPU::SRC_SCC)
2031     .Case("src_scc", AMDGPU::SRC_SCC)
2032     .Case("tba", AMDGPU::TBA)
2033     .Case("tma", AMDGPU::TMA)
2034     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2035     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2036     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2037     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2038     .Case("vcc_lo", AMDGPU::VCC_LO)
2039     .Case("vcc_hi", AMDGPU::VCC_HI)
2040     .Case("exec_lo", AMDGPU::EXEC_LO)
2041     .Case("exec_hi", AMDGPU::EXEC_HI)
2042     .Case("tma_lo", AMDGPU::TMA_LO)
2043     .Case("tma_hi", AMDGPU::TMA_HI)
2044     .Case("tba_lo", AMDGPU::TBA_LO)
2045     .Case("tba_hi", AMDGPU::TBA_HI)
2046     .Case("pc", AMDGPU::PC_REG)
2047     .Case("null", AMDGPU::SGPR_NULL)
2048     .Default(AMDGPU::NoRegister);
2049 }
2050 
2051 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2052                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2053   auto R = parseRegister();
2054   if (!R) return true;
2055   assert(R->isReg());
2056   RegNo = R->getReg();
2057   StartLoc = R->getStartLoc();
2058   EndLoc = R->getEndLoc();
2059   return false;
2060 }
2061 
2062 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2063                                     SMLoc &EndLoc) {
2064   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2065 }
2066 
2067 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2068                                                        SMLoc &StartLoc,
2069                                                        SMLoc &EndLoc) {
2070   bool Result =
2071       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2072   bool PendingErrors = getParser().hasPendingError();
2073   getParser().clearPendingErrors();
2074   if (PendingErrors)
2075     return MatchOperand_ParseFail;
2076   if (Result)
2077     return MatchOperand_NoMatch;
2078   return MatchOperand_Success;
2079 }
2080 
2081 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2082                                             RegisterKind RegKind, unsigned Reg1,
2083                                             SMLoc Loc) {
2084   switch (RegKind) {
2085   case IS_SPECIAL:
2086     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2087       Reg = AMDGPU::EXEC;
2088       RegWidth = 2;
2089       return true;
2090     }
2091     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2092       Reg = AMDGPU::FLAT_SCR;
2093       RegWidth = 2;
2094       return true;
2095     }
2096     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2097       Reg = AMDGPU::XNACK_MASK;
2098       RegWidth = 2;
2099       return true;
2100     }
2101     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2102       Reg = AMDGPU::VCC;
2103       RegWidth = 2;
2104       return true;
2105     }
2106     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2107       Reg = AMDGPU::TBA;
2108       RegWidth = 2;
2109       return true;
2110     }
2111     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2112       Reg = AMDGPU::TMA;
2113       RegWidth = 2;
2114       return true;
2115     }
2116     Error(Loc, "register does not fit in the list");
2117     return false;
2118   case IS_VGPR:
2119   case IS_SGPR:
2120   case IS_AGPR:
2121   case IS_TTMP:
2122     if (Reg1 != Reg + RegWidth) {
2123       Error(Loc, "registers in a list must have consecutive indices");
2124       return false;
2125     }
2126     RegWidth++;
2127     return true;
2128   default:
2129     llvm_unreachable("unexpected register kind");
2130   }
2131 }
2132 
/// Associates a register-name prefix with the kind of register it denotes.
struct RegInfo {
  // Name prefix as written in assembly (e.g. "v" or "ttmp").
  StringLiteral Name;
  // Register kind selected by that prefix.
  RegisterKind Kind;
};
2137 
// Name prefixes of the indexed ("regular") register files.
// getRegularRegInfo() returns the first entry whose name is a prefix of the
// token, so a longer prefix must come before any shorter prefix it starts
// with: "acc" has to stay ahead of "a".
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};
2145 
2146 static bool isRegularReg(RegisterKind Kind) {
2147   return Kind == IS_VGPR ||
2148          Kind == IS_SGPR ||
2149          Kind == IS_TTMP ||
2150          Kind == IS_AGPR;
2151 }
2152 
2153 static const RegInfo* getRegularRegInfo(StringRef Str) {
2154   for (const RegInfo &Reg : RegularRegisters)
2155     if (Str.startswith(Reg.Name))
2156       return &Reg;
2157   return nullptr;
2158 }
2159 
2160 static bool getRegNum(StringRef Str, unsigned& Num) {
2161   return !Str.getAsInteger(10, Num);
2162 }
2163 
2164 bool
2165 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2166                             const AsmToken &NextToken) const {
2167 
2168   // A list of consecutive registers: [s0,s1,s2,s3]
2169   if (Token.is(AsmToken::LBrac))
2170     return true;
2171 
2172   if (!Token.is(AsmToken::Identifier))
2173     return false;
2174 
2175   // A single register like s0 or a range of registers like s[0:1]
2176 
2177   StringRef Str = Token.getString();
2178   const RegInfo *Reg = getRegularRegInfo(Str);
2179   if (Reg) {
2180     StringRef RegName = Reg->Name;
2181     StringRef RegSuffix = Str.substr(RegName.size());
2182     if (!RegSuffix.empty()) {
2183       unsigned Num;
2184       // A single register with an index: rXX
2185       if (getRegNum(RegSuffix, Num))
2186         return true;
2187     } else {
2188       // A range of registers: r[XX:YY].
2189       if (NextToken.is(AsmToken::LBrac))
2190         return true;
2191     }
2192   }
2193 
2194   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2195 }
2196 
2197 bool
2198 AMDGPUAsmParser::isRegister()
2199 {
2200   return isRegister(getToken(), peekToken());
2201 }
2202 
2203 unsigned
2204 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2205                                unsigned RegNum,
2206                                unsigned RegWidth,
2207                                SMLoc Loc) {
2208 
2209   assert(isRegularReg(RegKind));
2210 
2211   unsigned AlignSize = 1;
2212   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2213     // SGPR and TTMP registers must be aligned.
2214     // Max required alignment is 4 dwords.
2215     AlignSize = std::min(RegWidth, 4u);
2216   }
2217 
2218   if (RegNum % AlignSize != 0) {
2219     Error(Loc, "invalid register alignment");
2220     return AMDGPU::NoRegister;
2221   }
2222 
2223   unsigned RegIdx = RegNum / AlignSize;
2224   int RCID = getRegClass(RegKind, RegWidth);
2225   if (RCID == -1) {
2226     Error(Loc, "invalid or unsupported register size");
2227     return AMDGPU::NoRegister;
2228   }
2229 
2230   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2231   const MCRegisterClass RC = TRI->getRegClass(RCID);
2232   if (RegIdx >= RC.getNumRegs()) {
2233     Error(Loc, "register index is out of range");
2234     return AMDGPU::NoRegister;
2235   }
2236 
2237   return RC.getRegister(RegIdx);
2238 }
2239 
2240 bool
2241 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2242   int64_t RegLo, RegHi;
2243   if (!skipToken(AsmToken::LBrac, "missing register index"))
2244     return false;
2245 
2246   SMLoc FirstIdxLoc = getLoc();
2247   SMLoc SecondIdxLoc;
2248 
2249   if (!parseExpr(RegLo))
2250     return false;
2251 
2252   if (trySkipToken(AsmToken::Colon)) {
2253     SecondIdxLoc = getLoc();
2254     if (!parseExpr(RegHi))
2255       return false;
2256   } else {
2257     RegHi = RegLo;
2258   }
2259 
2260   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2261     return false;
2262 
2263   if (!isUInt<32>(RegLo)) {
2264     Error(FirstIdxLoc, "invalid register index");
2265     return false;
2266   }
2267 
2268   if (!isUInt<32>(RegHi)) {
2269     Error(SecondIdxLoc, "invalid register index");
2270     return false;
2271   }
2272 
2273   if (RegLo > RegHi) {
2274     Error(FirstIdxLoc, "first register index should not exceed second index");
2275     return false;
2276   }
2277 
2278   Num = static_cast<unsigned>(RegLo);
2279   Width = (RegHi - RegLo) + 1;
2280   return true;
2281 }
2282 
2283 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2284                                           unsigned &RegNum, unsigned &RegWidth,
2285                                           SmallVectorImpl<AsmToken> &Tokens) {
2286   assert(isToken(AsmToken::Identifier));
2287   unsigned Reg = getSpecialRegForName(getTokenStr());
2288   if (Reg) {
2289     RegNum = 0;
2290     RegWidth = 1;
2291     RegKind = IS_SPECIAL;
2292     Tokens.push_back(getToken());
2293     lex(); // skip register name
2294   }
2295   return Reg;
2296 }
2297 
2298 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2299                                           unsigned &RegNum, unsigned &RegWidth,
2300                                           SmallVectorImpl<AsmToken> &Tokens) {
2301   assert(isToken(AsmToken::Identifier));
2302   StringRef RegName = getTokenStr();
2303   auto Loc = getLoc();
2304 
2305   const RegInfo *RI = getRegularRegInfo(RegName);
2306   if (!RI) {
2307     Error(Loc, "invalid register name");
2308     return AMDGPU::NoRegister;
2309   }
2310 
2311   Tokens.push_back(getToken());
2312   lex(); // skip register name
2313 
2314   RegKind = RI->Kind;
2315   StringRef RegSuffix = RegName.substr(RI->Name.size());
2316   if (!RegSuffix.empty()) {
2317     // Single 32-bit register: vXX.
2318     if (!getRegNum(RegSuffix, RegNum)) {
2319       Error(Loc, "invalid register index");
2320       return AMDGPU::NoRegister;
2321     }
2322     RegWidth = 1;
2323   } else {
2324     // Range of registers: v[XX:YY]. ":YY" is optional.
2325     if (!ParseRegRange(RegNum, RegWidth))
2326       return AMDGPU::NoRegister;
2327   }
2328 
2329   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2330 }
2331 
2332 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2333                                        unsigned &RegWidth,
2334                                        SmallVectorImpl<AsmToken> &Tokens) {
2335   unsigned Reg = AMDGPU::NoRegister;
2336   auto ListLoc = getLoc();
2337 
2338   if (!skipToken(AsmToken::LBrac,
2339                  "expected a register or a list of registers")) {
2340     return AMDGPU::NoRegister;
2341   }
2342 
2343   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2344 
2345   auto Loc = getLoc();
2346   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2347     return AMDGPU::NoRegister;
2348   if (RegWidth != 1) {
2349     Error(Loc, "expected a single 32-bit register");
2350     return AMDGPU::NoRegister;
2351   }
2352 
2353   for (; trySkipToken(AsmToken::Comma); ) {
2354     RegisterKind NextRegKind;
2355     unsigned NextReg, NextRegNum, NextRegWidth;
2356     Loc = getLoc();
2357 
2358     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2359                              NextRegNum, NextRegWidth,
2360                              Tokens)) {
2361       return AMDGPU::NoRegister;
2362     }
2363     if (NextRegWidth != 1) {
2364       Error(Loc, "expected a single 32-bit register");
2365       return AMDGPU::NoRegister;
2366     }
2367     if (NextRegKind != RegKind) {
2368       Error(Loc, "registers in a list must be of the same kind");
2369       return AMDGPU::NoRegister;
2370     }
2371     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2372       return AMDGPU::NoRegister;
2373   }
2374 
2375   if (!skipToken(AsmToken::RBrac,
2376                  "expected a comma or a closing square bracket")) {
2377     return AMDGPU::NoRegister;
2378   }
2379 
2380   if (isRegularReg(RegKind))
2381     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2382 
2383   return Reg;
2384 }
2385 
2386 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2387                                           unsigned &RegNum, unsigned &RegWidth,
2388                                           SmallVectorImpl<AsmToken> &Tokens) {
2389   auto Loc = getLoc();
2390   Reg = AMDGPU::NoRegister;
2391 
2392   if (isToken(AsmToken::Identifier)) {
2393     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2394     if (Reg == AMDGPU::NoRegister)
2395       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2396   } else {
2397     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2398   }
2399 
2400   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2401   if (Reg == AMDGPU::NoRegister) {
2402     assert(Parser.hasPendingError());
2403     return false;
2404   }
2405 
2406   if (!subtargetHasRegister(*TRI, Reg)) {
2407     if (Reg == AMDGPU::SGPR_NULL) {
2408       Error(Loc, "'null' operand is not supported on this GPU");
2409     } else {
2410       Error(Loc, "register not available on this GPU");
2411     }
2412     return false;
2413   }
2414 
2415   return true;
2416 }
2417 
2418 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2419                                           unsigned &RegNum, unsigned &RegWidth,
2420                                           bool RestoreOnFailure /*=false*/) {
2421   Reg = AMDGPU::NoRegister;
2422 
2423   SmallVector<AsmToken, 1> Tokens;
2424   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2425     if (RestoreOnFailure) {
2426       while (!Tokens.empty()) {
2427         getLexer().UnLex(Tokens.pop_back_val());
2428       }
2429     }
2430     return true;
2431   }
2432   return false;
2433 }
2434 
2435 Optional<StringRef>
2436 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2437   switch (RegKind) {
2438   case IS_VGPR:
2439     return StringRef(".amdgcn.next_free_vgpr");
2440   case IS_SGPR:
2441     return StringRef(".amdgcn.next_free_sgpr");
2442   default:
2443     return None;
2444   }
2445 }
2446 
2447 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2448   auto SymbolName = getGprCountSymbolName(RegKind);
2449   assert(SymbolName && "initializing invalid register kind");
2450   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2451   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2452 }
2453 
2454 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2455                                             unsigned DwordRegIndex,
2456                                             unsigned RegWidth) {
2457   // Symbols are only defined for GCN targets
2458   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2459     return true;
2460 
2461   auto SymbolName = getGprCountSymbolName(RegKind);
2462   if (!SymbolName)
2463     return true;
2464   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2465 
2466   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2467   int64_t OldCount;
2468 
2469   if (!Sym->isVariable())
2470     return !Error(getParser().getTok().getLoc(),
2471                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2472   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2473     return !Error(
2474         getParser().getTok().getLoc(),
2475         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2476 
2477   if (OldCount <= NewMax)
2478     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2479 
2480   return true;
2481 }
2482 
2483 std::unique_ptr<AMDGPUOperand>
2484 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2485   const auto &Tok = Parser.getTok();
2486   SMLoc StartLoc = Tok.getLoc();
2487   SMLoc EndLoc = Tok.getEndLoc();
2488   RegisterKind RegKind;
2489   unsigned Reg, RegNum, RegWidth;
2490 
2491   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2492     return nullptr;
2493   }
2494   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2495     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2496       return nullptr;
2497   } else
2498     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2499   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2500 }
2501 
2502 OperandMatchResultTy
2503 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2504   // TODO: add syntactic sugar for 1/(2*PI)
2505 
2506   assert(!isRegister());
2507   assert(!isModifier());
2508 
2509   const auto& Tok = getToken();
2510   const auto& NextTok = peekToken();
2511   bool IsReal = Tok.is(AsmToken::Real);
2512   SMLoc S = getLoc();
2513   bool Negate = false;
2514 
2515   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2516     lex();
2517     IsReal = true;
2518     Negate = true;
2519   }
2520 
2521   if (IsReal) {
2522     // Floating-point expressions are not supported.
2523     // Can only allow floating-point literals with an
2524     // optional sign.
2525 
2526     StringRef Num = getTokenStr();
2527     lex();
2528 
2529     APFloat RealVal(APFloat::IEEEdouble());
2530     auto roundMode = APFloat::rmNearestTiesToEven;
2531     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2532       return MatchOperand_ParseFail;
2533     }
2534     if (Negate)
2535       RealVal.changeSign();
2536 
2537     Operands.push_back(
2538       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2539                                AMDGPUOperand::ImmTyNone, true));
2540 
2541     return MatchOperand_Success;
2542 
2543   } else {
2544     int64_t IntVal;
2545     const MCExpr *Expr;
2546     SMLoc S = getLoc();
2547 
2548     if (HasSP3AbsModifier) {
2549       // This is a workaround for handling expressions
2550       // as arguments of SP3 'abs' modifier, for example:
2551       //     |1.0|
2552       //     |-1|
2553       //     |1+x|
2554       // This syntax is not compatible with syntax of standard
2555       // MC expressions (due to the trailing '|').
2556       SMLoc EndLoc;
2557       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2558         return MatchOperand_ParseFail;
2559     } else {
2560       if (Parser.parseExpression(Expr))
2561         return MatchOperand_ParseFail;
2562     }
2563 
2564     if (Expr->evaluateAsAbsolute(IntVal)) {
2565       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2566     } else {
2567       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2568     }
2569 
2570     return MatchOperand_Success;
2571   }
2572 
2573   return MatchOperand_NoMatch;
2574 }
2575 
2576 OperandMatchResultTy
2577 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2578   if (!isRegister())
2579     return MatchOperand_NoMatch;
2580 
2581   if (auto R = parseRegister()) {
2582     assert(R->isReg());
2583     Operands.push_back(std::move(R));
2584     return MatchOperand_Success;
2585   }
2586   return MatchOperand_ParseFail;
2587 }
2588 
2589 OperandMatchResultTy
2590 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2591   auto res = parseReg(Operands);
2592   if (res != MatchOperand_NoMatch) {
2593     return res;
2594   } else if (isModifier()) {
2595     return MatchOperand_NoMatch;
2596   } else {
2597     return parseImm(Operands, HasSP3AbsMod);
2598   }
2599 }
2600 
2601 bool
2602 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2603   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2604     const auto &str = Token.getString();
2605     return str == "abs" || str == "neg" || str == "sext";
2606   }
2607   return false;
2608 }
2609 
2610 bool
2611 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2612   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2613 }
2614 
2615 bool
2616 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2617   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2618 }
2619 
2620 bool
2621 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2622   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2623 }
2624 
2625 // Check if this is an operand modifier or an opcode modifier
2626 // which may look like an expression but it is not. We should
2627 // avoid parsing these modifiers as expressions. Currently
2628 // recognized sequences are:
2629 //   |...|
2630 //   abs(...)
2631 //   neg(...)
2632 //   sext(...)
2633 //   -reg
2634 //   -|...|
2635 //   -abs(...)
2636 //   name:...
2637 // Note that simple opcode modifiers like 'gds' may be parsed as
2638 // expressions; this is a special case. See getExpressionAsToken.
2639 //
2640 bool
2641 AMDGPUAsmParser::isModifier() {
2642 
2643   AsmToken Tok = getToken();
2644   AsmToken NextToken[2];
2645   peekTokens(NextToken);
2646 
2647   return isOperandModifier(Tok, NextToken[0]) ||
2648          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2649          isOpcodeModifierWithVal(Tok, NextToken[0]);
2650 }
2651 
2652 // Check if the current token is an SP3 'neg' modifier.
2653 // Currently this modifier is allowed in the following context:
2654 //
2655 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2656 // 2. Before an 'abs' modifier: -abs(...)
2657 // 3. Before an SP3 'abs' modifier: -|...|
2658 //
2659 // In all other cases "-" is handled as a part
2660 // of an expression that follows the sign.
2661 //
2662 // Note: When "-" is followed by an integer literal,
2663 // this is interpreted as integer negation rather
2664 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
2666 // NEG modifier would have resulted in different meaning
2667 // of integer literals used with VOP1/2/C and VOP3,
2668 // for example:
2669 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2670 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2671 // Negative fp literals with preceding "-" are
// handled likewise for uniformity.
2673 //
2674 bool
2675 AMDGPUAsmParser::parseSP3NegModifier() {
2676 
2677   AsmToken NextToken[2];
2678   peekTokens(NextToken);
2679 
2680   if (isToken(AsmToken::Minus) &&
2681       (isRegister(NextToken[0], NextToken[1]) ||
2682        NextToken[0].is(AsmToken::Pipe) ||
2683        isId(NextToken[0], "abs"))) {
2684     lex();
2685     return true;
2686   }
2687 
2688   return false;
2689 }
2690 
// Parse a register (or, when AllowImm is set, a register or an immediate)
// optionally wrapped in floating-point input modifiers. The accepted
// spellings are the SP3 forms '-operand' and '|operand|' and the named
// forms 'neg(...)' and 'abs(...)'. On success the parsed operand is the
// last element of Operands and carries the collected Abs/Neg modifiers.
// Returns ParseFail if any modifier was consumed but no valid operand
// (or closing token) follows; otherwise the result of the operand parser.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  // Leading SP3 '-' (only consumed in the contexts parseSP3NegModifier allows).
  SP3Neg = parseSP3NegModifier();

  // Named 'neg(' modifier; it may not be combined with the SP3 '-' form.
  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  // Named 'abs(' modifier.
  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  // SP3 '|' modifier; it may not be combined with the named 'abs(' form.
  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once a modifier has been consumed, a missing operand is a hard error.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Close the modifiers in the reverse order of their opening.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    // The operand just parsed is the last one pushed onto Operands.
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Modifiers are rejected on operands that remained unresolved expressions.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2757 
2758 OperandMatchResultTy
2759 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2760                                                bool AllowImm) {
2761   bool Sext = trySkipId("sext");
2762   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2763     return MatchOperand_ParseFail;
2764 
2765   OperandMatchResultTy Res;
2766   if (AllowImm) {
2767     Res = parseRegOrImm(Operands);
2768   } else {
2769     Res = parseReg(Operands);
2770   }
2771   if (Res != MatchOperand_Success) {
2772     return Sext? MatchOperand_ParseFail : Res;
2773   }
2774 
2775   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2776     return MatchOperand_ParseFail;
2777 
2778   AMDGPUOperand::Modifiers Mods;
2779   Mods.Sext = Sext;
2780 
2781   if (Mods.hasIntModifiers()) {
2782     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2783     if (Op.isExpr()) {
2784       Error(Op.getStartLoc(), "expected an absolute expression");
2785       return MatchOperand_ParseFail;
2786     }
2787     Op.setModifiers(Mods);
2788   }
2789 
2790   return MatchOperand_Success;
2791 }
2792 
2793 OperandMatchResultTy
2794 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2795   return parseRegOrImmWithFPInputMods(Operands, false);
2796 }
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2800   return parseRegOrImmWithIntInputMods(Operands, false);
2801 }
2802 
2803 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2804   auto Loc = getLoc();
2805   if (trySkipId("off")) {
2806     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2807                                                 AMDGPUOperand::ImmTyOff, false));
2808     return MatchOperand_Success;
2809   }
2810 
2811   if (!isRegister())
2812     return MatchOperand_NoMatch;
2813 
2814   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2815   if (Reg) {
2816     Operands.push_back(std::move(Reg));
2817     return MatchOperand_Success;
2818   }
2819 
2820   return MatchOperand_ParseFail;
2821 
2822 }
2823 
2824 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2825   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2826 
2827   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2828       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2829       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2830       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2831     return Match_InvalidOperand;
2832 
2833   if ((TSFlags & SIInstrFlags::VOP3) &&
2834       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2835       getForcedEncodingSize() != 64)
2836     return Match_PreferE32;
2837 
2838   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2839       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2840     // v_mac_f32/16 allow only dst_sel == DWORD;
2841     auto OpNum =
2842         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2843     const auto &Op = Inst.getOperand(OpNum);
2844     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2845       return Match_InvalidOperand;
2846     }
2847   }
2848 
2849   return Match_Success;
2850 }
2851 
2852 static ArrayRef<unsigned> getAllVariants() {
2853   static const unsigned Variants[] = {
2854     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2855     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2856   };
2857 
2858   return makeArrayRef(Variants);
2859 }
2860 
2861 // What asm variants we should check
2862 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2863   if (getForcedEncodingSize() == 32) {
2864     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2865     return makeArrayRef(Variants);
2866   }
2867 
2868   if (isForcedVOP3()) {
2869     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2870     return makeArrayRef(Variants);
2871   }
2872 
2873   if (isForcedSDWA()) {
2874     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2875                                         AMDGPUAsmVariants::SDWA9};
2876     return makeArrayRef(Variants);
2877   }
2878 
2879   if (isForcedDPP()) {
2880     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2881     return makeArrayRef(Variants);
2882   }
2883 
2884   return getAllVariants();
2885 }
2886 
2887 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2888   if (getForcedEncodingSize() == 32)
2889     return "e32";
2890 
2891   if (isForcedVOP3())
2892     return "e64";
2893 
2894   if (isForcedSDWA())
2895     return "sdwa";
2896 
2897   if (isForcedDPP())
2898     return "dpp";
2899 
2900   return "";
2901 }
2902 
2903 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2904   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2905   const unsigned Num = Desc.getNumImplicitUses();
2906   for (unsigned i = 0; i < Num; ++i) {
2907     unsigned Reg = Desc.ImplicitUses[i];
2908     switch (Reg) {
2909     case AMDGPU::FLAT_SCR:
2910     case AMDGPU::VCC:
2911     case AMDGPU::VCC_LO:
2912     case AMDGPU::VCC_HI:
2913     case AMDGPU::M0:
2914       return Reg;
2915     default:
2916       break;
2917     }
2918   }
2919   return AMDGPU::NoRegister;
2920 }
2921 
2922 // NB: This code is correct only when used to check constant
2923 // bus limitations because GFX7 support no f16 inline constants.
2924 // Note that there are no cases when a GFX7 opcode violates
2925 // constant bus limitations due to the use of an f16 constant.
2926 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2927                                        unsigned OpIdx) const {
2928   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2929 
2930   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2931     return false;
2932   }
2933 
2934   const MCOperand &MO = Inst.getOperand(OpIdx);
2935 
2936   int64_t Val = MO.getImm();
2937   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2938 
2939   switch (OpSize) { // expected operand size
2940   case 8:
2941     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2942   case 4:
2943     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2944   case 2: {
2945     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2946     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2947         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2948         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2949       return AMDGPU::isInlinableIntLiteral(Val);
2950 
2951     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2952         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2953         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2954       return AMDGPU::isInlinableIntLiteralV216(Val);
2955 
2956     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2957         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2958         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2959       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2960 
2961     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2962   }
2963   default:
2964     llvm_unreachable("invalid operand size");
2965   }
2966 }
2967 
2968 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2969   if (!isGFX10())
2970     return 1;
2971 
2972   switch (Opcode) {
2973   // 64-bit shift instructions can use only one scalar value input
2974   case AMDGPU::V_LSHLREV_B64:
2975   case AMDGPU::V_LSHLREV_B64_gfx10:
2976   case AMDGPU::V_LSHL_B64:
2977   case AMDGPU::V_LSHRREV_B64:
2978   case AMDGPU::V_LSHRREV_B64_gfx10:
2979   case AMDGPU::V_LSHR_B64:
2980   case AMDGPU::V_ASHRREV_I64:
2981   case AMDGPU::V_ASHRREV_I64_gfx10:
2982   case AMDGPU::V_ASHR_I64:
2983     return 1;
2984   default:
2985     return 2;
2986   }
2987 }
2988 
2989 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2990   const MCOperand &MO = Inst.getOperand(OpIdx);
2991   if (MO.isImm()) {
2992     return !isInlineConstant(Inst, OpIdx);
2993   } else if (MO.isReg()) {
2994     auto Reg = MO.getReg();
2995     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2996     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2997   } else {
2998     return true;
2999   }
3000 }
3001 
// Check that Inst does not read more values over the constant bus than
// getConstantBusLimit() allows for its opcode. Only VOPC/VOP1/VOP2/VOP3/
// VOP3P/SDWA encodings are inspected; for any other encoding the count
// stays at zero. Counted are: the special 'imm' operand, one implicit SGPR
// read, each distinct SGPR among src0..src2, and literals/expressions
// (one or two scalar values, see below).
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  // Only read after NumLiterals became non-zero, at which point it is set.
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Registers already counted; a register used twice is counted once.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      // Stop at the first source operand the opcode does not have.
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(Reg)) {
            SGPRsUsed.insert(Reg);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          // Operand sizes below 4 bytes are treated as 4 bytes here.
          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
}
3080 
3081 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3082   const unsigned Opcode = Inst.getOpcode();
3083   const MCInstrDesc &Desc = MII.get(Opcode);
3084 
3085   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3086   if (DstIdx == -1 ||
3087       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3088     return true;
3089   }
3090 
3091   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3092 
3093   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3094   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3095   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3096 
3097   assert(DstIdx != -1);
3098   const MCOperand &Dst = Inst.getOperand(DstIdx);
3099   assert(Dst.isReg());
3100   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3101 
3102   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3103 
3104   for (int SrcIdx : SrcIndices) {
3105     if (SrcIdx == -1) break;
3106     const MCOperand &Src = Inst.getOperand(SrcIdx);
3107     if (Src.isReg()) {
3108       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3109       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3110         return false;
3111       }
3112     }
3113   }
3114 
3115   return true;
3116 }
3117 
3118 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3119 
3120   const unsigned Opc = Inst.getOpcode();
3121   const MCInstrDesc &Desc = MII.get(Opc);
3122 
3123   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3124     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3125     assert(ClampIdx != -1);
3126     return Inst.getOperand(ClampIdx).getImm() == 0;
3127   }
3128 
3129   return true;
3130 }
3131 
3132 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3133 
3134   const unsigned Opc = Inst.getOpcode();
3135   const MCInstrDesc &Desc = MII.get(Opc);
3136 
3137   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3138     return true;
3139 
3140   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3141   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3142   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3143 
3144   assert(VDataIdx != -1);
3145 
3146   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3147     return true;
3148 
3149   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3150   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3151   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3152   if (DMask == 0)
3153     DMask = 1;
3154 
3155   unsigned DataSize =
3156     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3157   if (hasPackedD16()) {
3158     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3159     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3160       DataSize = (DataSize + 1) / 2;
3161   }
3162 
3163   return (VDataSize / 4) == DataSize + TFESize;
3164 }
3165 
3166 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3167   const unsigned Opc = Inst.getOpcode();
3168   const MCInstrDesc &Desc = MII.get(Opc);
3169 
3170   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3171     return true;
3172 
3173   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3174 
3175   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3176       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3177   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3178   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3179   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3180 
3181   assert(VAddr0Idx != -1);
3182   assert(SrsrcIdx != -1);
3183   assert(SrsrcIdx > VAddr0Idx);
3184 
3185   if (DimIdx == -1)
3186     return true; // intersect_ray
3187 
3188   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3189   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3190   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3191   unsigned VAddrSize =
3192       IsNSA ? SrsrcIdx - VAddr0Idx
3193             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3194 
3195   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3196                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3197                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3198                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3199   if (!IsNSA) {
3200     if (AddrSize > 8)
3201       AddrSize = 16;
3202     else if (AddrSize > 4)
3203       AddrSize = 8;
3204   }
3205 
3206   return VAddrSize == AddrSize;
3207 }
3208 
3209 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3210 
3211   const unsigned Opc = Inst.getOpcode();
3212   const MCInstrDesc &Desc = MII.get(Opc);
3213 
3214   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3215     return true;
3216   if (!Desc.mayLoad() || !Desc.mayStore())
3217     return true; // Not atomic
3218 
3219   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3220   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3221 
3222   // This is an incomplete check because image_atomic_cmpswap
3223   // may only use 0x3 and 0xf while other atomic operations
3224   // may use 0x1 and 0x3. However these limitations are
3225   // verified when we check that dmask matches dst size.
3226   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3227 }
3228 
3229 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3230 
3231   const unsigned Opc = Inst.getOpcode();
3232   const MCInstrDesc &Desc = MII.get(Opc);
3233 
3234   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3235     return true;
3236 
3237   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3238   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3239 
3240   // GATHER4 instructions use dmask in a different fashion compared to
3241   // other MIMG instructions. The only useful DMASK values are
3242   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3243   // (red,red,red,red) etc.) The ISA document doesn't mention
3244   // this.
3245   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3246 }
3247 
3248 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3249 {
3250   switch (Opcode) {
3251   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3252   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3253   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3254     return true;
3255   default:
3256     return false;
3257   }
3258 }
3259 
3260 // movrels* opcodes should only allow VGPRS as src0.
3261 // This is specified in .td description for vop1/vop3,
3262 // but sdwa is handled differently. See isSDWAOperand.
3263 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3264 
3265   const unsigned Opc = Inst.getOpcode();
3266   const MCInstrDesc &Desc = MII.get(Opc);
3267 
3268   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3269     return true;
3270 
3271   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3272   assert(Src0Idx != -1);
3273 
3274   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3275   if (!Src0.isReg())
3276     return false;
3277 
3278   auto Reg = Src0.getReg();
3279   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3280   return !isSGPR(mc2PseudoReg(Reg), TRI);
3281 }
3282 
3283 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3284 
3285   const unsigned Opc = Inst.getOpcode();
3286 
3287   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3288     return true;
3289 
3290   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3291   assert(Src0Idx != -1);
3292 
3293   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3294   if (!Src0.isReg())
3295     return true;
3296 
3297   auto Reg = Src0.getReg();
3298   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3299   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3300     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3301     return false;
3302   }
3303 
3304   return true;
3305 }
3306 
3307 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3308 
3309   const unsigned Opc = Inst.getOpcode();
3310   const MCInstrDesc &Desc = MII.get(Opc);
3311 
3312   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3313     return true;
3314 
3315   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3316   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3317     if (isCI() || isSI())
3318       return false;
3319   }
3320 
3321   return true;
3322 }
3323 
3324 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3325   const unsigned Opc = Inst.getOpcode();
3326   const MCInstrDesc &Desc = MII.get(Opc);
3327 
3328   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3329     return true;
3330 
3331   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3332   if (DimIdx < 0)
3333     return true;
3334 
3335   long Imm = Inst.getOperand(DimIdx).getImm();
3336   if (Imm < 0 || Imm >= 8)
3337     return false;
3338 
3339   return true;
3340 }
3341 
// Return true if Opcode is one of the "reversed operand" opcodes listed
// below: the V_SUBREV*/V_SUBBREV* subtractions and the V_LSHLREV*/V_LSHRREV*/
// V_ASHRREV* shifts (including their V_PK_ variants), across the encodings
// and subtarget-specific variants enumerated here.
// validateLdsDirect uses this to reject lds_direct as src0 of such opcodes.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
3470 
3471 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3472 
3473   using namespace SIInstrFlags;
3474   const unsigned Opcode = Inst.getOpcode();
3475   const MCInstrDesc &Desc = MII.get(Opcode);
3476 
3477   // lds_direct register is defined so that it can be used
3478   // with 9-bit operands only. Ignore encodings which do not accept these.
3479   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3480     return true;
3481 
3482   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3483   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3484   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3485 
3486   const int SrcIndices[] = { Src1Idx, Src2Idx };
3487 
3488   // lds_direct cannot be specified as either src1 or src2.
3489   for (int SrcIdx : SrcIndices) {
3490     if (SrcIdx == -1) break;
3491     const MCOperand &Src = Inst.getOperand(SrcIdx);
3492     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3493       return false;
3494     }
3495   }
3496 
3497   if (Src0Idx == -1)
3498     return true;
3499 
3500   const MCOperand &Src = Inst.getOperand(Src0Idx);
3501   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3502     return true;
3503 
3504   // lds_direct is specified as src0. Check additional limitations.
3505   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3506 }
3507 
3508 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3509   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3510     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3511     if (Op.isFlatOffset())
3512       return Op.getStartLoc();
3513   }
3514   return getLoc();
3515 }
3516 
3517 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3518                                          const OperandVector &Operands) {
3519   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3520   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3521     return true;
3522 
3523   auto Opcode = Inst.getOpcode();
3524   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3525   assert(OpNum != -1);
3526 
3527   const auto &Op = Inst.getOperand(OpNum);
3528   if (!hasFlatOffsets() && Op.getImm() != 0) {
3529     Error(getFlatOffsetLoc(Operands),
3530           "flat offset modifier is not supported on this GPU");
3531     return false;
3532   }
3533 
3534   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3535   // For FLAT segment the offset must be positive;
3536   // MSB is ignored and forced to zero.
3537   unsigned OffsetSize = isGFX9() ? 13 : 12;
3538   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3539     if (!isIntN(OffsetSize, Op.getImm())) {
3540       Error(getFlatOffsetLoc(Operands),
3541             isGFX9() ? "expected a 13-bit signed offset" :
3542                        "expected a 12-bit signed offset");
3543       return false;
3544     }
3545   } else {
3546     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3547       Error(getFlatOffsetLoc(Operands),
3548             isGFX9() ? "expected a 12-bit unsigned offset" :
3549                        "expected an 11-bit unsigned offset");
3550       return false;
3551     }
3552   }
3553 
3554   return true;
3555 }
3556 
3557 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3558   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3559     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3560     if (Op.isSMEMOffset())
3561       return Op.getStartLoc();
3562   }
3563   return getLoc();
3564 }
3565 
3566 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3567                                          const OperandVector &Operands) {
3568   if (isCI() || isSI())
3569     return true;
3570 
3571   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3572   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3573     return true;
3574 
3575   auto Opcode = Inst.getOpcode();
3576   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3577   if (OpNum == -1)
3578     return true;
3579 
3580   const auto &Op = Inst.getOperand(OpNum);
3581   if (!Op.isImm())
3582     return true;
3583 
3584   uint64_t Offset = Op.getImm();
3585   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3586   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3587       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3588     return true;
3589 
3590   Error(getSMEMOffsetLoc(Operands),
3591         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3592                                "expected a 21-bit signed offset");
3593 
3594   return false;
3595 }
3596 
3597 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3598   unsigned Opcode = Inst.getOpcode();
3599   const MCInstrDesc &Desc = MII.get(Opcode);
3600   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3601     return true;
3602 
3603   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3604   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3605 
3606   const int OpIndices[] = { Src0Idx, Src1Idx };
3607 
3608   unsigned NumExprs = 0;
3609   unsigned NumLiterals = 0;
3610   uint32_t LiteralValue;
3611 
3612   for (int OpIdx : OpIndices) {
3613     if (OpIdx == -1) break;
3614 
3615     const MCOperand &MO = Inst.getOperand(OpIdx);
3616     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3617     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3618       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3619         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3620         if (NumLiterals == 0 || LiteralValue != Value) {
3621           LiteralValue = Value;
3622           ++NumLiterals;
3623         }
3624       } else if (MO.isExpr()) {
3625         ++NumExprs;
3626       }
3627     }
3628   }
3629 
3630   return NumLiterals + NumExprs <= 1;
3631 }
3632 
3633 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3634   const unsigned Opc = Inst.getOpcode();
3635   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3636       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3637     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3638     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3639 
3640     if (OpSel & ~3)
3641       return false;
3642   }
3643   return true;
3644 }
3645 
3646 // Check if VCC register matches wavefront size
3647 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3648   auto FB = getFeatureBits();
3649   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3650     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3651 }
3652 
3653 // VOP3 literal is only allowed in GFX10+ and only one can be used
3654 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3655   unsigned Opcode = Inst.getOpcode();
3656   const MCInstrDesc &Desc = MII.get(Opcode);
3657   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3658     return true;
3659 
3660   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3661   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3662   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3663 
3664   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3665 
3666   unsigned NumExprs = 0;
3667   unsigned NumLiterals = 0;
3668   uint32_t LiteralValue;
3669 
3670   for (int OpIdx : OpIndices) {
3671     if (OpIdx == -1) break;
3672 
3673     const MCOperand &MO = Inst.getOperand(OpIdx);
3674     if (!MO.isImm() && !MO.isExpr())
3675       continue;
3676     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3677       continue;
3678 
3679     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3680         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3681       return false;
3682 
3683     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3684       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3685       if (NumLiterals == 0 || LiteralValue != Value) {
3686         LiteralValue = Value;
3687         ++NumLiterals;
3688       }
3689     } else if (MO.isExpr()) {
3690       ++NumExprs;
3691     }
3692   }
3693   NumLiterals += NumExprs;
3694 
3695   return !NumLiterals ||
3696          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3697 }
3698 
3699 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3700                                           const SMLoc &IDLoc,
3701                                           const OperandVector &Operands) {
3702   if (!validateLdsDirect(Inst)) {
3703     Error(IDLoc,
3704       "invalid use of lds_direct");
3705     return false;
3706   }
3707   if (!validateSOPLiteral(Inst)) {
3708     Error(IDLoc,
3709       "only one literal operand is allowed");
3710     return false;
3711   }
3712   if (!validateVOP3Literal(Inst)) {
3713     Error(IDLoc,
3714       "invalid literal operand");
3715     return false;
3716   }
3717   if (!validateConstantBusLimitations(Inst)) {
3718     Error(IDLoc,
3719       "invalid operand (violates constant bus restrictions)");
3720     return false;
3721   }
3722   if (!validateEarlyClobberLimitations(Inst)) {
3723     Error(IDLoc,
3724       "destination must be different than all sources");
3725     return false;
3726   }
3727   if (!validateIntClampSupported(Inst)) {
3728     Error(IDLoc,
3729       "integer clamping is not supported on this GPU");
3730     return false;
3731   }
3732   if (!validateOpSel(Inst)) {
3733     Error(IDLoc,
3734       "invalid op_sel operand");
3735     return false;
3736   }
3737   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3738   if (!validateMIMGD16(Inst)) {
3739     Error(IDLoc,
3740       "d16 modifier is not supported on this GPU");
3741     return false;
3742   }
3743   if (!validateMIMGDim(Inst)) {
3744     Error(IDLoc, "dim modifier is required on this GPU");
3745     return false;
3746   }
3747   if (!validateMIMGDataSize(Inst)) {
3748     Error(IDLoc,
3749       "image data size does not match dmask and tfe");
3750     return false;
3751   }
3752   if (!validateMIMGAddrSize(Inst)) {
3753     Error(IDLoc,
3754       "image address size does not match dim and a16");
3755     return false;
3756   }
3757   if (!validateMIMGAtomicDMask(Inst)) {
3758     Error(IDLoc,
3759       "invalid atomic image dmask");
3760     return false;
3761   }
3762   if (!validateMIMGGatherDMask(Inst)) {
3763     Error(IDLoc,
3764       "invalid image_gather dmask: only one bit must be set");
3765     return false;
3766   }
3767   if (!validateMovrels(Inst)) {
3768     Error(IDLoc, "source operand must be a VGPR");
3769     return false;
3770   }
3771   if (!validateFlatOffset(Inst, Operands)) {
3772     return false;
3773   }
3774   if (!validateSMEMOffset(Inst, Operands)) {
3775     return false;
3776   }
3777   if (!validateMAIAccWrite(Inst)) {
3778     return false;
3779   }
3780 
3781   return true;
3782 }
3783 
// Forward declarations of file-local helpers whose definitions are not in
// this part of the file (presumably supplied by the generated matcher code
// -- TODO confirm).

// Returns text that checkUnsupportedInstruction() appends to its
// "invalid instruction" diagnostic.
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Queried by isSupportedMnemo() once per assembler variant to decide whether
// a mnemonic is supported with the given feature set.
static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
3791 
3792 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3793                                        const FeatureBitset &FBS) {
3794   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3795 }
3796 
3797 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3798                                        const FeatureBitset &FBS,
3799                                        ArrayRef<unsigned> Variants) {
3800   for (auto Variant : Variants) {
3801     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3802       return true;
3803   }
3804 
3805   return false;
3806 }
3807 
3808 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3809                                                   const SMLoc &IDLoc) {
3810   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3811 
3812   // Check if requested instruction variant is supported.
3813   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3814     return false;
3815 
3816   // This instruction is not supported.
3817   // Clear any other pending errors because they are no longer relevant.
3818   getParser().clearPendingErrors();
3819 
3820   // Requested instruction variant is not supported.
3821   // Check if any other variants are supported.
3822   StringRef VariantName = getMatchedVariantName();
3823   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3824     return Error(IDLoc,
3825                  Twine(VariantName,
3826                        " variant of this instruction is not supported"));
3827   }
3828 
3829   // Finally check if this instruction is supported on any other GPU.
3830   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3831     return Error(IDLoc, "instruction not supported on this GPU");
3832   }
3833 
3834   // Instruction not supported on any GPU. Probably a typo.
3835   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3836   return Error(IDLoc, "invalid instruction" + Suggestion);
3837 }
3838 
3839 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3840                                               OperandVector &Operands,
3841                                               MCStreamer &Out,
3842                                               uint64_t &ErrorInfo,
3843                                               bool MatchingInlineAsm) {
3844   MCInst Inst;
3845   unsigned Result = Match_Success;
3846   for (auto Variant : getMatchedVariants()) {
3847     uint64_t EI;
3848     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3849                                   Variant);
3850     // We order match statuses from least to most specific. We use most specific
3851     // status as resulting
3852     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3853     if ((R == Match_Success) ||
3854         (R == Match_PreferE32) ||
3855         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3856         (R == Match_InvalidOperand && Result != Match_MissingFeature
3857                                    && Result != Match_PreferE32) ||
3858         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3859                                    && Result != Match_MissingFeature
3860                                    && Result != Match_PreferE32)) {
3861       Result = R;
3862       ErrorInfo = EI;
3863     }
3864     if (R == Match_Success)
3865       break;
3866   }
3867 
3868   if (Result == Match_Success) {
3869     if (!validateInstruction(Inst, IDLoc, Operands)) {
3870       return true;
3871     }
3872     Inst.setLoc(IDLoc);
3873     Out.emitInstruction(Inst, getSTI());
3874     return false;
3875   }
3876 
3877   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
3878   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
3879     return true;
3880   }
3881 
3882   switch (Result) {
3883   default: break;
3884   case Match_MissingFeature:
3885     // FIXME: this case should be analyzed and error message corrected.
3886     return Error(IDLoc, "instruction not supported on this GPU");
3887 
3888   case Match_InvalidOperand: {
3889     SMLoc ErrorLoc = IDLoc;
3890     if (ErrorInfo != ~0ULL) {
3891       if (ErrorInfo >= Operands.size()) {
3892         return Error(IDLoc, "too few operands for instruction");
3893       }
3894       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3895       if (ErrorLoc == SMLoc())
3896         ErrorLoc = IDLoc;
3897     }
3898     return Error(ErrorLoc, "invalid operand for instruction");
3899   }
3900 
3901   case Match_PreferE32:
3902     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3903                         "should be encoded as e32");
3904   case Match_MnemonicFail:
3905     llvm_unreachable("Invalid instructions should have been handled already");
3906   }
3907   llvm_unreachable("Implement any new match types added!");
3908 }
3909 
3910 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3911   int64_t Tmp = -1;
3912   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3913     return true;
3914   }
3915   if (getParser().parseAbsoluteExpression(Tmp)) {
3916     return true;
3917   }
3918   Ret = static_cast<uint32_t>(Tmp);
3919   return false;
3920 }
3921 
3922 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3923                                                uint32_t &Minor) {
3924   if (ParseAsAbsoluteExpression(Major))
3925     return TokError("invalid major version");
3926 
3927   if (getLexer().isNot(AsmToken::Comma))
3928     return TokError("minor version number required, comma expected");
3929   Lex();
3930 
3931   if (ParseAsAbsoluteExpression(Minor))
3932     return TokError("invalid minor version");
3933 
3934   return false;
3935 }
3936 
3937 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3938   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3939     return TokError("directive only supported for amdgcn architecture");
3940 
3941   std::string Target;
3942 
3943   SMLoc TargetStart = getTok().getLoc();
3944   if (getParser().parseEscapedString(Target))
3945     return true;
3946   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3947 
3948   std::string ExpectedTarget;
3949   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3950   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3951 
3952   if (Target != ExpectedTargetOS.str())
3953     return getParser().Error(TargetRange.Start, "target must match options",
3954                              TargetRange);
3955 
3956   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3957   return false;
3958 }
3959 
3960 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3961   return getParser().Error(Range.Start, "value out of range", Range);
3962 }
3963 
3964 bool AMDGPUAsmParser::calculateGPRBlocks(
3965     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3966     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3967     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3968     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3969   // TODO(scott.linder): These calculations are duplicated from
3970   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3971   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3972 
3973   unsigned NumVGPRs = NextFreeVGPR;
3974   unsigned NumSGPRs = NextFreeSGPR;
3975 
3976   if (Version.Major >= 10)
3977     NumSGPRs = 0;
3978   else {
3979     unsigned MaxAddressableNumSGPRs =
3980         IsaInfo::getAddressableNumSGPRs(&getSTI());
3981 
3982     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3983         NumSGPRs > MaxAddressableNumSGPRs)
3984       return OutOfRangeError(SGPRRange);
3985 
3986     NumSGPRs +=
3987         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3988 
3989     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3990         NumSGPRs > MaxAddressableNumSGPRs)
3991       return OutOfRangeError(SGPRRange);
3992 
3993     if (Features.test(FeatureSGPRInitBug))
3994       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3995   }
3996 
3997   VGPRBlocks =
3998       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3999   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4000 
4001   return false;
4002 }
4003 
4004 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4005   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4006     return TokError("directive only supported for amdgcn architecture");
4007 
4008   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4009     return TokError("directive only supported for amdhsa OS");
4010 
4011   StringRef KernelName;
4012   if (getParser().parseIdentifier(KernelName))
4013     return true;
4014 
4015   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4016 
4017   StringSet<> Seen;
4018 
4019   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4020 
4021   SMRange VGPRRange;
4022   uint64_t NextFreeVGPR = 0;
4023   SMRange SGPRRange;
4024   uint64_t NextFreeSGPR = 0;
4025   unsigned UserSGPRCount = 0;
4026   bool ReserveVCC = true;
4027   bool ReserveFlatScr = true;
4028   bool ReserveXNACK = hasXNACK();
4029   Optional<bool> EnableWavefrontSize32;
4030 
4031   while (true) {
4032     while (getLexer().is(AsmToken::EndOfStatement))
4033       Lex();
4034 
4035     if (getLexer().isNot(AsmToken::Identifier))
4036       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4037 
4038     StringRef ID = getTok().getIdentifier();
4039     SMRange IDRange = getTok().getLocRange();
4040     Lex();
4041 
4042     if (ID == ".end_amdhsa_kernel")
4043       break;
4044 
4045     if (Seen.find(ID) != Seen.end())
4046       return TokError(".amdhsa_ directives cannot be repeated");
4047     Seen.insert(ID);
4048 
4049     SMLoc ValStart = getTok().getLoc();
4050     int64_t IVal;
4051     if (getParser().parseAbsoluteExpression(IVal))
4052       return true;
4053     SMLoc ValEnd = getTok().getLoc();
4054     SMRange ValRange = SMRange(ValStart, ValEnd);
4055 
4056     if (IVal < 0)
4057       return OutOfRangeError(ValRange);
4058 
4059     uint64_t Val = IVal;
4060 
4061 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4062   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4063     return OutOfRangeError(RANGE);                                             \
4064   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4065 
4066     if (ID == ".amdhsa_group_segment_fixed_size") {
4067       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4068         return OutOfRangeError(ValRange);
4069       KD.group_segment_fixed_size = Val;
4070     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4071       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4072         return OutOfRangeError(ValRange);
4073       KD.private_segment_fixed_size = Val;
4074     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4075       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4076                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4077                        Val, ValRange);
4078       if (Val)
4079         UserSGPRCount += 4;
4080     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4081       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4082                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4083                        ValRange);
4084       if (Val)
4085         UserSGPRCount += 2;
4086     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4087       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4088                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4089                        ValRange);
4090       if (Val)
4091         UserSGPRCount += 2;
4092     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4093       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4094                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4095                        Val, ValRange);
4096       if (Val)
4097         UserSGPRCount += 2;
4098     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4099       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4100                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4101                        ValRange);
4102       if (Val)
4103         UserSGPRCount += 2;
4104     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4105       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4106                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4107                        ValRange);
4108       if (Val)
4109         UserSGPRCount += 2;
4110     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4111       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4112                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4113                        Val, ValRange);
4114       if (Val)
4115         UserSGPRCount += 1;
4116     } else if (ID == ".amdhsa_wavefront_size32") {
4117       if (IVersion.Major < 10)
4118         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4119                                  IDRange);
4120       EnableWavefrontSize32 = Val;
4121       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4122                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4123                        Val, ValRange);
4124     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4125       PARSE_BITS_ENTRY(
4126           KD.compute_pgm_rsrc2,
4127           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4128           ValRange);
4129     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4130       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4131                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4132                        ValRange);
4133     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4134       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4135                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4136                        ValRange);
4137     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4138       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4139                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4140                        ValRange);
4141     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4142       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4143                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4144                        ValRange);
4145     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4146       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4147                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4148                        ValRange);
4149     } else if (ID == ".amdhsa_next_free_vgpr") {
4150       VGPRRange = ValRange;
4151       NextFreeVGPR = Val;
4152     } else if (ID == ".amdhsa_next_free_sgpr") {
4153       SGPRRange = ValRange;
4154       NextFreeSGPR = Val;
4155     } else if (ID == ".amdhsa_reserve_vcc") {
4156       if (!isUInt<1>(Val))
4157         return OutOfRangeError(ValRange);
4158       ReserveVCC = Val;
4159     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4160       if (IVersion.Major < 7)
4161         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4162                                  IDRange);
4163       if (!isUInt<1>(Val))
4164         return OutOfRangeError(ValRange);
4165       ReserveFlatScr = Val;
4166     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4167       if (IVersion.Major < 8)
4168         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4169                                  IDRange);
4170       if (!isUInt<1>(Val))
4171         return OutOfRangeError(ValRange);
4172       ReserveXNACK = Val;
4173     } else if (ID == ".amdhsa_float_round_mode_32") {
4174       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4175                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4176     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4177       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4178                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4179     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4180       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4181                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4182     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4183       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4184                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4185                        ValRange);
4186     } else if (ID == ".amdhsa_dx10_clamp") {
4187       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4188                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4189     } else if (ID == ".amdhsa_ieee_mode") {
4190       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4191                        Val, ValRange);
4192     } else if (ID == ".amdhsa_fp16_overflow") {
4193       if (IVersion.Major < 9)
4194         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4195                                  IDRange);
4196       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4197                        ValRange);
4198     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4199       if (IVersion.Major < 10)
4200         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4201                                  IDRange);
4202       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4203                        ValRange);
4204     } else if (ID == ".amdhsa_memory_ordered") {
4205       if (IVersion.Major < 10)
4206         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4207                                  IDRange);
4208       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4209                        ValRange);
4210     } else if (ID == ".amdhsa_forward_progress") {
4211       if (IVersion.Major < 10)
4212         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4213                                  IDRange);
4214       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4215                        ValRange);
4216     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4217       PARSE_BITS_ENTRY(
4218           KD.compute_pgm_rsrc2,
4219           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4220           ValRange);
4221     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4222       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4223                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4224                        Val, ValRange);
4225     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4226       PARSE_BITS_ENTRY(
4227           KD.compute_pgm_rsrc2,
4228           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4229           ValRange);
4230     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4231       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4232                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4233                        Val, ValRange);
4234     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4235       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4236                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4237                        Val, ValRange);
4238     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4239       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4240                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4241                        Val, ValRange);
4242     } else if (ID == ".amdhsa_exception_int_div_zero") {
4243       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4244                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4245                        Val, ValRange);
4246     } else {
4247       return getParser().Error(IDRange.Start,
4248                                "unknown .amdhsa_kernel directive", IDRange);
4249     }
4250 
4251 #undef PARSE_BITS_ENTRY
4252   }
4253 
4254   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4255     return TokError(".amdhsa_next_free_vgpr directive is required");
4256 
4257   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4258     return TokError(".amdhsa_next_free_sgpr directive is required");
4259 
4260   unsigned VGPRBlocks;
4261   unsigned SGPRBlocks;
4262   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4263                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4264                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4265                          SGPRBlocks))
4266     return true;
4267 
4268   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4269           VGPRBlocks))
4270     return OutOfRangeError(VGPRRange);
4271   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4272                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4273 
4274   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4275           SGPRBlocks))
4276     return OutOfRangeError(SGPRRange);
4277   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4278                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4279                   SGPRBlocks);
4280 
4281   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4282     return TokError("too many user SGPRs enabled");
4283   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4284                   UserSGPRCount);
4285 
4286   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4287       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4288       ReserveFlatScr, ReserveXNACK);
4289   return false;
4290 }
4291 
4292 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4293   uint32_t Major;
4294   uint32_t Minor;
4295 
4296   if (ParseDirectiveMajorMinor(Major, Minor))
4297     return true;
4298 
4299   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4300   return false;
4301 }
4302 
4303 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4304   uint32_t Major;
4305   uint32_t Minor;
4306   uint32_t Stepping;
4307   StringRef VendorName;
4308   StringRef ArchName;
4309 
4310   // If this directive has no arguments, then use the ISA version for the
4311   // targeted GPU.
4312   if (getLexer().is(AsmToken::EndOfStatement)) {
4313     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4314     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4315                                                       ISA.Stepping,
4316                                                       "AMD", "AMDGPU");
4317     return false;
4318   }
4319 
4320   if (ParseDirectiveMajorMinor(Major, Minor))
4321     return true;
4322 
4323   if (getLexer().isNot(AsmToken::Comma))
4324     return TokError("stepping version number required, comma expected");
4325   Lex();
4326 
4327   if (ParseAsAbsoluteExpression(Stepping))
4328     return TokError("invalid stepping version");
4329 
4330   if (getLexer().isNot(AsmToken::Comma))
4331     return TokError("vendor name required, comma expected");
4332   Lex();
4333 
4334   if (getLexer().isNot(AsmToken::String))
4335     return TokError("invalid vendor name");
4336 
4337   VendorName = getLexer().getTok().getStringContents();
4338   Lex();
4339 
4340   if (getLexer().isNot(AsmToken::Comma))
4341     return TokError("arch name required, comma expected");
4342   Lex();
4343 
4344   if (getLexer().isNot(AsmToken::String))
4345     return TokError("invalid arch name");
4346 
4347   ArchName = getLexer().getTok().getStringContents();
4348   Lex();
4349 
4350   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4351                                                     VendorName, ArchName);
4352   return false;
4353 }
4354 
4355 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4356                                                amd_kernel_code_t &Header) {
4357   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4358   // assembly for backwards compatibility.
4359   if (ID == "max_scratch_backing_memory_byte_size") {
4360     Parser.eatToEndOfStatement();
4361     return false;
4362   }
4363 
4364   SmallString<40> ErrStr;
4365   raw_svector_ostream Err(ErrStr);
4366   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4367     return TokError(Err.str());
4368   }
4369   Lex();
4370 
4371   if (ID == "enable_wavefront_size32") {
4372     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4373       if (!isGFX10())
4374         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4375       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4376         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4377     } else {
4378       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4379         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4380     }
4381   }
4382 
4383   if (ID == "wavefront_size") {
4384     if (Header.wavefront_size == 5) {
4385       if (!isGFX10())
4386         return TokError("wavefront_size=5 is only allowed on GFX10+");
4387       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4388         return TokError("wavefront_size=5 requires +WavefrontSize32");
4389     } else if (Header.wavefront_size == 6) {
4390       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4391         return TokError("wavefront_size=6 requires +WavefrontSize64");
4392     }
4393   }
4394 
4395   if (ID == "enable_wgp_mode") {
4396     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4397       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4398   }
4399 
4400   if (ID == "enable_mem_ordered") {
4401     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4402       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4403   }
4404 
4405   if (ID == "enable_fwd_progress") {
4406     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4407       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4408   }
4409 
4410   return false;
4411 }
4412 
4413 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4414   amd_kernel_code_t Header;
4415   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4416 
4417   while (true) {
4418     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4419     // will set the current token to EndOfStatement.
4420     while(getLexer().is(AsmToken::EndOfStatement))
4421       Lex();
4422 
4423     if (getLexer().isNot(AsmToken::Identifier))
4424       return TokError("expected value identifier or .end_amd_kernel_code_t");
4425 
4426     StringRef ID = getLexer().getTok().getIdentifier();
4427     Lex();
4428 
4429     if (ID == ".end_amd_kernel_code_t")
4430       break;
4431 
4432     if (ParseAMDKernelCodeTValue(ID, Header))
4433       return true;
4434   }
4435 
4436   getTargetStreamer().EmitAMDKernelCodeT(Header);
4437 
4438   return false;
4439 }
4440 
4441 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4442   if (getLexer().isNot(AsmToken::Identifier))
4443     return TokError("expected symbol name");
4444 
4445   StringRef KernelName = Parser.getTok().getString();
4446 
4447   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4448                                            ELF::STT_AMDGPU_HSA_KERNEL);
4449   Lex();
4450   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4451     KernelScope.initialize(getContext());
4452   return false;
4453 }
4454 
4455 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4456   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4457     return Error(getParser().getTok().getLoc(),
4458                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4459                  "architectures");
4460   }
4461 
4462   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4463 
4464   std::string ISAVersionStringFromSTI;
4465   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4466   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4467 
4468   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4469     return Error(getParser().getTok().getLoc(),
4470                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4471                  "arguments specified through the command line");
4472   }
4473 
4474   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4475   Lex();
4476 
4477   return false;
4478 }
4479 
4480 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4481   const char *AssemblerDirectiveBegin;
4482   const char *AssemblerDirectiveEnd;
4483   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4484       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4485           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4486                             HSAMD::V3::AssemblerDirectiveEnd)
4487           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4488                             HSAMD::AssemblerDirectiveEnd);
4489 
4490   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4491     return Error(getParser().getTok().getLoc(),
4492                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4493                  "not available on non-amdhsa OSes")).str());
4494   }
4495 
4496   std::string HSAMetadataString;
4497   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4498                           HSAMetadataString))
4499     return true;
4500 
4501   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4502     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4503       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4504   } else {
4505     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4506       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4507   }
4508 
4509   return false;
4510 }
4511 
4512 /// Common code to parse out a block of text (typically YAML) between start and
4513 /// end directives.
4514 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4515                                           const char *AssemblerDirectiveEnd,
4516                                           std::string &CollectString) {
4517 
4518   raw_string_ostream CollectStream(CollectString);
4519 
4520   getLexer().setSkipSpace(false);
4521 
4522   bool FoundEnd = false;
4523   while (!getLexer().is(AsmToken::Eof)) {
4524     while (getLexer().is(AsmToken::Space)) {
4525       CollectStream << getLexer().getTok().getString();
4526       Lex();
4527     }
4528 
4529     if (getLexer().is(AsmToken::Identifier)) {
4530       StringRef ID = getLexer().getTok().getIdentifier();
4531       if (ID == AssemblerDirectiveEnd) {
4532         Lex();
4533         FoundEnd = true;
4534         break;
4535       }
4536     }
4537 
4538     CollectStream << Parser.parseStringToEndOfStatement()
4539                   << getContext().getAsmInfo()->getSeparatorString();
4540 
4541     Parser.eatToEndOfStatement();
4542   }
4543 
4544   getLexer().setSkipSpace(true);
4545 
4546   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4547     return TokError(Twine("expected directive ") +
4548                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4549   }
4550 
4551   CollectStream.flush();
4552   return false;
4553 }
4554 
4555 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4556 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4557   std::string String;
4558   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4559                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4560     return true;
4561 
4562   auto PALMetadata = getTargetStreamer().getPALMetadata();
4563   if (!PALMetadata->setFromString(String))
4564     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4565   return false;
4566 }
4567 
4568 /// Parse the assembler directive for old linear-format PAL metadata.
4569 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4570   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4571     return Error(getParser().getTok().getLoc(),
4572                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4573                  "not available on non-amdpal OSes")).str());
4574   }
4575 
4576   auto PALMetadata = getTargetStreamer().getPALMetadata();
4577   PALMetadata->setLegacy();
4578   for (;;) {
4579     uint32_t Key, Value;
4580     if (ParseAsAbsoluteExpression(Key)) {
4581       return TokError(Twine("invalid value in ") +
4582                       Twine(PALMD::AssemblerDirective));
4583     }
4584     if (getLexer().isNot(AsmToken::Comma)) {
4585       return TokError(Twine("expected an even number of values in ") +
4586                       Twine(PALMD::AssemblerDirective));
4587     }
4588     Lex();
4589     if (ParseAsAbsoluteExpression(Value)) {
4590       return TokError(Twine("invalid value in ") +
4591                       Twine(PALMD::AssemblerDirective));
4592     }
4593     PALMetadata->setRegister(Key, Value);
4594     if (getLexer().isNot(AsmToken::Comma))
4595       break;
4596     Lex();
4597   }
4598   return false;
4599 }
4600 
4601 /// ParseDirectiveAMDGPULDS
4602 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4603 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4604   if (getParser().checkForValidSection())
4605     return true;
4606 
4607   StringRef Name;
4608   SMLoc NameLoc = getLexer().getLoc();
4609   if (getParser().parseIdentifier(Name))
4610     return TokError("expected identifier in directive");
4611 
4612   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4613   if (parseToken(AsmToken::Comma, "expected ','"))
4614     return true;
4615 
4616   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4617 
4618   int64_t Size;
4619   SMLoc SizeLoc = getLexer().getLoc();
4620   if (getParser().parseAbsoluteExpression(Size))
4621     return true;
4622   if (Size < 0)
4623     return Error(SizeLoc, "size must be non-negative");
4624   if (Size > LocalMemorySize)
4625     return Error(SizeLoc, "size is too large");
4626 
4627   int64_t Alignment = 4;
4628   if (getLexer().is(AsmToken::Comma)) {
4629     Lex();
4630     SMLoc AlignLoc = getLexer().getLoc();
4631     if (getParser().parseAbsoluteExpression(Alignment))
4632       return true;
4633     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4634       return Error(AlignLoc, "alignment must be a power of two");
4635 
4636     // Alignment larger than the size of LDS is possible in theory, as long
4637     // as the linker manages to place to symbol at address 0, but we do want
4638     // to make sure the alignment fits nicely into a 32-bit integer.
4639     if (Alignment >= 1u << 31)
4640       return Error(AlignLoc, "alignment is too large");
4641   }
4642 
4643   if (parseToken(AsmToken::EndOfStatement,
4644                  "unexpected token in '.amdgpu_lds' directive"))
4645     return true;
4646 
4647   Symbol->redefineIfPossible();
4648   if (!Symbol->isUndefined())
4649     return Error(NameLoc, "invalid symbol redefinition");
4650 
4651   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4652   return false;
4653 }
4654 
4655 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4656   StringRef IDVal = DirectiveID.getString();
4657 
4658   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4659     if (IDVal == ".amdgcn_target")
4660       return ParseDirectiveAMDGCNTarget();
4661 
4662     if (IDVal == ".amdhsa_kernel")
4663       return ParseDirectiveAMDHSAKernel();
4664 
4665     // TODO: Restructure/combine with PAL metadata directive.
4666     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4667       return ParseDirectiveHSAMetadata();
4668   } else {
4669     if (IDVal == ".hsa_code_object_version")
4670       return ParseDirectiveHSACodeObjectVersion();
4671 
4672     if (IDVal == ".hsa_code_object_isa")
4673       return ParseDirectiveHSACodeObjectISA();
4674 
4675     if (IDVal == ".amd_kernel_code_t")
4676       return ParseDirectiveAMDKernelCodeT();
4677 
4678     if (IDVal == ".amdgpu_hsa_kernel")
4679       return ParseDirectiveAMDGPUHsaKernel();
4680 
4681     if (IDVal == ".amd_amdgpu_isa")
4682       return ParseDirectiveISAVersion();
4683 
4684     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4685       return ParseDirectiveHSAMetadata();
4686   }
4687 
4688   if (IDVal == ".amdgpu_lds")
4689     return ParseDirectiveAMDGPULDS();
4690 
4691   if (IDVal == PALMD::AssemblerDirectiveBegin)
4692     return ParseDirectivePALMetadataBegin();
4693 
4694   if (IDVal == PALMD::AssemblerDirective)
4695     return ParseDirectivePALMetadata();
4696 
4697   return true;
4698 }
4699 
4700 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4701                                            unsigned RegNo) const {
4702 
4703   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4704        R.isValid(); ++R) {
4705     if (*R == RegNo)
4706       return isGFX9Plus();
4707   }
4708 
4709   // GFX10 has 2 more SGPRs 104 and 105.
4710   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4711        R.isValid(); ++R) {
4712     if (*R == RegNo)
4713       return hasSGPR104_SGPR105();
4714   }
4715 
4716   switch (RegNo) {
4717   case AMDGPU::SRC_SHARED_BASE:
4718   case AMDGPU::SRC_SHARED_LIMIT:
4719   case AMDGPU::SRC_PRIVATE_BASE:
4720   case AMDGPU::SRC_PRIVATE_LIMIT:
4721   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4722     return !isCI() && !isSI() && !isVI();
4723   case AMDGPU::TBA:
4724   case AMDGPU::TBA_LO:
4725   case AMDGPU::TBA_HI:
4726   case AMDGPU::TMA:
4727   case AMDGPU::TMA_LO:
4728   case AMDGPU::TMA_HI:
4729     return !isGFX9() && !isGFX10();
4730   case AMDGPU::XNACK_MASK:
4731   case AMDGPU::XNACK_MASK_LO:
4732   case AMDGPU::XNACK_MASK_HI:
4733     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4734   case AMDGPU::SGPR_NULL:
4735     return isGFX10();
4736   default:
4737     break;
4738   }
4739 
4740   if (isCI())
4741     return true;
4742 
4743   if (isSI() || isGFX10()) {
4744     // No flat_scr on SI.
4745     // On GFX10 flat scratch is not a valid register operand and can only be
4746     // accessed with s_setreg/s_getreg.
4747     switch (RegNo) {
4748     case AMDGPU::FLAT_SCR:
4749     case AMDGPU::FLAT_SCR_LO:
4750     case AMDGPU::FLAT_SCR_HI:
4751       return false;
4752     default:
4753       return true;
4754     }
4755   }
4756 
4757   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4758   // SI/CI have.
4759   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4760        R.isValid(); ++R) {
4761     if (*R == RegNo)
4762       return hasSGPR102_SGPR103();
4763   }
4764 
4765   return true;
4766 }
4767 
4768 OperandMatchResultTy
4769 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4770                               OperandMode Mode) {
4771   // Try to parse with a custom parser
4772   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4773 
4774   // If we successfully parsed the operand or if there as an error parsing,
4775   // we are done.
4776   //
4777   // If we are parsing after we reach EndOfStatement then this means we
4778   // are appending default values to the Operands list.  This is only done
4779   // by custom parser, so we shouldn't continue on to the generic parsing.
4780   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4781       getLexer().is(AsmToken::EndOfStatement))
4782     return ResTy;
4783 
4784   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4785     unsigned Prefix = Operands.size();
4786     SMLoc LBraceLoc = getTok().getLoc();
4787     Parser.Lex(); // eat the '['
4788 
4789     for (;;) {
4790       ResTy = parseReg(Operands);
4791       if (ResTy != MatchOperand_Success)
4792         return ResTy;
4793 
4794       if (getLexer().is(AsmToken::RBrac))
4795         break;
4796 
4797       if (getLexer().isNot(AsmToken::Comma))
4798         return MatchOperand_ParseFail;
4799       Parser.Lex();
4800     }
4801 
4802     if (Operands.size() - Prefix > 1) {
4803       Operands.insert(Operands.begin() + Prefix,
4804                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4805       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4806                                                     getTok().getLoc()));
4807     }
4808 
4809     Parser.Lex(); // eat the ']'
4810     return MatchOperand_Success;
4811   }
4812 
4813   return parseRegOrImm(Operands);
4814 }
4815 
4816 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4817   // Clear any forced encodings from the previous instruction.
4818   setForcedEncodingSize(0);
4819   setForcedDPP(false);
4820   setForcedSDWA(false);
4821 
4822   if (Name.endswith("_e64")) {
4823     setForcedEncodingSize(64);
4824     return Name.substr(0, Name.size() - 4);
4825   } else if (Name.endswith("_e32")) {
4826     setForcedEncodingSize(32);
4827     return Name.substr(0, Name.size() - 4);
4828   } else if (Name.endswith("_dpp")) {
4829     setForcedDPP(true);
4830     return Name.substr(0, Name.size() - 4);
4831   } else if (Name.endswith("_sdwa")) {
4832     setForcedSDWA(true);
4833     return Name.substr(0, Name.size() - 5);
4834   }
4835   return Name;
4836 }
4837 
4838 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4839                                        StringRef Name,
4840                                        SMLoc NameLoc, OperandVector &Operands) {
4841   // Add the instruction mnemonic
4842   Name = parseMnemonicSuffix(Name);
4843   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4844 
4845   bool IsMIMG = Name.startswith("image_");
4846 
4847   while (!getLexer().is(AsmToken::EndOfStatement)) {
4848     OperandMode Mode = OperandMode_Default;
4849     if (IsMIMG && isGFX10() && Operands.size() == 2)
4850       Mode = OperandMode_NSA;
4851     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4852 
4853     // Eat the comma or space if there is one.
4854     if (getLexer().is(AsmToken::Comma))
4855       Parser.Lex();
4856 
4857     if (Res != MatchOperand_Success) {
4858       checkUnsupportedInstruction(Name, NameLoc);
4859       if (!Parser.hasPendingError()) {
4860         // FIXME: use real operand location rather than the current location.
4861         StringRef Msg =
4862           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4863                                             "not a valid operand.";
4864         Error(getLexer().getLoc(), Msg);
4865       }
4866       while (!getLexer().is(AsmToken::EndOfStatement)) {
4867         Parser.Lex();
4868       }
4869       return true;
4870     }
4871   }
4872 
4873   return false;
4874 }
4875 
4876 //===----------------------------------------------------------------------===//
4877 // Utility functions
4878 //===----------------------------------------------------------------------===//
4879 
4880 OperandMatchResultTy
4881 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4882 
4883   if (!trySkipId(Prefix, AsmToken::Colon))
4884     return MatchOperand_NoMatch;
4885 
4886   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4887 }
4888 
4889 OperandMatchResultTy
4890 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4891                                     AMDGPUOperand::ImmTy ImmTy,
4892                                     bool (*ConvertResult)(int64_t&)) {
4893   SMLoc S = getLoc();
4894   int64_t Value = 0;
4895 
4896   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4897   if (Res != MatchOperand_Success)
4898     return Res;
4899 
4900   if (ConvertResult && !ConvertResult(Value)) {
4901     Error(S, "invalid " + StringRef(Prefix) + " value.");
4902   }
4903 
4904   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4905   return MatchOperand_Success;
4906 }
4907 
4908 OperandMatchResultTy
4909 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4910                                              OperandVector &Operands,
4911                                              AMDGPUOperand::ImmTy ImmTy,
4912                                              bool (*ConvertResult)(int64_t&)) {
4913   SMLoc S = getLoc();
4914   if (!trySkipId(Prefix, AsmToken::Colon))
4915     return MatchOperand_NoMatch;
4916 
4917   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4918     return MatchOperand_ParseFail;
4919 
4920   unsigned Val = 0;
4921   const unsigned MaxSize = 4;
4922 
4923   // FIXME: How to verify the number of elements matches the number of src
4924   // operands?
4925   for (int I = 0; ; ++I) {
4926     int64_t Op;
4927     SMLoc Loc = getLoc();
4928     if (!parseExpr(Op))
4929       return MatchOperand_ParseFail;
4930 
4931     if (Op != 0 && Op != 1) {
4932       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4933       return MatchOperand_ParseFail;
4934     }
4935 
4936     Val |= (Op << I);
4937 
4938     if (trySkipToken(AsmToken::RBrac))
4939       break;
4940 
4941     if (I + 1 == MaxSize) {
4942       Error(getLoc(), "expected a closing square bracket");
4943       return MatchOperand_ParseFail;
4944     }
4945 
4946     if (!skipToken(AsmToken::Comma, "expected a comma"))
4947       return MatchOperand_ParseFail;
4948   }
4949 
4950   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4951   return MatchOperand_Success;
4952 }
4953 
4954 OperandMatchResultTy
4955 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4956                                AMDGPUOperand::ImmTy ImmTy) {
4957   int64_t Bit = 0;
4958   SMLoc S = Parser.getTok().getLoc();
4959 
4960   // We are at the end of the statement, and this is a default argument, so
4961   // use a default value.
4962   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4963     switch(getLexer().getKind()) {
4964       case AsmToken::Identifier: {
4965         StringRef Tok = Parser.getTok().getString();
4966         if (Tok == Name) {
4967           if (Tok == "r128" && !hasMIMG_R128())
4968             Error(S, "r128 modifier is not supported on this GPU");
4969           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4970             Error(S, "a16 modifier is not supported on this GPU");
4971           Bit = 1;
4972           Parser.Lex();
4973         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4974           Bit = 0;
4975           Parser.Lex();
4976         } else {
4977           return MatchOperand_NoMatch;
4978         }
4979         break;
4980       }
4981       default:
4982         return MatchOperand_NoMatch;
4983     }
4984   }
4985 
4986   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4987     return MatchOperand_ParseFail;
4988 
4989   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4990     ImmTy = AMDGPUOperand::ImmTyR128A16;
4991 
4992   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4993   return MatchOperand_Success;
4994 }
4995 
4996 static void addOptionalImmOperand(
4997   MCInst& Inst, const OperandVector& Operands,
4998   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4999   AMDGPUOperand::ImmTy ImmT,
5000   int64_t Default = 0) {
5001   auto i = OptionalIdx.find(ImmT);
5002   if (i != OptionalIdx.end()) {
5003     unsigned Idx = i->second;
5004     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5005   } else {
5006     Inst.addOperand(MCOperand::createImm(Default));
5007   }
5008 }
5009 
5010 OperandMatchResultTy
5011 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5012   if (getLexer().isNot(AsmToken::Identifier)) {
5013     return MatchOperand_NoMatch;
5014   }
5015   StringRef Tok = Parser.getTok().getString();
5016   if (Tok != Prefix) {
5017     return MatchOperand_NoMatch;
5018   }
5019 
5020   Parser.Lex();
5021   if (getLexer().isNot(AsmToken::Colon)) {
5022     return MatchOperand_ParseFail;
5023   }
5024 
5025   Parser.Lex();
5026   if (getLexer().isNot(AsmToken::Identifier)) {
5027     return MatchOperand_ParseFail;
5028   }
5029 
5030   Value = Parser.getTok().getString();
5031   return MatchOperand_Success;
5032 }
5033 
5034 //===----------------------------------------------------------------------===//
5035 // MTBUF format
5036 //===----------------------------------------------------------------------===//
5037 
5038 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5039                                   int64_t MaxVal,
5040                                   int64_t &Fmt) {
5041   int64_t Val;
5042   SMLoc Loc = getLoc();
5043 
5044   auto Res = parseIntWithPrefix(Pref, Val);
5045   if (Res == MatchOperand_ParseFail)
5046     return false;
5047   if (Res == MatchOperand_NoMatch)
5048     return true;
5049 
5050   if (Val < 0 || Val > MaxVal) {
5051     Error(Loc, Twine("out of range ", StringRef(Pref)));
5052     return false;
5053   }
5054 
5055   Fmt = Val;
5056   return true;
5057 }
5058 
5059 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5060 // values to live in a joint format operand in the MCInst encoding.
5061 OperandMatchResultTy
5062 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5063   using namespace llvm::AMDGPU::MTBUFFormat;
5064 
5065   int64_t Dfmt = DFMT_UNDEF;
5066   int64_t Nfmt = NFMT_UNDEF;
5067 
5068   // dfmt and nfmt can appear in either order, and each is optional.
5069   for (int I = 0; I < 2; ++I) {
5070     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5071       return MatchOperand_ParseFail;
5072 
5073     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5074       return MatchOperand_ParseFail;
5075     }
5076     // Skip optional comma between dfmt/nfmt
5077     // but guard against 2 commas following each other.
5078     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5079         !peekToken().is(AsmToken::Comma)) {
5080       trySkipToken(AsmToken::Comma);
5081     }
5082   }
5083 
5084   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5085     return MatchOperand_NoMatch;
5086 
5087   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5088   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5089 
5090   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5091   return MatchOperand_Success;
5092 }
5093 
5094 OperandMatchResultTy
5095 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5096   using namespace llvm::AMDGPU::MTBUFFormat;
5097 
5098   int64_t Fmt = UFMT_UNDEF;
5099 
5100   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5101     return MatchOperand_ParseFail;
5102 
5103   if (Fmt == UFMT_UNDEF)
5104     return MatchOperand_NoMatch;
5105 
5106   Format = Fmt;
5107   return MatchOperand_Success;
5108 }
5109 
5110 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5111                                     int64_t &Nfmt,
5112                                     StringRef FormatStr,
5113                                     SMLoc Loc) {
5114   using namespace llvm::AMDGPU::MTBUFFormat;
5115   int64_t Format;
5116 
5117   Format = getDfmt(FormatStr);
5118   if (Format != DFMT_UNDEF) {
5119     Dfmt = Format;
5120     return true;
5121   }
5122 
5123   Format = getNfmt(FormatStr, getSTI());
5124   if (Format != NFMT_UNDEF) {
5125     Nfmt = Format;
5126     return true;
5127   }
5128 
5129   Error(Loc, "unsupported format");
5130   return false;
5131 }
5132 
5133 OperandMatchResultTy
5134 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5135                                           SMLoc FormatLoc,
5136                                           int64_t &Format) {
5137   using namespace llvm::AMDGPU::MTBUFFormat;
5138 
5139   int64_t Dfmt = DFMT_UNDEF;
5140   int64_t Nfmt = NFMT_UNDEF;
5141   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5142     return MatchOperand_ParseFail;
5143 
5144   if (trySkipToken(AsmToken::Comma)) {
5145     StringRef Str;
5146     SMLoc Loc = getLoc();
5147     if (!parseId(Str, "expected a format string") ||
5148         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5149       return MatchOperand_ParseFail;
5150     }
5151     if (Dfmt == DFMT_UNDEF) {
5152       Error(Loc, "duplicate numeric format");
5153       return MatchOperand_ParseFail;
5154     } else if (Nfmt == NFMT_UNDEF) {
5155       Error(Loc, "duplicate data format");
5156       return MatchOperand_ParseFail;
5157     }
5158   }
5159 
5160   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5161   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5162 
5163   if (isGFX10()) {
5164     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5165     if (Ufmt == UFMT_UNDEF) {
5166       Error(FormatLoc, "unsupported format");
5167       return MatchOperand_ParseFail;
5168     }
5169     Format = Ufmt;
5170   } else {
5171     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5172   }
5173 
5174   return MatchOperand_Success;
5175 }
5176 
5177 OperandMatchResultTy
5178 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5179                                             SMLoc Loc,
5180                                             int64_t &Format) {
5181   using namespace llvm::AMDGPU::MTBUFFormat;
5182 
5183   auto Id = getUnifiedFormat(FormatStr);
5184   if (Id == UFMT_UNDEF)
5185     return MatchOperand_NoMatch;
5186 
5187   if (!isGFX10()) {
5188     Error(Loc, "unified format is not supported on this GPU");
5189     return MatchOperand_ParseFail;
5190   }
5191 
5192   Format = Id;
5193   return MatchOperand_Success;
5194 }
5195 
5196 OperandMatchResultTy
5197 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5198   using namespace llvm::AMDGPU::MTBUFFormat;
5199   SMLoc Loc = getLoc();
5200 
5201   if (!parseExpr(Format))
5202     return MatchOperand_ParseFail;
5203   if (!isValidFormatEncoding(Format, getSTI())) {
5204     Error(Loc, "out of range format");
5205     return MatchOperand_ParseFail;
5206   }
5207 
5208   return MatchOperand_Success;
5209 }
5210 
5211 OperandMatchResultTy
5212 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5213   using namespace llvm::AMDGPU::MTBUFFormat;
5214 
5215   if (!trySkipId("format", AsmToken::Colon))
5216     return MatchOperand_NoMatch;
5217 
5218   if (trySkipToken(AsmToken::LBrac)) {
5219     StringRef FormatStr;
5220     SMLoc Loc = getLoc();
5221     if (!parseId(FormatStr, "expected a format string"))
5222       return MatchOperand_ParseFail;
5223 
5224     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5225     if (Res == MatchOperand_NoMatch)
5226       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5227     if (Res != MatchOperand_Success)
5228       return Res;
5229 
5230     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5231       return MatchOperand_ParseFail;
5232 
5233     return MatchOperand_Success;
5234   }
5235 
5236   return parseNumericFormat(Format);
5237 }
5238 
5239 OperandMatchResultTy
5240 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5241   using namespace llvm::AMDGPU::MTBUFFormat;
5242 
5243   int64_t Format = getDefaultFormatEncoding(getSTI());
5244   OperandMatchResultTy Res;
5245   SMLoc Loc = getLoc();
5246 
5247   // Parse legacy format syntax.
5248   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5249   if (Res == MatchOperand_ParseFail)
5250     return Res;
5251 
5252   bool FormatFound = (Res == MatchOperand_Success);
5253 
5254   Operands.push_back(
5255     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5256 
5257   if (FormatFound)
5258     trySkipToken(AsmToken::Comma);
5259 
5260   if (isToken(AsmToken::EndOfStatement)) {
5261     // We are expecting an soffset operand,
5262     // but let matcher handle the error.
5263     return MatchOperand_Success;
5264   }
5265 
5266   // Parse soffset.
5267   Res = parseRegOrImm(Operands);
5268   if (Res != MatchOperand_Success)
5269     return Res;
5270 
5271   trySkipToken(AsmToken::Comma);
5272 
5273   if (!FormatFound) {
5274     Res = parseSymbolicOrNumericFormat(Format);
5275     if (Res == MatchOperand_ParseFail)
5276       return Res;
5277     if (Res == MatchOperand_Success) {
5278       auto Size = Operands.size();
5279       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5280       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5281       Op.setImm(Format);
5282     }
5283     return MatchOperand_Success;
5284   }
5285 
5286   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5287     Error(getLoc(), "duplicate format");
5288     return MatchOperand_ParseFail;
5289   }
5290   return MatchOperand_Success;
5291 }
5292 
5293 //===----------------------------------------------------------------------===//
5294 // ds
5295 //===----------------------------------------------------------------------===//
5296 
5297 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5298                                     const OperandVector &Operands) {
5299   OptionalImmIndexMap OptionalIdx;
5300 
5301   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5302     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5303 
5304     // Add the register arguments
5305     if (Op.isReg()) {
5306       Op.addRegOperands(Inst, 1);
5307       continue;
5308     }
5309 
5310     // Handle optional arguments
5311     OptionalIdx[Op.getImmTy()] = i;
5312   }
5313 
5314   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5315   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5316   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5317 
5318   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5319 }
5320 
5321 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5322                                 bool IsGdsHardcoded) {
5323   OptionalImmIndexMap OptionalIdx;
5324 
5325   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5326     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5327 
5328     // Add the register arguments
5329     if (Op.isReg()) {
5330       Op.addRegOperands(Inst, 1);
5331       continue;
5332     }
5333 
5334     if (Op.isToken() && Op.getToken() == "gds") {
5335       IsGdsHardcoded = true;
5336       continue;
5337     }
5338 
5339     // Handle optional arguments
5340     OptionalIdx[Op.getImmTy()] = i;
5341   }
5342 
5343   AMDGPUOperand::ImmTy OffsetType =
5344     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5345      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5346      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5347                                                       AMDGPUOperand::ImmTyOffset;
5348 
5349   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5350 
5351   if (!IsGdsHardcoded) {
5352     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5353   }
5354   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5355 }
5356 
5357 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5358   OptionalImmIndexMap OptionalIdx;
5359 
5360   unsigned OperandIdx[4];
5361   unsigned EnMask = 0;
5362   int SrcIdx = 0;
5363 
5364   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5365     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5366 
5367     // Add the register arguments
5368     if (Op.isReg()) {
5369       assert(SrcIdx < 4);
5370       OperandIdx[SrcIdx] = Inst.size();
5371       Op.addRegOperands(Inst, 1);
5372       ++SrcIdx;
5373       continue;
5374     }
5375 
5376     if (Op.isOff()) {
5377       assert(SrcIdx < 4);
5378       OperandIdx[SrcIdx] = Inst.size();
5379       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5380       ++SrcIdx;
5381       continue;
5382     }
5383 
5384     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5385       Op.addImmOperands(Inst, 1);
5386       continue;
5387     }
5388 
5389     if (Op.isToken() && Op.getToken() == "done")
5390       continue;
5391 
5392     // Handle optional arguments
5393     OptionalIdx[Op.getImmTy()] = i;
5394   }
5395 
5396   assert(SrcIdx == 4);
5397 
5398   bool Compr = false;
5399   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5400     Compr = true;
5401     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5402     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5403     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5404   }
5405 
5406   for (auto i = 0; i < SrcIdx; ++i) {
5407     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5408       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5409     }
5410   }
5411 
5412   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5413   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5414 
5415   Inst.addOperand(MCOperand::createImm(EnMask));
5416 }
5417 
5418 //===----------------------------------------------------------------------===//
5419 // s_waitcnt
5420 //===----------------------------------------------------------------------===//
5421 
5422 static bool
5423 encodeCnt(
5424   const AMDGPU::IsaVersion ISA,
5425   int64_t &IntVal,
5426   int64_t CntVal,
5427   bool Saturate,
5428   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5429   unsigned (*decode)(const IsaVersion &Version, unsigned))
5430 {
5431   bool Failed = false;
5432 
5433   IntVal = encode(ISA, IntVal, CntVal);
5434   if (CntVal != decode(ISA, IntVal)) {
5435     if (Saturate) {
5436       IntVal = encode(ISA, IntVal, -1);
5437     } else {
5438       Failed = true;
5439     }
5440   }
5441   return Failed;
5442 }
5443 
5444 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5445 
5446   SMLoc CntLoc = getLoc();
5447   StringRef CntName = getTokenStr();
5448 
5449   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5450       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5451     return false;
5452 
5453   int64_t CntVal;
5454   SMLoc ValLoc = getLoc();
5455   if (!parseExpr(CntVal))
5456     return false;
5457 
5458   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5459 
5460   bool Failed = true;
5461   bool Sat = CntName.endswith("_sat");
5462 
5463   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5464     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5465   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5466     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5467   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5468     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5469   } else {
5470     Error(CntLoc, "invalid counter name " + CntName);
5471     return false;
5472   }
5473 
5474   if (Failed) {
5475     Error(ValLoc, "too large value for " + CntName);
5476     return false;
5477   }
5478 
5479   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5480     return false;
5481 
5482   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5483     if (isToken(AsmToken::EndOfStatement)) {
5484       Error(getLoc(), "expected a counter name");
5485       return false;
5486     }
5487   }
5488 
5489   return true;
5490 }
5491 
5492 OperandMatchResultTy
5493 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5494   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5495   int64_t Waitcnt = getWaitcntBitMask(ISA);
5496   SMLoc S = getLoc();
5497 
5498   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5499     while (!isToken(AsmToken::EndOfStatement)) {
5500       if (!parseCnt(Waitcnt))
5501         return MatchOperand_ParseFail;
5502     }
5503   } else {
5504     if (!parseExpr(Waitcnt))
5505       return MatchOperand_ParseFail;
5506   }
5507 
5508   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5509   return MatchOperand_Success;
5510 }
5511 
5512 bool
5513 AMDGPUOperand::isSWaitCnt() const {
5514   return isImm();
5515 }
5516 
5517 //===----------------------------------------------------------------------===//
5518 // hwreg
5519 //===----------------------------------------------------------------------===//
5520 
5521 bool
5522 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5523                                 int64_t &Offset,
5524                                 int64_t &Width) {
5525   using namespace llvm::AMDGPU::Hwreg;
5526 
5527   // The register may be specified by name or using a numeric code
5528   if (isToken(AsmToken::Identifier) &&
5529       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5530     HwReg.IsSymbolic = true;
5531     lex(); // skip message name
5532   } else if (!parseExpr(HwReg.Id)) {
5533     return false;
5534   }
5535 
5536   if (trySkipToken(AsmToken::RParen))
5537     return true;
5538 
5539   // parse optional params
5540   return
5541     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5542     parseExpr(Offset) &&
5543     skipToken(AsmToken::Comma, "expected a comma") &&
5544     parseExpr(Width) &&
5545     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5546 }
5547 
5548 bool
5549 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5550                                const int64_t Offset,
5551                                const int64_t Width,
5552                                const SMLoc Loc) {
5553 
5554   using namespace llvm::AMDGPU::Hwreg;
5555 
5556   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5557     Error(Loc, "specified hardware register is not supported on this GPU");
5558     return false;
5559   } else if (!isValidHwreg(HwReg.Id)) {
5560     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5561     return false;
5562   } else if (!isValidHwregOffset(Offset)) {
5563     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5564     return false;
5565   } else if (!isValidHwregWidth(Width)) {
5566     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5567     return false;
5568   }
5569   return true;
5570 }
5571 
5572 OperandMatchResultTy
5573 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5574   using namespace llvm::AMDGPU::Hwreg;
5575 
5576   int64_t ImmVal = 0;
5577   SMLoc Loc = getLoc();
5578 
5579   if (trySkipId("hwreg", AsmToken::LParen)) {
5580     OperandInfoTy HwReg(ID_UNKNOWN_);
5581     int64_t Offset = OFFSET_DEFAULT_;
5582     int64_t Width = WIDTH_DEFAULT_;
5583     if (parseHwregBody(HwReg, Offset, Width) &&
5584         validateHwreg(HwReg, Offset, Width, Loc)) {
5585       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5586     } else {
5587       return MatchOperand_ParseFail;
5588     }
5589   } else if (parseExpr(ImmVal)) {
5590     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5591       Error(Loc, "invalid immediate: only 16-bit values are legal");
5592       return MatchOperand_ParseFail;
5593     }
5594   } else {
5595     return MatchOperand_ParseFail;
5596   }
5597 
5598   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5599   return MatchOperand_Success;
5600 }
5601 
5602 bool AMDGPUOperand::isHwreg() const {
5603   return isImmTy(ImmTyHwreg);
5604 }
5605 
5606 //===----------------------------------------------------------------------===//
5607 // sendmsg
5608 //===----------------------------------------------------------------------===//
5609 
5610 bool
5611 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5612                                   OperandInfoTy &Op,
5613                                   OperandInfoTy &Stream) {
5614   using namespace llvm::AMDGPU::SendMsg;
5615 
5616   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5617     Msg.IsSymbolic = true;
5618     lex(); // skip message name
5619   } else if (!parseExpr(Msg.Id)) {
5620     return false;
5621   }
5622 
5623   if (trySkipToken(AsmToken::Comma)) {
5624     Op.IsDefined = true;
5625     if (isToken(AsmToken::Identifier) &&
5626         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5627       lex(); // skip operation name
5628     } else if (!parseExpr(Op.Id)) {
5629       return false;
5630     }
5631 
5632     if (trySkipToken(AsmToken::Comma)) {
5633       Stream.IsDefined = true;
5634       if (!parseExpr(Stream.Id))
5635         return false;
5636     }
5637   }
5638 
5639   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5640 }
5641 
5642 bool
5643 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5644                                  const OperandInfoTy &Op,
5645                                  const OperandInfoTy &Stream,
5646                                  const SMLoc S) {
5647   using namespace llvm::AMDGPU::SendMsg;
5648 
5649   // Validation strictness depends on whether message is specified
5650   // in a symbolc or in a numeric form. In the latter case
5651   // only encoding possibility is checked.
5652   bool Strict = Msg.IsSymbolic;
5653 
5654   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5655     Error(S, "invalid message id");
5656     return false;
5657   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5658     Error(S, Op.IsDefined ?
5659              "message does not support operations" :
5660              "missing message operation");
5661     return false;
5662   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5663     Error(S, "invalid operation id");
5664     return false;
5665   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5666     Error(S, "message operation does not support streams");
5667     return false;
5668   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5669     Error(S, "invalid message stream id");
5670     return false;
5671   }
5672   return true;
5673 }
5674 
5675 OperandMatchResultTy
5676 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5677   using namespace llvm::AMDGPU::SendMsg;
5678 
5679   int64_t ImmVal = 0;
5680   SMLoc Loc = getLoc();
5681 
5682   if (trySkipId("sendmsg", AsmToken::LParen)) {
5683     OperandInfoTy Msg(ID_UNKNOWN_);
5684     OperandInfoTy Op(OP_NONE_);
5685     OperandInfoTy Stream(STREAM_ID_NONE_);
5686     if (parseSendMsgBody(Msg, Op, Stream) &&
5687         validateSendMsg(Msg, Op, Stream, Loc)) {
5688       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5689     } else {
5690       return MatchOperand_ParseFail;
5691     }
5692   } else if (parseExpr(ImmVal)) {
5693     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5694       Error(Loc, "invalid immediate: only 16-bit values are legal");
5695       return MatchOperand_ParseFail;
5696     }
5697   } else {
5698     return MatchOperand_ParseFail;
5699   }
5700 
5701   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5702   return MatchOperand_Success;
5703 }
5704 
5705 bool AMDGPUOperand::isSendMsg() const {
5706   return isImmTy(ImmTySendMsg);
5707 }
5708 
5709 //===----------------------------------------------------------------------===//
5710 // v_interp
5711 //===----------------------------------------------------------------------===//
5712 
5713 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5714   if (getLexer().getKind() != AsmToken::Identifier)
5715     return MatchOperand_NoMatch;
5716 
5717   StringRef Str = Parser.getTok().getString();
5718   int Slot = StringSwitch<int>(Str)
5719     .Case("p10", 0)
5720     .Case("p20", 1)
5721     .Case("p0", 2)
5722     .Default(-1);
5723 
5724   SMLoc S = Parser.getTok().getLoc();
5725   if (Slot == -1)
5726     return MatchOperand_ParseFail;
5727 
5728   Parser.Lex();
5729   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5730                                               AMDGPUOperand::ImmTyInterpSlot));
5731   return MatchOperand_Success;
5732 }
5733 
5734 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5735   if (getLexer().getKind() != AsmToken::Identifier)
5736     return MatchOperand_NoMatch;
5737 
5738   StringRef Str = Parser.getTok().getString();
5739   if (!Str.startswith("attr"))
5740     return MatchOperand_NoMatch;
5741 
5742   StringRef Chan = Str.take_back(2);
5743   int AttrChan = StringSwitch<int>(Chan)
5744     .Case(".x", 0)
5745     .Case(".y", 1)
5746     .Case(".z", 2)
5747     .Case(".w", 3)
5748     .Default(-1);
5749   if (AttrChan == -1)
5750     return MatchOperand_ParseFail;
5751 
5752   Str = Str.drop_back(2).drop_front(4);
5753 
5754   uint8_t Attr;
5755   if (Str.getAsInteger(10, Attr))
5756     return MatchOperand_ParseFail;
5757 
5758   SMLoc S = Parser.getTok().getLoc();
5759   Parser.Lex();
5760   if (Attr > 63) {
5761     Error(S, "out of bounds attr");
5762     return MatchOperand_ParseFail;
5763   }
5764 
5765   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5766 
5767   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5768                                               AMDGPUOperand::ImmTyInterpAttr));
5769   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5770                                               AMDGPUOperand::ImmTyAttrChan));
5771   return MatchOperand_Success;
5772 }
5773 
5774 //===----------------------------------------------------------------------===//
5775 // exp
5776 //===----------------------------------------------------------------------===//
5777 
5778 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5779                                                       uint8_t &Val) {
5780   if (Str == "null") {
5781     Val = 9;
5782     return MatchOperand_Success;
5783   }
5784 
5785   if (Str.startswith("mrt")) {
5786     Str = Str.drop_front(3);
5787     if (Str == "z") { // == mrtz
5788       Val = 8;
5789       return MatchOperand_Success;
5790     }
5791 
5792     if (Str.getAsInteger(10, Val))
5793       return MatchOperand_ParseFail;
5794 
5795     if (Val > 7) {
5796       Error(getLoc(), "invalid exp target");
5797       return MatchOperand_ParseFail;
5798     }
5799 
5800     return MatchOperand_Success;
5801   }
5802 
5803   if (Str.startswith("pos")) {
5804     Str = Str.drop_front(3);
5805     if (Str.getAsInteger(10, Val))
5806       return MatchOperand_ParseFail;
5807 
5808     if (Val > 4 || (Val == 4 && !isGFX10())) {
5809       Error(getLoc(), "invalid exp target");
5810       return MatchOperand_ParseFail;
5811     }
5812 
5813     Val += 12;
5814     return MatchOperand_Success;
5815   }
5816 
5817   if (isGFX10() && Str == "prim") {
5818     Val = 20;
5819     return MatchOperand_Success;
5820   }
5821 
5822   if (Str.startswith("param")) {
5823     Str = Str.drop_front(5);
5824     if (Str.getAsInteger(10, Val))
5825       return MatchOperand_ParseFail;
5826 
5827     if (Val >= 32) {
5828       Error(getLoc(), "invalid exp target");
5829       return MatchOperand_ParseFail;
5830     }
5831 
5832     Val += 32;
5833     return MatchOperand_Success;
5834   }
5835 
5836   if (Str.startswith("invalid_target_")) {
5837     Str = Str.drop_front(15);
5838     if (Str.getAsInteger(10, Val))
5839       return MatchOperand_ParseFail;
5840 
5841     Error(getLoc(), "invalid exp target");
5842     return MatchOperand_ParseFail;
5843   }
5844 
5845   return MatchOperand_NoMatch;
5846 }
5847 
5848 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5849   uint8_t Val;
5850   StringRef Str = Parser.getTok().getString();
5851 
5852   auto Res = parseExpTgtImpl(Str, Val);
5853   if (Res != MatchOperand_Success)
5854     return Res;
5855 
5856   SMLoc S = Parser.getTok().getLoc();
5857   Parser.Lex();
5858 
5859   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5860                                               AMDGPUOperand::ImmTyExpTgt));
5861   return MatchOperand_Success;
5862 }
5863 
5864 //===----------------------------------------------------------------------===//
5865 // parser helpers
5866 //===----------------------------------------------------------------------===//
5867 
5868 bool
5869 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5870   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5871 }
5872 
5873 bool
5874 AMDGPUAsmParser::isId(const StringRef Id) const {
5875   return isId(getToken(), Id);
5876 }
5877 
5878 bool
5879 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5880   return getTokenKind() == Kind;
5881 }
5882 
5883 bool
5884 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5885   if (isId(Id)) {
5886     lex();
5887     return true;
5888   }
5889   return false;
5890 }
5891 
5892 bool
5893 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5894   if (isId(Id) && peekToken().is(Kind)) {
5895     lex();
5896     lex();
5897     return true;
5898   }
5899   return false;
5900 }
5901 
5902 bool
5903 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5904   if (isToken(Kind)) {
5905     lex();
5906     return true;
5907   }
5908   return false;
5909 }
5910 
5911 bool
5912 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5913                            const StringRef ErrMsg) {
5914   if (!trySkipToken(Kind)) {
5915     Error(getLoc(), ErrMsg);
5916     return false;
5917   }
5918   return true;
5919 }
5920 
5921 bool
5922 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5923   return !getParser().parseAbsoluteExpression(Imm);
5924 }
5925 
5926 bool
5927 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5928   SMLoc S = getLoc();
5929 
5930   const MCExpr *Expr;
5931   if (Parser.parseExpression(Expr))
5932     return false;
5933 
5934   int64_t IntVal;
5935   if (Expr->evaluateAsAbsolute(IntVal)) {
5936     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5937   } else {
5938     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5939   }
5940   return true;
5941 }
5942 
5943 bool
5944 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5945   if (isToken(AsmToken::String)) {
5946     Val = getToken().getStringContents();
5947     lex();
5948     return true;
5949   } else {
5950     Error(getLoc(), ErrMsg);
5951     return false;
5952   }
5953 }
5954 
5955 bool
5956 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5957   if (isToken(AsmToken::Identifier)) {
5958     Val = getTokenStr();
5959     lex();
5960     return true;
5961   } else {
5962     Error(getLoc(), ErrMsg);
5963     return false;
5964   }
5965 }
5966 
5967 AsmToken
5968 AMDGPUAsmParser::getToken() const {
5969   return Parser.getTok();
5970 }
5971 
5972 AsmToken
5973 AMDGPUAsmParser::peekToken() {
5974   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
5975 }
5976 
5977 void
5978 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5979   auto TokCount = getLexer().peekTokens(Tokens);
5980 
5981   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5982     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5983 }
5984 
5985 AsmToken::TokenKind
5986 AMDGPUAsmParser::getTokenKind() const {
5987   return getLexer().getKind();
5988 }
5989 
5990 SMLoc
5991 AMDGPUAsmParser::getLoc() const {
5992   return getToken().getLoc();
5993 }
5994 
5995 StringRef
5996 AMDGPUAsmParser::getTokenStr() const {
5997   return getToken().getString();
5998 }
5999 
6000 void
6001 AMDGPUAsmParser::lex() {
6002   Parser.Lex();
6003 }
6004 
6005 //===----------------------------------------------------------------------===//
6006 // swizzle
6007 //===----------------------------------------------------------------------===//
6008 
6009 LLVM_READNONE
6010 static unsigned
6011 encodeBitmaskPerm(const unsigned AndMask,
6012                   const unsigned OrMask,
6013                   const unsigned XorMask) {
6014   using namespace llvm::AMDGPU::Swizzle;
6015 
6016   return BITMASK_PERM_ENC |
6017          (AndMask << BITMASK_AND_SHIFT) |
6018          (OrMask  << BITMASK_OR_SHIFT)  |
6019          (XorMask << BITMASK_XOR_SHIFT);
6020 }
6021 
6022 bool
6023 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6024                                       const unsigned MinVal,
6025                                       const unsigned MaxVal,
6026                                       const StringRef ErrMsg) {
6027   for (unsigned i = 0; i < OpNum; ++i) {
6028     if (!skipToken(AsmToken::Comma, "expected a comma")){
6029       return false;
6030     }
6031     SMLoc ExprLoc = Parser.getTok().getLoc();
6032     if (!parseExpr(Op[i])) {
6033       return false;
6034     }
6035     if (Op[i] < MinVal || Op[i] > MaxVal) {
6036       Error(ExprLoc, ErrMsg);
6037       return false;
6038     }
6039   }
6040 
6041   return true;
6042 }
6043 
6044 bool
6045 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6046   using namespace llvm::AMDGPU::Swizzle;
6047 
6048   int64_t Lane[LANE_NUM];
6049   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6050                            "expected a 2-bit lane id")) {
6051     Imm = QUAD_PERM_ENC;
6052     for (unsigned I = 0; I < LANE_NUM; ++I) {
6053       Imm |= Lane[I] << (LANE_SHIFT * I);
6054     }
6055     return true;
6056   }
6057   return false;
6058 }
6059 
6060 bool
6061 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6062   using namespace llvm::AMDGPU::Swizzle;
6063 
6064   SMLoc S = Parser.getTok().getLoc();
6065   int64_t GroupSize;
6066   int64_t LaneIdx;
6067 
6068   if (!parseSwizzleOperands(1, &GroupSize,
6069                             2, 32,
6070                             "group size must be in the interval [2,32]")) {
6071     return false;
6072   }
6073   if (!isPowerOf2_64(GroupSize)) {
6074     Error(S, "group size must be a power of two");
6075     return false;
6076   }
6077   if (parseSwizzleOperands(1, &LaneIdx,
6078                            0, GroupSize - 1,
6079                            "lane id must be in the interval [0,group size - 1]")) {
6080     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6081     return true;
6082   }
6083   return false;
6084 }
6085 
6086 bool
6087 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6088   using namespace llvm::AMDGPU::Swizzle;
6089 
6090   SMLoc S = Parser.getTok().getLoc();
6091   int64_t GroupSize;
6092 
6093   if (!parseSwizzleOperands(1, &GroupSize,
6094       2, 32, "group size must be in the interval [2,32]")) {
6095     return false;
6096   }
6097   if (!isPowerOf2_64(GroupSize)) {
6098     Error(S, "group size must be a power of two");
6099     return false;
6100   }
6101 
6102   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6103   return true;
6104 }
6105 
6106 bool
6107 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6108   using namespace llvm::AMDGPU::Swizzle;
6109 
6110   SMLoc S = Parser.getTok().getLoc();
6111   int64_t GroupSize;
6112 
6113   if (!parseSwizzleOperands(1, &GroupSize,
6114       1, 16, "group size must be in the interval [1,16]")) {
6115     return false;
6116   }
6117   if (!isPowerOf2_64(GroupSize)) {
6118     Error(S, "group size must be a power of two");
6119     return false;
6120   }
6121 
6122   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6123   return true;
6124 }
6125 
6126 bool
6127 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6128   using namespace llvm::AMDGPU::Swizzle;
6129 
6130   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6131     return false;
6132   }
6133 
6134   StringRef Ctl;
6135   SMLoc StrLoc = Parser.getTok().getLoc();
6136   if (!parseString(Ctl)) {
6137     return false;
6138   }
6139   if (Ctl.size() != BITMASK_WIDTH) {
6140     Error(StrLoc, "expected a 5-character mask");
6141     return false;
6142   }
6143 
6144   unsigned AndMask = 0;
6145   unsigned OrMask = 0;
6146   unsigned XorMask = 0;
6147 
6148   for (size_t i = 0; i < Ctl.size(); ++i) {
6149     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6150     switch(Ctl[i]) {
6151     default:
6152       Error(StrLoc, "invalid mask");
6153       return false;
6154     case '0':
6155       break;
6156     case '1':
6157       OrMask |= Mask;
6158       break;
6159     case 'p':
6160       AndMask |= Mask;
6161       break;
6162     case 'i':
6163       AndMask |= Mask;
6164       XorMask |= Mask;
6165       break;
6166     }
6167   }
6168 
6169   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6170   return true;
6171 }
6172 
6173 bool
6174 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6175 
6176   SMLoc OffsetLoc = Parser.getTok().getLoc();
6177 
6178   if (!parseExpr(Imm)) {
6179     return false;
6180   }
6181   if (!isUInt<16>(Imm)) {
6182     Error(OffsetLoc, "expected a 16-bit offset");
6183     return false;
6184   }
6185   return true;
6186 }
6187 
6188 bool
6189 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6190   using namespace llvm::AMDGPU::Swizzle;
6191 
6192   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6193 
6194     SMLoc ModeLoc = Parser.getTok().getLoc();
6195     bool Ok = false;
6196 
6197     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6198       Ok = parseSwizzleQuadPerm(Imm);
6199     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6200       Ok = parseSwizzleBitmaskPerm(Imm);
6201     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6202       Ok = parseSwizzleBroadcast(Imm);
6203     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6204       Ok = parseSwizzleSwap(Imm);
6205     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6206       Ok = parseSwizzleReverse(Imm);
6207     } else {
6208       Error(ModeLoc, "expected a swizzle mode");
6209     }
6210 
6211     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6212   }
6213 
6214   return false;
6215 }
6216 
6217 OperandMatchResultTy
6218 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6219   SMLoc S = Parser.getTok().getLoc();
6220   int64_t Imm = 0;
6221 
6222   if (trySkipId("offset")) {
6223 
6224     bool Ok = false;
6225     if (skipToken(AsmToken::Colon, "expected a colon")) {
6226       if (trySkipId("swizzle")) {
6227         Ok = parseSwizzleMacro(Imm);
6228       } else {
6229         Ok = parseSwizzleOffset(Imm);
6230       }
6231     }
6232 
6233     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6234 
6235     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6236   } else {
6237     // Swizzle "offset" operand is optional.
6238     // If it is omitted, try parsing other optional operands.
6239     return parseOptionalOpr(Operands);
6240   }
6241 }
6242 
6243 bool
6244 AMDGPUOperand::isSwizzle() const {
6245   return isImmTy(ImmTySwizzle);
6246 }
6247 
6248 //===----------------------------------------------------------------------===//
6249 // VGPR Index Mode
6250 //===----------------------------------------------------------------------===//
6251 
6252 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6253 
6254   using namespace llvm::AMDGPU::VGPRIndexMode;
6255 
6256   if (trySkipToken(AsmToken::RParen)) {
6257     return OFF;
6258   }
6259 
6260   int64_t Imm = 0;
6261 
6262   while (true) {
6263     unsigned Mode = 0;
6264     SMLoc S = Parser.getTok().getLoc();
6265 
6266     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6267       if (trySkipId(IdSymbolic[ModeId])) {
6268         Mode = 1 << ModeId;
6269         break;
6270       }
6271     }
6272 
6273     if (Mode == 0) {
6274       Error(S, (Imm == 0)?
6275                "expected a VGPR index mode or a closing parenthesis" :
6276                "expected a VGPR index mode");
6277       return UNDEF;
6278     }
6279 
6280     if (Imm & Mode) {
6281       Error(S, "duplicate VGPR index mode");
6282       return UNDEF;
6283     }
6284     Imm |= Mode;
6285 
6286     if (trySkipToken(AsmToken::RParen))
6287       break;
6288     if (!skipToken(AsmToken::Comma,
6289                    "expected a comma or a closing parenthesis"))
6290       return UNDEF;
6291   }
6292 
6293   return Imm;
6294 }
6295 
6296 OperandMatchResultTy
6297 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6298 
6299   using namespace llvm::AMDGPU::VGPRIndexMode;
6300 
6301   int64_t Imm = 0;
6302   SMLoc S = Parser.getTok().getLoc();
6303 
6304   if (getLexer().getKind() == AsmToken::Identifier &&
6305       Parser.getTok().getString() == "gpr_idx" &&
6306       getLexer().peekTok().is(AsmToken::LParen)) {
6307 
6308     Parser.Lex();
6309     Parser.Lex();
6310 
6311     Imm = parseGPRIdxMacro();
6312     if (Imm == UNDEF)
6313       return MatchOperand_ParseFail;
6314 
6315   } else {
6316     if (getParser().parseAbsoluteExpression(Imm))
6317       return MatchOperand_ParseFail;
6318     if (Imm < 0 || !isUInt<4>(Imm)) {
6319       Error(S, "invalid immediate: only 4-bit values are legal");
6320       return MatchOperand_ParseFail;
6321     }
6322   }
6323 
6324   Operands.push_back(
6325       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6326   return MatchOperand_Success;
6327 }
6328 
6329 bool AMDGPUOperand::isGPRIdxMode() const {
6330   return isImmTy(ImmTyGprIdxMode);
6331 }
6332 
6333 //===----------------------------------------------------------------------===//
6334 // sopp branch targets
6335 //===----------------------------------------------------------------------===//
6336 
6337 OperandMatchResultTy
6338 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6339 
6340   // Make sure we are not parsing something
6341   // that looks like a label or an expression but is not.
6342   // This will improve error messages.
6343   if (isRegister() || isModifier())
6344     return MatchOperand_NoMatch;
6345 
6346   if (!parseExpr(Operands))
6347     return MatchOperand_ParseFail;
6348 
6349   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6350   assert(Opr.isImm() || Opr.isExpr());
6351   SMLoc Loc = Opr.getStartLoc();
6352 
6353   // Currently we do not support arbitrary expressions as branch targets.
6354   // Only labels and absolute expressions are accepted.
6355   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6356     Error(Loc, "expected an absolute expression or a label");
6357   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6358     Error(Loc, "expected a 16-bit signed jump offset");
6359   }
6360 
6361   return MatchOperand_Success;
6362 }
6363 
6364 //===----------------------------------------------------------------------===//
6365 // Boolean holding registers
6366 //===----------------------------------------------------------------------===//
6367 
6368 OperandMatchResultTy
6369 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6370   return parseReg(Operands);
6371 }
6372 
6373 //===----------------------------------------------------------------------===//
6374 // mubuf
6375 //===----------------------------------------------------------------------===//
6376 
6377 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6378   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6379 }
6380 
6381 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6382   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6383 }
6384 
6385 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6386   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6387 }
6388 
// Convert the parsed operands of a MUBUF instruction into MCInst operands.
//
// Register operands and untyped immediates (soffset) are added in the order
// they were parsed; typed immediates are remembered in OptionalIdx and then
// appended in a fixed order at the end (offset, glc, slc, tfe, dlc), with
// some of them skipped depending on the flags below.
//
// \param IsAtomic        glc is not appended for atomics (it is hard-coded).
// \param IsAtomicReturn  the first register operand is added twice, the
//                        second copy being the tied source of the returned
//                        destination. Requires IsAtomic.
// \param IsLds           the selected opcode is an lds variant; it is
//                        replaced by the non-lds variant when no lds
//                        modifier was actually parsed.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                               const OperandVector &Operands,
                               bool IsAtomic,
                               bool IsAtomicReturn,
                               bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);
  // Operand 0 is skipped by the loop below.
  unsigned FirstOperandIdx = 1;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Remember whether any remaining operand is the 'lds' modifier; this is
    // evaluated before the token check below.
    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append the optional immediates in a fixed order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }

  // dlc is only appended when isGFX10() holds.
  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
6461 
6462 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6463   OptionalImmIndexMap OptionalIdx;
6464 
6465   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6466     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6467 
6468     // Add the register arguments
6469     if (Op.isReg()) {
6470       Op.addRegOperands(Inst, 1);
6471       continue;
6472     }
6473 
6474     // Handle the case where soffset is an immediate
6475     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6476       Op.addImmOperands(Inst, 1);
6477       continue;
6478     }
6479 
6480     // Handle tokens like 'offen' which are sometimes hard-coded into the
6481     // asm string.  There are no MCInst operands for these.
6482     if (Op.isToken()) {
6483       continue;
6484     }
6485     assert(Op.isImm());
6486 
6487     // Handle optional arguments
6488     OptionalIdx[Op.getImmTy()] = i;
6489   }
6490 
6491   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6492                         AMDGPUOperand::ImmTyOffset);
6493   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6494   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6495   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6496   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6497 
6498   if (isGFX10())
6499     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6500 }
6501 
6502 //===----------------------------------------------------------------------===//
6503 // mimg
6504 //===----------------------------------------------------------------------===//
6505 
// Convert the parsed operands of a MIMG instruction into MCInst operands.
//
// The defs come first, then (for atomics) a copy of the destination as the
// source, then the remaining registers in parse order. Immediate modifiers
// are recorded by type and appended afterwards in a fixed order that depends
// on isGFX10().
//
// \param IsAtomic  when true, the single def is added a second time as the
//                  source operand.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  // Parsed operand index; operand 0 is skipped.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // The first getNumDefs() parsed operands are the destination registers.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      // Remember where each modifier was parsed; it is emitted below.
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      // Tokens are silently skipped; anything else is unexpected.
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  // Append the modifiers in a fixed order. dim, dlc and a16 are only added
  // on GFX10; da is only added when not on GFX10. dim is added with -1 as
  // the value passed for the default.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
6554 
6555 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6556   cvtMIMG(Inst, Operands, true);
6557 }
6558 
6559 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6560                                       const OperandVector &Operands) {
6561   for (unsigned I = 1; I < Operands.size(); ++I) {
6562     auto &Operand = (AMDGPUOperand &)*Operands[I];
6563     if (Operand.isReg())
6564       Operand.addRegOperands(Inst, 1);
6565   }
6566 
6567   Inst.addOperand(MCOperand::createImm(1)); // a16
6568 }
6569 
6570 //===----------------------------------------------------------------------===//
6571 // smrd
6572 //===----------------------------------------------------------------------===//
6573 
6574 bool AMDGPUOperand::isSMRDOffset8() const {
6575   return isImm() && isUInt<8>(getImm());
6576 }
6577 
6578 bool AMDGPUOperand::isSMEMOffset() const {
6579   return isImm(); // Offset range is checked later by validator.
6580 }
6581 
6582 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6583   // 32-bit literals are only supported on CI and we only want to use them
6584   // when the offset is > 8-bits.
6585   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6586 }
6587 
6588 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6589   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6590 }
6591 
6592 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6593   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6594 }
6595 
6596 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6597   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6598 }
6599 
6600 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6601   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6602 }
6603 
6604 //===----------------------------------------------------------------------===//
6605 // vop3
6606 //===----------------------------------------------------------------------===//
6607 
// Convert an output-modifier multiplier in place: 1 -> 0, 2 -> 1, 4 -> 2.
// Returns false and leaves Mul untouched for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
    Mul = 0;
    return true;
  case 2:
    Mul = 1;
    return true;
  case 4:
    Mul = 2;
    return true;
  default:
    return false;
  }
}
6615 
// Convert an output-modifier divisor in place: 1 -> 0, 2 -> 3.
// Returns false and leaves Div untouched for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
6629 
// Convert a parsed bound_ctrl value in place: 0 -> 1, -1 -> 0.
// Returns false and leaves BoundCtrl untouched for any other value.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:
    BoundCtrl = 1;
    return true;
  case -1:
    BoundCtrl = 0;
    return true;
  default:
    return false;
  }
}
6643 
6644 // Note: the order in this table matches the order of operands in AsmString.
6645 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6646   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6647   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6648   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6649   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6650   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6651   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6652   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6653   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6654   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6655   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6656   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6657   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6658   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6659   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6660   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6661   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6662   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6663   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6664   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6665   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6666   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6667   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6668   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6669   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6670   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6671   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6672   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6673   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6674   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6675   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6676   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6677   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6678   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6679   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6680   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6681   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6682   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6683   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6684   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6685   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6686   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6687   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6688   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6689 };
6690 
6691 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6692 
6693   OperandMatchResultTy res = parseOptionalOpr(Operands);
6694 
6695   // This is a hack to enable hardcoded mandatory operands which follow
6696   // optional operands.
6697   //
6698   // Current design assumes that all operands after the first optional operand
6699   // are also optional. However implementation of some instructions violates
6700   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6701   //
6702   // To alleviate this problem, we have to (implicitly) parse extra operands
6703   // to make sure autogenerated parser of custom operands never hit hardcoded
6704   // mandatory operands.
6705 
6706   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6707     if (res != MatchOperand_Success ||
6708         isToken(AsmToken::EndOfStatement))
6709       break;
6710 
6711     trySkipToken(AsmToken::Comma);
6712     res = parseOptionalOpr(Operands);
6713   }
6714 
6715   return res;
6716 }
6717 
6718 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6719   OperandMatchResultTy res;
6720   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6721     // try to parse any optional operand here
6722     if (Op.IsBit) {
6723       res = parseNamedBit(Op.Name, Operands, Op.Type);
6724     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6725       res = parseOModOperand(Operands);
6726     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6727                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6728                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6729       res = parseSDWASel(Operands, Op.Name, Op.Type);
6730     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6731       res = parseSDWADstUnused(Operands);
6732     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6733                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6734                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6735                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6736       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6737                                         Op.ConvertResult);
6738     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6739       res = parseDim(Operands);
6740     } else {
6741       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6742     }
6743     if (res != MatchOperand_NoMatch) {
6744       return res;
6745     }
6746   }
6747   return MatchOperand_NoMatch;
6748 }
6749 
6750 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6751   StringRef Name = Parser.getTok().getString();
6752   if (Name == "mul") {
6753     return parseIntWithPrefix("mul", Operands,
6754                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6755   }
6756 
6757   if (Name == "div") {
6758     return parseIntWithPrefix("div", Operands,
6759                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6760   }
6761 
6762   return MatchOperand_NoMatch;
6763 }
6764 
6765 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6766   cvtVOP3P(Inst, Operands);
6767 
6768   int Opc = Inst.getOpcode();
6769 
6770   int SrcNum;
6771   const int Ops[] = { AMDGPU::OpName::src0,
6772                       AMDGPU::OpName::src1,
6773                       AMDGPU::OpName::src2 };
6774   for (SrcNum = 0;
6775        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6776        ++SrcNum);
6777   assert(SrcNum > 0);
6778 
6779   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6780   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6781 
6782   if ((OpSel & (1 << SrcNum)) != 0) {
6783     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6784     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6785     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6786   }
6787 }
6788 
6789 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6790       // 1. This operand is input modifiers
6791   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6792       // 2. This is not last operand
6793       && Desc.NumOperands > (OpNum + 1)
6794       // 3. Next operand is register class
6795       && Desc.OpInfo[OpNum + 1].RegClass != -1
6796       // 4. Next register is not tied to any other operand
6797       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6798 }
6799 
6800 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6801 {
6802   OptionalImmIndexMap OptionalIdx;
6803   unsigned Opc = Inst.getOpcode();
6804 
6805   unsigned I = 1;
6806   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6807   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6808     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6809   }
6810 
6811   for (unsigned E = Operands.size(); I != E; ++I) {
6812     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6813     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6814       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6815     } else if (Op.isInterpSlot() ||
6816                Op.isInterpAttr() ||
6817                Op.isAttrChan()) {
6818       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6819     } else if (Op.isImmModifier()) {
6820       OptionalIdx[Op.getImmTy()] = I;
6821     } else {
6822       llvm_unreachable("unhandled operand type");
6823     }
6824   }
6825 
6826   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6827     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6828   }
6829 
6830   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6831     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6832   }
6833 
6834   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6835     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6836   }
6837 }
6838 
// Convert the parsed operands of a VOP3 instruction into MCInst operands.
//
// Defs are added first, then the sources (with input modifiers when the
// opcode has a src0_modifiers operand), then clamp and omod when the opcode
// has them. Immediate modifiers encountered on the way are recorded in
// OptionalIdx, which the caller may inspect afterwards.
//
// \param OptionalIdx  filled with the parsed-operand index of each optional
//                     immediate, keyed by its immediate type.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Parsed operand index; operand 0 is skipped.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      // The next MCInst slot (current operand count) decides whether a
      // modifiers + value pair is emitted for this parsed operand.
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  // clamp and omod are appended only if the opcode has those operands.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    // Insert the two missing operands at the src2_modifiers position:
    // first a zero modifiers immediate, then a copy of operand 0 right
    // after it.
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
6901 
6902 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6903   OptionalImmIndexMap OptionalIdx;
6904   cvtVOP3(Inst, Operands, OptionalIdx);
6905 }
6906 
// Convert the parsed operands of a VOP3P-style instruction.
//
// The operands are first converted as for a normal VOP3 instruction. Then
// op_sel (always) and op_sel_hi / neg_lo / neg_hi (when the opcode has them)
// are appended as immediates, and finally their per-source bits are folded
// into the corresponding srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  // Opcodes with a vdst_in operand get a copy of operand 0 appended for it.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // The value passed as default for op_sel_hi is -1 for packed
    // instructions and 0 otherwise.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  // Source operands and their modifier operands, index-aligned.
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  // Read back the immediates appended above; the ones the opcode lacks
  // stay 0.
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each mask applies to source J; stop at the first source the
  // opcode does not have.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    // Merge into whatever modifier bits are already present.
    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
6989 
6990 //===----------------------------------------------------------------------===//
6991 // dpp
6992 //===----------------------------------------------------------------------===//
6993 
6994 bool AMDGPUOperand::isDPP8() const {
6995   return isImmTy(ImmTyDPP8);
6996 }
6997 
6998 bool AMDGPUOperand::isDPPCtrl() const {
6999   using namespace AMDGPU::DPP;
7000 
7001   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7002   if (result) {
7003     int64_t Imm = getImm();
7004     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7005            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7006            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7007            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7008            (Imm == DppCtrl::WAVE_SHL1) ||
7009            (Imm == DppCtrl::WAVE_ROL1) ||
7010            (Imm == DppCtrl::WAVE_SHR1) ||
7011            (Imm == DppCtrl::WAVE_ROR1) ||
7012            (Imm == DppCtrl::ROW_MIRROR) ||
7013            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7014            (Imm == DppCtrl::BCAST15) ||
7015            (Imm == DppCtrl::BCAST31) ||
7016            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7017            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7018   }
7019   return false;
7020 }
7021 
7022 //===----------------------------------------------------------------------===//
7023 // mAI
7024 //===----------------------------------------------------------------------===//
7025 
7026 bool AMDGPUOperand::isBLGP() const {
7027   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7028 }
7029 
7030 bool AMDGPUOperand::isCBSZ() const {
7031   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7032 }
7033 
7034 bool AMDGPUOperand::isABID() const {
7035   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7036 }
7037 
7038 bool AMDGPUOperand::isS16Imm() const {
7039   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7040 }
7041 
7042 bool AMDGPUOperand::isU16Imm() const {
7043   return isImm() && isUInt<16>(getImm());
7044 }
7045 
7046 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7047   if (!isGFX10())
7048     return MatchOperand_NoMatch;
7049 
7050   SMLoc S = Parser.getTok().getLoc();
7051 
7052   if (getLexer().isNot(AsmToken::Identifier))
7053     return MatchOperand_NoMatch;
7054   if (getLexer().getTok().getString() != "dim")
7055     return MatchOperand_NoMatch;
7056 
7057   Parser.Lex();
7058   if (getLexer().isNot(AsmToken::Colon))
7059     return MatchOperand_ParseFail;
7060 
7061   Parser.Lex();
7062 
7063   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7064   // integer.
7065   std::string Token;
7066   if (getLexer().is(AsmToken::Integer)) {
7067     SMLoc Loc = getLexer().getTok().getEndLoc();
7068     Token = std::string(getLexer().getTok().getString());
7069     Parser.Lex();
7070     if (getLexer().getTok().getLoc() != Loc)
7071       return MatchOperand_ParseFail;
7072   }
7073   if (getLexer().isNot(AsmToken::Identifier))
7074     return MatchOperand_ParseFail;
7075   Token += getLexer().getTok().getString();
7076 
7077   StringRef DimId = Token;
7078   if (DimId.startswith("SQ_RSRC_IMG_"))
7079     DimId = DimId.substr(12);
7080 
7081   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7082   if (!DimInfo)
7083     return MatchOperand_ParseFail;
7084 
7085   Parser.Lex();
7086 
7087   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7088                                               AMDGPUOperand::ImmTyDim));
7089   return MatchOperand_Success;
7090 }
7091 
7092 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7093   SMLoc S = Parser.getTok().getLoc();
7094   StringRef Prefix;
7095 
7096   if (getLexer().getKind() == AsmToken::Identifier) {
7097     Prefix = Parser.getTok().getString();
7098   } else {
7099     return MatchOperand_NoMatch;
7100   }
7101 
7102   if (Prefix != "dpp8")
7103     return parseDPPCtrl(Operands);
7104   if (!isGFX10())
7105     return MatchOperand_NoMatch;
7106 
7107   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7108 
7109   int64_t Sels[8];
7110 
7111   Parser.Lex();
7112   if (getLexer().isNot(AsmToken::Colon))
7113     return MatchOperand_ParseFail;
7114 
7115   Parser.Lex();
7116   if (getLexer().isNot(AsmToken::LBrac))
7117     return MatchOperand_ParseFail;
7118 
7119   Parser.Lex();
7120   if (getParser().parseAbsoluteExpression(Sels[0]))
7121     return MatchOperand_ParseFail;
7122   if (0 > Sels[0] || 7 < Sels[0])
7123     return MatchOperand_ParseFail;
7124 
7125   for (size_t i = 1; i < 8; ++i) {
7126     if (getLexer().isNot(AsmToken::Comma))
7127       return MatchOperand_ParseFail;
7128 
7129     Parser.Lex();
7130     if (getParser().parseAbsoluteExpression(Sels[i]))
7131       return MatchOperand_ParseFail;
7132     if (0 > Sels[i] || 7 < Sels[i])
7133       return MatchOperand_ParseFail;
7134   }
7135 
7136   if (getLexer().isNot(AsmToken::RBrac))
7137     return MatchOperand_ParseFail;
7138   Parser.Lex();
7139 
7140   unsigned DPP8 = 0;
7141   for (size_t i = 0; i < 8; ++i)
7142     DPP8 |= (Sels[i] << (i * 3));
7143 
7144   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7145   return MatchOperand_Success;
7146 }
7147 
7148 OperandMatchResultTy
7149 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7150   using namespace AMDGPU::DPP;
7151 
7152   SMLoc S = Parser.getTok().getLoc();
7153   StringRef Prefix;
7154   int64_t Int;
7155 
7156   if (getLexer().getKind() == AsmToken::Identifier) {
7157     Prefix = Parser.getTok().getString();
7158   } else {
7159     return MatchOperand_NoMatch;
7160   }
7161 
7162   if (Prefix == "row_mirror") {
7163     Int = DppCtrl::ROW_MIRROR;
7164     Parser.Lex();
7165   } else if (Prefix == "row_half_mirror") {
7166     Int = DppCtrl::ROW_HALF_MIRROR;
7167     Parser.Lex();
7168   } else {
7169     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7170     if (Prefix != "quad_perm"
7171         && Prefix != "row_shl"
7172         && Prefix != "row_shr"
7173         && Prefix != "row_ror"
7174         && Prefix != "wave_shl"
7175         && Prefix != "wave_rol"
7176         && Prefix != "wave_shr"
7177         && Prefix != "wave_ror"
7178         && Prefix != "row_bcast"
7179         && Prefix != "row_share"
7180         && Prefix != "row_xmask") {
7181       return MatchOperand_NoMatch;
7182     }
7183 
7184     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7185       return MatchOperand_NoMatch;
7186 
7187     if (!isVI() && !isGFX9() &&
7188         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7189          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7190          Prefix == "row_bcast"))
7191       return MatchOperand_NoMatch;
7192 
7193     Parser.Lex();
7194     if (getLexer().isNot(AsmToken::Colon))
7195       return MatchOperand_ParseFail;
7196 
7197     if (Prefix == "quad_perm") {
7198       // quad_perm:[%d,%d,%d,%d]
7199       Parser.Lex();
7200       if (getLexer().isNot(AsmToken::LBrac))
7201         return MatchOperand_ParseFail;
7202       Parser.Lex();
7203 
7204       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7205         return MatchOperand_ParseFail;
7206 
7207       for (int i = 0; i < 3; ++i) {
7208         if (getLexer().isNot(AsmToken::Comma))
7209           return MatchOperand_ParseFail;
7210         Parser.Lex();
7211 
7212         int64_t Temp;
7213         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7214           return MatchOperand_ParseFail;
7215         const int shift = i*2 + 2;
7216         Int += (Temp << shift);
7217       }
7218 
7219       if (getLexer().isNot(AsmToken::RBrac))
7220         return MatchOperand_ParseFail;
7221       Parser.Lex();
7222     } else {
7223       // sel:%d
7224       Parser.Lex();
7225       if (getParser().parseAbsoluteExpression(Int))
7226         return MatchOperand_ParseFail;
7227 
7228       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7229         Int |= DppCtrl::ROW_SHL0;
7230       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7231         Int |= DppCtrl::ROW_SHR0;
7232       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7233         Int |= DppCtrl::ROW_ROR0;
7234       } else if (Prefix == "wave_shl" && 1 == Int) {
7235         Int = DppCtrl::WAVE_SHL1;
7236       } else if (Prefix == "wave_rol" && 1 == Int) {
7237         Int = DppCtrl::WAVE_ROL1;
7238       } else if (Prefix == "wave_shr" && 1 == Int) {
7239         Int = DppCtrl::WAVE_SHR1;
7240       } else if (Prefix == "wave_ror" && 1 == Int) {
7241         Int = DppCtrl::WAVE_ROR1;
7242       } else if (Prefix == "row_bcast") {
7243         if (Int == 15) {
7244           Int = DppCtrl::BCAST15;
7245         } else if (Int == 31) {
7246           Int = DppCtrl::BCAST31;
7247         } else {
7248           return MatchOperand_ParseFail;
7249         }
7250       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7251         Int |= DppCtrl::ROW_SHARE_FIRST;
7252       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7253         Int |= DppCtrl::ROW_XMASK_FIRST;
7254       } else {
7255         return MatchOperand_ParseFail;
7256       }
7257     }
7258   }
7259 
7260   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7261   return MatchOperand_Success;
7262 }
7263 
7264 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7265   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7266 }
7267 
7268 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7269   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7270 }
7271 
7272 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7273   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7274 }
7275 
7276 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7277   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7278 }
7279 
7280 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7281   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7282 }
7283 
7284 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7285   OptionalImmIndexMap OptionalIdx;
7286 
7287   unsigned I = 1;
7288   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7289   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7290     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7291   }
7292 
7293   int Fi = 0;
7294   for (unsigned E = Operands.size(); I != E; ++I) {
7295     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7296                                             MCOI::TIED_TO);
7297     if (TiedTo != -1) {
7298       assert((unsigned)TiedTo < Inst.getNumOperands());
7299       // handle tied old or src2 for MAC instructions
7300       Inst.addOperand(Inst.getOperand(TiedTo));
7301     }
7302     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7303     // Add the register arguments
7304     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7305       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7306       // Skip it.
7307       continue;
7308     }
7309 
7310     if (IsDPP8) {
7311       if (Op.isDPP8()) {
7312         Op.addImmOperands(Inst, 1);
7313       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7314         Op.addRegWithFPInputModsOperands(Inst, 2);
7315       } else if (Op.isFI()) {
7316         Fi = Op.getImm();
7317       } else if (Op.isReg()) {
7318         Op.addRegOperands(Inst, 1);
7319       } else {
7320         llvm_unreachable("Invalid operand type");
7321       }
7322     } else {
7323       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7324         Op.addRegWithFPInputModsOperands(Inst, 2);
7325       } else if (Op.isDPPCtrl()) {
7326         Op.addImmOperands(Inst, 1);
7327       } else if (Op.isImm()) {
7328         // Handle optional arguments
7329         OptionalIdx[Op.getImmTy()] = I;
7330       } else {
7331         llvm_unreachable("Invalid operand type");
7332       }
7333     }
7334   }
7335 
7336   if (IsDPP8) {
7337     using namespace llvm::AMDGPU::DPP;
7338     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7339   } else {
7340     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7341     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7342     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7343     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7344       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7345     }
7346   }
7347 }
7348 
7349 //===----------------------------------------------------------------------===//
7350 // sdwa
7351 //===----------------------------------------------------------------------===//
7352 
7353 OperandMatchResultTy
7354 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7355                               AMDGPUOperand::ImmTy Type) {
7356   using namespace llvm::AMDGPU::SDWA;
7357 
7358   SMLoc S = Parser.getTok().getLoc();
7359   StringRef Value;
7360   OperandMatchResultTy res;
7361 
7362   res = parseStringWithPrefix(Prefix, Value);
7363   if (res != MatchOperand_Success) {
7364     return res;
7365   }
7366 
7367   int64_t Int;
7368   Int = StringSwitch<int64_t>(Value)
7369         .Case("BYTE_0", SdwaSel::BYTE_0)
7370         .Case("BYTE_1", SdwaSel::BYTE_1)
7371         .Case("BYTE_2", SdwaSel::BYTE_2)
7372         .Case("BYTE_3", SdwaSel::BYTE_3)
7373         .Case("WORD_0", SdwaSel::WORD_0)
7374         .Case("WORD_1", SdwaSel::WORD_1)
7375         .Case("DWORD", SdwaSel::DWORD)
7376         .Default(0xffffffff);
7377   Parser.Lex(); // eat last token
7378 
7379   if (Int == 0xffffffff) {
7380     return MatchOperand_ParseFail;
7381   }
7382 
7383   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7384   return MatchOperand_Success;
7385 }
7386 
7387 OperandMatchResultTy
7388 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7389   using namespace llvm::AMDGPU::SDWA;
7390 
7391   SMLoc S = Parser.getTok().getLoc();
7392   StringRef Value;
7393   OperandMatchResultTy res;
7394 
7395   res = parseStringWithPrefix("dst_unused", Value);
7396   if (res != MatchOperand_Success) {
7397     return res;
7398   }
7399 
7400   int64_t Int;
7401   Int = StringSwitch<int64_t>(Value)
7402         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7403         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7404         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7405         .Default(0xffffffff);
7406   Parser.Lex(); // eat last token
7407 
7408   if (Int == 0xffffffff) {
7409     return MatchOperand_ParseFail;
7410   }
7411 
7412   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7413   return MatchOperand_Success;
7414 }
7415 
7416 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7417   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7418 }
7419 
7420 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7421   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7422 }
7423 
7424 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7425   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7426 }
7427 
7428 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7429   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7430 }
7431 
7432 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7433   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7434 }
7435 
7436 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7437                               uint64_t BasicInstType,
7438                               bool SkipDstVcc,
7439                               bool SkipSrcVcc) {
7440   using namespace llvm::AMDGPU::SDWA;
7441 
7442   OptionalImmIndexMap OptionalIdx;
7443   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7444   bool SkippedVcc = false;
7445 
7446   unsigned I = 1;
7447   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7448   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7449     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7450   }
7451 
7452   for (unsigned E = Operands.size(); I != E; ++I) {
7453     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7454     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7455         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7456       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7457       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7458       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7459       // Skip VCC only if we didn't skip it on previous iteration.
7460       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7461       if (BasicInstType == SIInstrFlags::VOP2 &&
7462           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7463            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7464         SkippedVcc = true;
7465         continue;
7466       } else if (BasicInstType == SIInstrFlags::VOPC &&
7467                  Inst.getNumOperands() == 0) {
7468         SkippedVcc = true;
7469         continue;
7470       }
7471     }
7472     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7473       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7474     } else if (Op.isImm()) {
7475       // Handle optional arguments
7476       OptionalIdx[Op.getImmTy()] = I;
7477     } else {
7478       llvm_unreachable("Invalid operand type");
7479     }
7480     SkippedVcc = false;
7481   }
7482 
7483   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7484       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7485       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7486     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7487     switch (BasicInstType) {
7488     case SIInstrFlags::VOP1:
7489       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7490       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7491         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7492       }
7493       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7494       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7495       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7496       break;
7497 
7498     case SIInstrFlags::VOP2:
7499       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7500       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7501         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7502       }
7503       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7504       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7505       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7506       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7507       break;
7508 
7509     case SIInstrFlags::VOPC:
7510       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7511         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7512       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7513       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7514       break;
7515 
7516     default:
7517       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7518     }
7519   }
7520 
7521   // special case v_mac_{f16, f32}:
7522   // it has src2 register operand that is tied to dst operand
7523   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7524       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7525     auto it = Inst.begin();
7526     std::advance(
7527       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7528     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7529   }
7530 }
7531 
7532 //===----------------------------------------------------------------------===//
7533 // mAI
7534 //===----------------------------------------------------------------------===//
7535 
7536 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7537   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7538 }
7539 
7540 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7541   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7542 }
7543 
7544 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7545   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7546 }
7547 
7548 /// Force static initialization.
7549 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7550   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7551   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7552 }
7553 
7554 #define GET_REGISTER_MATCHER
7555 #define GET_MATCHER_IMPLEMENTATION
7556 #define GET_MNEMONIC_SPELL_CHECKER
7557 #define GET_MNEMONIC_CHECKER
7558 #include "AMDGPUGenAsmMatcher.inc"
7559 
7560 // This fuction should be defined after auto-generated include so that we have
7561 // MatchClassKind enum defined
7562 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7563                                                      unsigned Kind) {
7564   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7565   // But MatchInstructionImpl() expects to meet token and fails to validate
7566   // operand. This method checks if we are given immediate operand but expect to
7567   // get corresponding token.
7568   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7569   switch (Kind) {
7570   case MCK_addr64:
7571     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7572   case MCK_gds:
7573     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7574   case MCK_lds:
7575     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7576   case MCK_glc:
7577     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7578   case MCK_idxen:
7579     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7580   case MCK_offen:
7581     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7582   case MCK_SSrcB32:
7583     // When operands have expression values, they will return true for isToken,
7584     // because it is not possible to distinguish between a token and an
7585     // expression at parse time. MatchInstructionImpl() will always try to
7586     // match an operand as a token, when isToken returns true, and when the
7587     // name of the expression is not a valid token, the match will fail,
7588     // so we need to handle it here.
7589     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7590   case MCK_SSrcF32:
7591     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7592   case MCK_SoppBrTarget:
7593     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7594   case MCK_VReg32OrOff:
7595     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7596   case MCK_InterpSlot:
7597     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7598   case MCK_Attr:
7599     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7600   case MCK_AttrChan:
7601     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7602   case MCK_ImmSMEMOffset:
7603     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7604   case MCK_SReg_64:
7605   case MCK_SReg_64_XEXEC:
7606     // Null is defined as a 32-bit register but
7607     // it should also be enabled with 64-bit operands.
7608     // The following code enables it for SReg_64 operands
7609     // used as source and destination. Remaining source
7610     // operands are handled in isInlinableImm.
7611     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7612   default:
7613     return Match_InvalidOperand;
7614   }
7615 }
7616 
7617 //===----------------------------------------------------------------------===//
7618 // endpgm
7619 //===----------------------------------------------------------------------===//
7620 
7621 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7622   SMLoc S = Parser.getTok().getLoc();
7623   int64_t Imm = 0;
7624 
7625   if (!parseExpr(Imm)) {
7626     // The operand is optional, if not present default to 0
7627     Imm = 0;
7628   }
7629 
7630   if (!isUInt<16>(Imm)) {
7631     Error(S, "expected a 16-bit value");
7632     return MatchOperand_ParseFail;
7633   }
7634 
7635   Operands.push_back(
7636       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7637   return MatchOperand_Success;
7638 }
7639 
7640 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7641