1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret is a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_192RegClassID) ||
282            isRegClass(AMDGPU::VReg_256RegClassID) ||
283            isRegClass(AMDGPU::VReg_512RegClassID) ||
284            isRegClass(AMDGPU::VReg_1024RegClassID);
285   }
286 
287   bool isVReg32() const {
288     return isRegClass(AMDGPU::VGPR_32RegClassID);
289   }
290 
291   bool isVReg32OrOff() const {
292     return isOff() || isVReg32();
293   }
294 
295   bool isNull() const {
296     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
297   }
298 
299   bool isSDWAOperand(MVT type) const;
300   bool isSDWAFP16Operand() const;
301   bool isSDWAFP32Operand() const;
302   bool isSDWAInt16Operand() const;
303   bool isSDWAInt32Operand() const;
304 
305   bool isImmTy(ImmTy ImmT) const {
306     return isImm() && Imm.Type == ImmT;
307   }
308 
309   bool isImmModifier() const {
310     return isImm() && Imm.Type != ImmTyNone;
311   }
312 
313   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
314   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
315   bool isDMask() const { return isImmTy(ImmTyDMask); }
316   bool isDim() const { return isImmTy(ImmTyDim); }
317   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
318   bool isDA() const { return isImmTy(ImmTyDA); }
319   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
320   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
321   bool isLWE() const { return isImmTy(ImmTyLWE); }
322   bool isOff() const { return isImmTy(ImmTyOff); }
323   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
324   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
325   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
326   bool isOffen() const { return isImmTy(ImmTyOffen); }
327   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
328   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
329   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
330   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
331   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
332 
333   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
334   bool isGDS() const { return isImmTy(ImmTyGDS); }
335   bool isLDS() const { return isImmTy(ImmTyLDS); }
336   bool isDLC() const { return isImmTy(ImmTyDLC); }
337   bool isGLC() const { return isImmTy(ImmTyGLC); }
338   bool isSLC() const { return isImmTy(ImmTySLC); }
339   bool isSWZ() const { return isImmTy(ImmTySWZ); }
340   bool isTFE() const { return isImmTy(ImmTyTFE); }
341   bool isD16() const { return isImmTy(ImmTyD16); }
342   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
343   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
344   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
345   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
346   bool isFI() const { return isImmTy(ImmTyDppFi); }
347   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
348   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
349   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
350   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
351   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
352   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
353   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
354   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
355   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
356   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
357   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
358   bool isHigh() const { return isImmTy(ImmTyHigh); }
359 
360   bool isMod() const {
361     return isClampSI() || isOModSI();
362   }
363 
364   bool isRegOrImm() const {
365     return isReg() || isImm();
366   }
367 
368   bool isRegClass(unsigned RCID) const;
369 
370   bool isInlineValue() const;
371 
372   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
373     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
374   }
375 
376   bool isSCSrcB16() const {
377     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
378   }
379 
380   bool isSCSrcV2B16() const {
381     return isSCSrcB16();
382   }
383 
384   bool isSCSrcB32() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
386   }
387 
388   bool isSCSrcB64() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
390   }
391 
392   bool isBoolReg() const;
393 
394   bool isSCSrcF16() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
396   }
397 
398   bool isSCSrcV2F16() const {
399     return isSCSrcF16();
400   }
401 
402   bool isSCSrcF32() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
404   }
405 
406   bool isSCSrcF64() const {
407     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
408   }
409 
410   bool isSSrcB32() const {
411     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
412   }
413 
414   bool isSSrcB16() const {
415     return isSCSrcB16() || isLiteralImm(MVT::i16);
416   }
417 
418   bool isSSrcV2B16() const {
419     llvm_unreachable("cannot happen");
420     return isSSrcB16();
421   }
422 
423   bool isSSrcB64() const {
424     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
425     // See isVSrc64().
426     return isSCSrcB64() || isLiteralImm(MVT::i64);
427   }
428 
429   bool isSSrcF32() const {
430     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
431   }
432 
433   bool isSSrcF64() const {
434     return isSCSrcB64() || isLiteralImm(MVT::f64);
435   }
436 
437   bool isSSrcF16() const {
438     return isSCSrcB16() || isLiteralImm(MVT::f16);
439   }
440 
441   bool isSSrcV2F16() const {
442     llvm_unreachable("cannot happen");
443     return isSSrcF16();
444   }
445 
446   bool isSSrcOrLdsB32() const {
447     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
448            isLiteralImm(MVT::i32) || isExpr();
449   }
450 
451   bool isVCSrcB32() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
453   }
454 
455   bool isVCSrcB64() const {
456     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
457   }
458 
459   bool isVCSrcB16() const {
460     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
461   }
462 
463   bool isVCSrcV2B16() const {
464     return isVCSrcB16();
465   }
466 
467   bool isVCSrcF32() const {
468     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
469   }
470 
471   bool isVCSrcF64() const {
472     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
473   }
474 
475   bool isVCSrcF16() const {
476     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
477   }
478 
479   bool isVCSrcV2F16() const {
480     return isVCSrcF16();
481   }
482 
483   bool isVSrcB32() const {
484     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
485   }
486 
487   bool isVSrcB64() const {
488     return isVCSrcF64() || isLiteralImm(MVT::i64);
489   }
490 
491   bool isVSrcB16() const {
492     return isVCSrcB16() || isLiteralImm(MVT::i16);
493   }
494 
495   bool isVSrcV2B16() const {
496     return isVSrcB16() || isLiteralImm(MVT::v2i16);
497   }
498 
499   bool isVSrcF32() const {
500     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
501   }
502 
503   bool isVSrcF64() const {
504     return isVCSrcF64() || isLiteralImm(MVT::f64);
505   }
506 
507   bool isVSrcF16() const {
508     return isVCSrcF16() || isLiteralImm(MVT::f16);
509   }
510 
511   bool isVSrcV2F16() const {
512     return isVSrcF16() || isLiteralImm(MVT::v2f16);
513   }
514 
515   bool isVISrcB32() const {
516     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
517   }
518 
519   bool isVISrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
521   }
522 
523   bool isVISrcV2B16() const {
524     return isVISrcB16();
525   }
526 
527   bool isVISrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
529   }
530 
531   bool isVISrcF16() const {
532     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
533   }
534 
535   bool isVISrcV2F16() const {
536     return isVISrcF16() || isVISrcB32();
537   }
538 
539   bool isAISrcB32() const {
540     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
541   }
542 
543   bool isAISrcB16() const {
544     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
545   }
546 
547   bool isAISrcV2B16() const {
548     return isAISrcB16();
549   }
550 
551   bool isAISrcF32() const {
552     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
553   }
554 
555   bool isAISrcF16() const {
556     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
557   }
558 
559   bool isAISrcV2F16() const {
560     return isAISrcF16() || isAISrcB32();
561   }
562 
563   bool isAISrc_128B32() const {
564     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
565   }
566 
567   bool isAISrc_128B16() const {
568     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
569   }
570 
571   bool isAISrc_128V2B16() const {
572     return isAISrc_128B16();
573   }
574 
575   bool isAISrc_128F32() const {
576     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
577   }
578 
579   bool isAISrc_128F16() const {
580     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
581   }
582 
583   bool isAISrc_128V2F16() const {
584     return isAISrc_128F16() || isAISrc_128B32();
585   }
586 
587   bool isAISrc_512B32() const {
588     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
589   }
590 
591   bool isAISrc_512B16() const {
592     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
593   }
594 
595   bool isAISrc_512V2B16() const {
596     return isAISrc_512B16();
597   }
598 
599   bool isAISrc_512F32() const {
600     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
601   }
602 
603   bool isAISrc_512F16() const {
604     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
605   }
606 
607   bool isAISrc_512V2F16() const {
608     return isAISrc_512F16() || isAISrc_512B32();
609   }
610 
611   bool isAISrc_1024B32() const {
612     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
613   }
614 
615   bool isAISrc_1024B16() const {
616     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
617   }
618 
619   bool isAISrc_1024V2B16() const {
620     return isAISrc_1024B16();
621   }
622 
623   bool isAISrc_1024F32() const {
624     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
625   }
626 
627   bool isAISrc_1024F16() const {
628     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
629   }
630 
631   bool isAISrc_1024V2F16() const {
632     return isAISrc_1024F16() || isAISrc_1024B32();
633   }
634 
635   bool isKImmFP32() const {
636     return isLiteralImm(MVT::f32);
637   }
638 
639   bool isKImmFP16() const {
640     return isLiteralImm(MVT::f16);
641   }
642 
643   bool isMem() const override {
644     return false;
645   }
646 
647   bool isExpr() const {
648     return Kind == Expression;
649   }
650 
651   bool isSoppBrTarget() const {
652     return isExpr() || isImm();
653   }
654 
655   bool isSWaitCnt() const;
656   bool isHwreg() const;
657   bool isSendMsg() const;
658   bool isSwizzle() const;
659   bool isSMRDOffset8() const;
660   bool isSMEMOffset() const;
661   bool isSMRDLiteralOffset() const;
662   bool isDPP8() const;
663   bool isDPPCtrl() const;
664   bool isBLGP() const;
665   bool isCBSZ() const;
666   bool isABID() const;
667   bool isGPRIdxMode() const;
668   bool isS16Imm() const;
669   bool isU16Imm() const;
670   bool isEndpgm() const;
671 
672   StringRef getExpressionAsToken() const {
673     assert(isExpr());
674     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
675     return S->getSymbol().getName();
676   }
677 
678   StringRef getToken() const {
679     assert(isToken());
680 
681     if (Kind == Expression)
682       return getExpressionAsToken();
683 
684     return StringRef(Tok.Data, Tok.Length);
685   }
686 
687   int64_t getImm() const {
688     assert(isImm());
689     return Imm.Val;
690   }
691 
692   void setImm(int64_t Val) {
693     assert(isImm());
694     Imm.Val = Val;
695   }
696 
697   ImmTy getImmTy() const {
698     assert(isImm());
699     return Imm.Type;
700   }
701 
702   unsigned getReg() const override {
703     assert(isRegKind());
704     return Reg.RegNo;
705   }
706 
707   SMLoc getStartLoc() const override {
708     return StartLoc;
709   }
710 
711   SMLoc getEndLoc() const override {
712     return EndLoc;
713   }
714 
715   SMRange getLocRange() const {
716     return SMRange(StartLoc, EndLoc);
717   }
718 
719   Modifiers getModifiers() const {
720     assert(isRegKind() || isImmTy(ImmTyNone));
721     return isRegKind() ? Reg.Mods : Imm.Mods;
722   }
723 
724   void setModifiers(Modifiers Mods) {
725     assert(isRegKind() || isImmTy(ImmTyNone));
726     if (isRegKind())
727       Reg.Mods = Mods;
728     else
729       Imm.Mods = Mods;
730   }
731 
732   bool hasModifiers() const {
733     return getModifiers().hasModifiers();
734   }
735 
736   bool hasFPModifiers() const {
737     return getModifiers().hasFPModifiers();
738   }
739 
740   bool hasIntModifiers() const {
741     return getModifiers().hasIntModifiers();
742   }
743 
744   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
745 
746   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
747 
748   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
749 
750   template <unsigned Bitwidth>
751   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
752 
753   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
754     addKImmFPOperands<16>(Inst, N);
755   }
756 
757   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
758     addKImmFPOperands<32>(Inst, N);
759   }
760 
761   void addRegOperands(MCInst &Inst, unsigned N) const;
762 
763   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
764     addRegOperands(Inst, N);
765   }
766 
767   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
768     if (isRegKind())
769       addRegOperands(Inst, N);
770     else if (isExpr())
771       Inst.addOperand(MCOperand::createExpr(Expr));
772     else
773       addImmOperands(Inst, N);
774   }
775 
776   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
777     Modifiers Mods = getModifiers();
778     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
779     if (isRegKind()) {
780       addRegOperands(Inst, N);
781     } else {
782       addImmOperands(Inst, N, false);
783     }
784   }
785 
786   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegOrImmWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegOrImmWithInputModsOperands(Inst, N);
794   }
795 
796   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
797     Modifiers Mods = getModifiers();
798     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
799     assert(isRegKind());
800     addRegOperands(Inst, N);
801   }
802 
803   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
804     assert(!hasIntModifiers());
805     addRegWithInputModsOperands(Inst, N);
806   }
807 
808   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
809     assert(!hasFPModifiers());
810     addRegWithInputModsOperands(Inst, N);
811   }
812 
813   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
814     if (isImm())
815       addImmOperands(Inst, N);
816     else {
817       assert(isExpr());
818       Inst.addOperand(MCOperand::createExpr(Expr));
819     }
820   }
821 
822   static void printImmTy(raw_ostream& OS, ImmTy Type) {
823     switch (Type) {
824     case ImmTyNone: OS << "None"; break;
825     case ImmTyGDS: OS << "GDS"; break;
826     case ImmTyLDS: OS << "LDS"; break;
827     case ImmTyOffen: OS << "Offen"; break;
828     case ImmTyIdxen: OS << "Idxen"; break;
829     case ImmTyAddr64: OS << "Addr64"; break;
830     case ImmTyOffset: OS << "Offset"; break;
831     case ImmTyInstOffset: OS << "InstOffset"; break;
832     case ImmTyOffset0: OS << "Offset0"; break;
833     case ImmTyOffset1: OS << "Offset1"; break;
834     case ImmTyDLC: OS << "DLC"; break;
835     case ImmTyGLC: OS << "GLC"; break;
836     case ImmTySLC: OS << "SLC"; break;
837     case ImmTySWZ: OS << "SWZ"; break;
838     case ImmTyTFE: OS << "TFE"; break;
839     case ImmTyD16: OS << "D16"; break;
840     case ImmTyFORMAT: OS << "FORMAT"; break;
841     case ImmTyClampSI: OS << "ClampSI"; break;
842     case ImmTyOModSI: OS << "OModSI"; break;
843     case ImmTyDPP8: OS << "DPP8"; break;
844     case ImmTyDppCtrl: OS << "DppCtrl"; break;
845     case ImmTyDppRowMask: OS << "DppRowMask"; break;
846     case ImmTyDppBankMask: OS << "DppBankMask"; break;
847     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
848     case ImmTyDppFi: OS << "FI"; break;
849     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
850     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
851     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
852     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
853     case ImmTyDMask: OS << "DMask"; break;
854     case ImmTyDim: OS << "Dim"; break;
855     case ImmTyUNorm: OS << "UNorm"; break;
856     case ImmTyDA: OS << "DA"; break;
857     case ImmTyR128A16: OS << "R128A16"; break;
858     case ImmTyA16: OS << "A16"; break;
859     case ImmTyLWE: OS << "LWE"; break;
860     case ImmTyOff: OS << "Off"; break;
861     case ImmTyExpTgt: OS << "ExpTgt"; break;
862     case ImmTyExpCompr: OS << "ExpCompr"; break;
863     case ImmTyExpVM: OS << "ExpVM"; break;
864     case ImmTyHwreg: OS << "Hwreg"; break;
865     case ImmTySendMsg: OS << "SendMsg"; break;
866     case ImmTyInterpSlot: OS << "InterpSlot"; break;
867     case ImmTyInterpAttr: OS << "InterpAttr"; break;
868     case ImmTyAttrChan: OS << "AttrChan"; break;
869     case ImmTyOpSel: OS << "OpSel"; break;
870     case ImmTyOpSelHi: OS << "OpSelHi"; break;
871     case ImmTyNegLo: OS << "NegLo"; break;
872     case ImmTyNegHi: OS << "NegHi"; break;
873     case ImmTySwizzle: OS << "Swizzle"; break;
874     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
875     case ImmTyHigh: OS << "High"; break;
876     case ImmTyBLGP: OS << "BLGP"; break;
877     case ImmTyCBSZ: OS << "CBSZ"; break;
878     case ImmTyABID: OS << "ABID"; break;
879     case ImmTyEndpgm: OS << "Endpgm"; break;
880     }
881   }
882 
883   void print(raw_ostream &OS) const override {
884     switch (Kind) {
885     case Register:
886       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
887       break;
888     case Immediate:
889       OS << '<' << getImm();
890       if (getImmTy() != ImmTyNone) {
891         OS << " type: "; printImmTy(OS, getImmTy());
892       }
893       OS << " mods: " << Imm.Mods << '>';
894       break;
895     case Token:
896       OS << '\'' << getToken() << '\'';
897       break;
898     case Expression:
899       OS << "<expr " << *Expr << '>';
900       break;
901     }
902   }
903 
904   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
905                                       int64_t Val, SMLoc Loc,
906                                       ImmTy Type = ImmTyNone,
907                                       bool IsFPImm = false) {
908     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
909     Op->Imm.Val = Val;
910     Op->Imm.IsFPImm = IsFPImm;
911     Op->Imm.Type = Type;
912     Op->Imm.Mods = Modifiers();
913     Op->StartLoc = Loc;
914     Op->EndLoc = Loc;
915     return Op;
916   }
917 
918   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
919                                         StringRef Str, SMLoc Loc,
920                                         bool HasExplicitEncodingSize = true) {
921     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
922     Res->Tok.Data = Str.data();
923     Res->Tok.Length = Str.size();
924     Res->StartLoc = Loc;
925     Res->EndLoc = Loc;
926     return Res;
927   }
928 
929   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
930                                       unsigned RegNo, SMLoc S,
931                                       SMLoc E) {
932     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
933     Op->Reg.RegNo = RegNo;
934     Op->Reg.Mods = Modifiers();
935     Op->StartLoc = S;
936     Op->EndLoc = E;
937     return Op;
938   }
939 
940   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
941                                        const class MCExpr *Expr, SMLoc S) {
942     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
943     Op->Expr = Expr;
944     Op->StartLoc = S;
945     Op->EndLoc = S;
946     return Op;
947   }
948 };
949 
950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
951   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
952   return OS;
953 }
954 
955 //===----------------------------------------------------------------------===//
956 // AsmParser
957 //===----------------------------------------------------------------------===//
958 
959 // Holds info related to the current kernel, e.g. count of SGPRs used.
960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
961 // .amdgpu_hsa_kernel or at EOF.
962 class KernelScopeInfo {
963   int SgprIndexUnusedMin = -1;
964   int VgprIndexUnusedMin = -1;
965   MCContext *Ctx = nullptr;
966 
967   void usesSgprAt(int i) {
968     if (i >= SgprIndexUnusedMin) {
969       SgprIndexUnusedMin = ++i;
970       if (Ctx) {
971         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
972         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
973       }
974     }
975   }
976 
977   void usesVgprAt(int i) {
978     if (i >= VgprIndexUnusedMin) {
979       VgprIndexUnusedMin = ++i;
980       if (Ctx) {
981         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
982         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
983       }
984     }
985   }
986 
987 public:
988   KernelScopeInfo() = default;
989 
990   void initialize(MCContext &Context) {
991     Ctx = &Context;
992     usesSgprAt(SgprIndexUnusedMin = -1);
993     usesVgprAt(VgprIndexUnusedMin = -1);
994   }
995 
996   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
997     switch (RegKind) {
998       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
999       case IS_AGPR: // fall through
1000       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1001       default: break;
1002     }
1003   }
1004 };
1005 
1006 class AMDGPUAsmParser : public MCTargetAsmParser {
1007   MCAsmParser &Parser;
1008 
1009   // Number of extra operands parsed after the first optional operand.
1010   // This may be necessary to skip hardcoded mandatory operands.
1011   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1012 
1013   unsigned ForcedEncodingSize = 0;
1014   bool ForcedDPP = false;
1015   bool ForcedSDWA = false;
1016   KernelScopeInfo KernelScope;
1017 
1018   /// @name Auto-generated Match Functions
1019   /// {
1020 
1021 #define GET_ASSEMBLER_HEADER
1022 #include "AMDGPUGenAsmMatcher.inc"
1023 
1024   /// }
1025 
1026 private:
1027   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1028   bool OutOfRangeError(SMRange Range);
1029   /// Calculate VGPR/SGPR blocks required for given target, reserved
1030   /// registers, and user-specified NextFreeXGPR values.
1031   ///
1032   /// \param Features [in] Target features, used for bug corrections.
1033   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1034   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1035   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1036   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1037   /// descriptor field, if valid.
1038   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1039   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1040   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1041   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1042   /// \param VGPRBlocks [out] Result VGPR block count.
1043   /// \param SGPRBlocks [out] Result SGPR block count.
1044   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1045                           bool FlatScrUsed, bool XNACKUsed,
1046                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1047                           SMRange VGPRRange, unsigned NextFreeSGPR,
1048                           SMRange SGPRRange, unsigned &VGPRBlocks,
1049                           unsigned &SGPRBlocks);
1050   bool ParseDirectiveAMDGCNTarget();
1051   bool ParseDirectiveAMDHSAKernel();
1052   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1053   bool ParseDirectiveHSACodeObjectVersion();
1054   bool ParseDirectiveHSACodeObjectISA();
1055   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1056   bool ParseDirectiveAMDKernelCodeT();
1057   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1058   bool ParseDirectiveAMDGPUHsaKernel();
1059 
1060   bool ParseDirectiveISAVersion();
1061   bool ParseDirectiveHSAMetadata();
1062   bool ParseDirectivePALMetadataBegin();
1063   bool ParseDirectivePALMetadata();
1064   bool ParseDirectiveAMDGPULDS();
1065 
1066   /// Common code to parse out a block of text (typically YAML) between start and
1067   /// end directives.
1068   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1069                            const char *AssemblerDirectiveEnd,
1070                            std::string &CollectString);
1071 
1072   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1073                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1074   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1075                            unsigned &RegNum, unsigned &RegWidth,
1076                            bool RestoreOnFailure = false);
1077   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1078                            unsigned &RegNum, unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1081                            unsigned &RegWidth,
1082                            SmallVectorImpl<AsmToken> &Tokens);
1083   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1084                            unsigned &RegWidth,
1085                            SmallVectorImpl<AsmToken> &Tokens);
1086   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1087                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1088   bool ParseRegRange(unsigned& Num, unsigned& Width);
1089   unsigned getRegularReg(RegisterKind RegKind,
1090                          unsigned RegNum,
1091                          unsigned RegWidth,
1092                          SMLoc Loc);
1093 
1094   bool isRegister();
1095   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1096   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1097   void initializeGprCountSymbol(RegisterKind RegKind);
1098   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1099                              unsigned RegWidth);
1100   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1101                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1102   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1103                  bool IsGdsHardcoded);
1104 
1105 public:
1106   enum AMDGPUMatchResultTy {
1107     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1108   };
1109   enum OperandMode {
1110     OperandMode_Default,
1111     OperandMode_NSA,
1112   };
1113 
1114   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1115 
1116   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1117                const MCInstrInfo &MII,
1118                const MCTargetOptions &Options)
1119       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1120     MCAsmParserExtension::Initialize(Parser);
1121 
1122     if (getFeatureBits().none()) {
1123       // Set default features.
1124       copySTI().ToggleFeature("southern-islands");
1125     }
1126 
1127     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1128 
1129     {
1130       // TODO: make those pre-defined variables read-only.
1131       // Currently there is none suitable machinery in the core llvm-mc for this.
1132       // MCSymbol::isRedefinable is intended for another purpose, and
1133       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1134       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1135       MCContext &Ctx = getContext();
1136       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1137         MCSymbol *Sym =
1138             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1139         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1140         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1141         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1142         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1143         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1144       } else {
1145         MCSymbol *Sym =
1146             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1147         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1148         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1149         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1150         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1151         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1152       }
1153       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1154         initializeGprCountSymbol(IS_VGPR);
1155         initializeGprCountSymbol(IS_SGPR);
1156       } else
1157         KernelScope.initialize(getContext());
1158     }
1159   }
1160 
1161   bool hasXNACK() const {
1162     return AMDGPU::hasXNACK(getSTI());
1163   }
1164 
1165   bool hasMIMG_R128() const {
1166     return AMDGPU::hasMIMG_R128(getSTI());
1167   }
1168 
1169   bool hasPackedD16() const {
1170     return AMDGPU::hasPackedD16(getSTI());
1171   }
1172 
1173   bool hasGFX10A16() const {
1174     return AMDGPU::hasGFX10A16(getSTI());
1175   }
1176 
1177   bool isSI() const {
1178     return AMDGPU::isSI(getSTI());
1179   }
1180 
1181   bool isCI() const {
1182     return AMDGPU::isCI(getSTI());
1183   }
1184 
1185   bool isVI() const {
1186     return AMDGPU::isVI(getSTI());
1187   }
1188 
1189   bool isGFX9() const {
1190     return AMDGPU::isGFX9(getSTI());
1191   }
1192 
1193   bool isGFX10() const {
1194     return AMDGPU::isGFX10(getSTI());
1195   }
1196 
1197   bool isGFX10_BEncoding() const {
1198     return AMDGPU::isGFX10_BEncoding(getSTI());
1199   }
1200 
1201   bool hasInv2PiInlineImm() const {
1202     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1203   }
1204 
1205   bool hasFlatOffsets() const {
1206     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1207   }
1208 
1209   bool hasSGPR102_SGPR103() const {
1210     return !isVI() && !isGFX9();
1211   }
1212 
1213   bool hasSGPR104_SGPR105() const {
1214     return isGFX10();
1215   }
1216 
1217   bool hasIntClamp() const {
1218     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1219   }
1220 
1221   AMDGPUTargetStreamer &getTargetStreamer() {
1222     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1223     return static_cast<AMDGPUTargetStreamer &>(TS);
1224   }
1225 
1226   const MCRegisterInfo *getMRI() const {
1227     // We need this const_cast because for some reason getContext() is not const
1228     // in MCAsmParser.
1229     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1230   }
1231 
1232   const MCInstrInfo *getMII() const {
1233     return &MII;
1234   }
1235 
1236   const FeatureBitset &getFeatureBits() const {
1237     return getSTI().getFeatureBits();
1238   }
1239 
1240   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1241   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1242   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1243 
1244   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1245   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1246   bool isForcedDPP() const { return ForcedDPP; }
1247   bool isForcedSDWA() const { return ForcedSDWA; }
1248   ArrayRef<unsigned> getMatchedVariants() const;
1249 
1250   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1251   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1252                      bool RestoreOnFailure);
1253   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1254   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1255                                         SMLoc &EndLoc) override;
1256   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1257   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1258                                       unsigned Kind) override;
1259   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1260                                OperandVector &Operands, MCStreamer &Out,
1261                                uint64_t &ErrorInfo,
1262                                bool MatchingInlineAsm) override;
1263   bool ParseDirective(AsmToken DirectiveID) override;
1264   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1265                                     OperandMode Mode = OperandMode_Default);
1266   StringRef parseMnemonicSuffix(StringRef Name);
1267   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1268                         SMLoc NameLoc, OperandVector &Operands) override;
1269   //bool ProcessInstruction(MCInst &Inst);
1270 
1271   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1272 
1273   OperandMatchResultTy
1274   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1275                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1276                      bool (*ConvertResult)(int64_t &) = nullptr);
1277 
1278   OperandMatchResultTy
1279   parseOperandArrayWithPrefix(const char *Prefix,
1280                               OperandVector &Operands,
1281                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1282                               bool (*ConvertResult)(int64_t&) = nullptr);
1283 
1284   OperandMatchResultTy
1285   parseNamedBit(const char *Name, OperandVector &Operands,
1286                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1287   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1288                                              StringRef &Value);
1289 
1290   bool isModifier();
1291   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1292   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1293   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1294   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1295   bool parseSP3NegModifier();
1296   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1297   OperandMatchResultTy parseReg(OperandVector &Operands);
1298   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1299   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1300   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1301   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1302   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1303   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1304   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1305   OperandMatchResultTy parseUfmt(int64_t &Format);
1306   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1307   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1308   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1309   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1310   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1311   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1312   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1313 
1314   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1315   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1316   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1317   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1318 
1319   bool parseCnt(int64_t &IntVal);
1320   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1321   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1322 
1323 private:
1324   struct OperandInfoTy {
1325     int64_t Id;
1326     bool IsSymbolic = false;
1327     bool IsDefined = false;
1328 
1329     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1330   };
1331 
1332   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1333   bool validateSendMsg(const OperandInfoTy &Msg,
1334                        const OperandInfoTy &Op,
1335                        const OperandInfoTy &Stream,
1336                        const SMLoc Loc);
1337 
1338   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1339   bool validateHwreg(const OperandInfoTy &HwReg,
1340                      const int64_t Offset,
1341                      const int64_t Width,
1342                      const SMLoc Loc);
1343 
1344   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1345   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1346   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1347 
1348   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1349   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1350   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1351   bool validateSOPLiteral(const MCInst &Inst) const;
1352   bool validateConstantBusLimitations(const MCInst &Inst);
1353   bool validateEarlyClobberLimitations(const MCInst &Inst);
1354   bool validateIntClampSupported(const MCInst &Inst);
1355   bool validateMIMGAtomicDMask(const MCInst &Inst);
1356   bool validateMIMGGatherDMask(const MCInst &Inst);
1357   bool validateMovrels(const MCInst &Inst);
1358   bool validateMIMGDataSize(const MCInst &Inst);
1359   bool validateMIMGAddrSize(const MCInst &Inst);
1360   bool validateMIMGD16(const MCInst &Inst);
1361   bool validateMIMGDim(const MCInst &Inst);
1362   bool validateLdsDirect(const MCInst &Inst);
1363   bool validateOpSel(const MCInst &Inst);
1364   bool validateVccOperand(unsigned Reg) const;
1365   bool validateVOP3Literal(const MCInst &Inst) const;
1366   bool validateMAIAccWrite(const MCInst &Inst);
1367   unsigned getConstantBusLimit(unsigned Opcode) const;
1368   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1369   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1370   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1371 
1372   bool isId(const StringRef Id) const;
1373   bool isId(const AsmToken &Token, const StringRef Id) const;
1374   bool isToken(const AsmToken::TokenKind Kind) const;
1375   bool trySkipId(const StringRef Id);
1376   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1377   bool trySkipToken(const AsmToken::TokenKind Kind);
1378   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1379   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1380   bool parseId(StringRef &Val, const StringRef ErrMsg);
1381 
1382   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1383   AsmToken::TokenKind getTokenKind() const;
1384   bool parseExpr(int64_t &Imm);
1385   bool parseExpr(OperandVector &Operands);
1386   StringRef getTokenStr() const;
1387   AsmToken peekToken();
1388   AsmToken getToken() const;
1389   SMLoc getLoc() const;
1390   void lex();
1391 
1392 public:
1393   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1394   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1395 
1396   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1397   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1398   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1399   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1400   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1401   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1402 
1403   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1404                             const unsigned MinVal,
1405                             const unsigned MaxVal,
1406                             const StringRef ErrMsg);
1407   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1408   bool parseSwizzleOffset(int64_t &Imm);
1409   bool parseSwizzleMacro(int64_t &Imm);
1410   bool parseSwizzleQuadPerm(int64_t &Imm);
1411   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1412   bool parseSwizzleBroadcast(int64_t &Imm);
1413   bool parseSwizzleSwap(int64_t &Imm);
1414   bool parseSwizzleReverse(int64_t &Imm);
1415 
1416   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1417   int64_t parseGPRIdxMacro();
1418 
1419   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1420   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1421   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1422   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1423   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1424 
1425   AMDGPUOperand::Ptr defaultDLC() const;
1426   AMDGPUOperand::Ptr defaultGLC() const;
1427   AMDGPUOperand::Ptr defaultSLC() const;
1428 
1429   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1430   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1431   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1432   AMDGPUOperand::Ptr defaultFlatOffset() const;
1433 
1434   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1435 
1436   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1437                OptionalImmIndexMap &OptionalIdx);
1438   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1439   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1440   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1441 
1442   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1443 
1444   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1445                bool IsAtomic = false);
1446   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1447   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1448 
1449   OperandMatchResultTy parseDim(OperandVector &Operands);
1450   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1451   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1452   AMDGPUOperand::Ptr defaultRowMask() const;
1453   AMDGPUOperand::Ptr defaultBankMask() const;
1454   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1455   AMDGPUOperand::Ptr defaultFI() const;
1456   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1457   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1458 
1459   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1460                                     AMDGPUOperand::ImmTy Type);
1461   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1462   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1463   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1464   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1465   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1466   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1467   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1468                uint64_t BasicInstType,
1469                bool SkipDstVcc = false,
1470                bool SkipSrcVcc = false);
1471 
1472   AMDGPUOperand::Ptr defaultBLGP() const;
1473   AMDGPUOperand::Ptr defaultCBSZ() const;
1474   AMDGPUOperand::Ptr defaultABID() const;
1475 
1476   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1477   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1478 };
1479 
1480 struct OptionalOperand {
1481   const char *Name;
1482   AMDGPUOperand::ImmTy Type;
1483   bool IsBit;
1484   bool (*ConvertResult)(int64_t&);
1485 };
1486 
1487 } // end anonymous namespace
1488 
1489 // May be called with integer type with equivalent bitwidth.
1490 static const fltSemantics *getFltSemantics(unsigned Size) {
1491   switch (Size) {
1492   case 4:
1493     return &APFloat::IEEEsingle();
1494   case 8:
1495     return &APFloat::IEEEdouble();
1496   case 2:
1497     return &APFloat::IEEEhalf();
1498   default:
1499     llvm_unreachable("unsupported fp type");
1500   }
1501 }
1502 
1503 static const fltSemantics *getFltSemantics(MVT VT) {
1504   return getFltSemantics(VT.getSizeInBits() / 8);
1505 }
1506 
1507 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1508   switch (OperandType) {
1509   case AMDGPU::OPERAND_REG_IMM_INT32:
1510   case AMDGPU::OPERAND_REG_IMM_FP32:
1511   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1512   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1513   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1514   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1515     return &APFloat::IEEEsingle();
1516   case AMDGPU::OPERAND_REG_IMM_INT64:
1517   case AMDGPU::OPERAND_REG_IMM_FP64:
1518   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1519   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1520     return &APFloat::IEEEdouble();
1521   case AMDGPU::OPERAND_REG_IMM_INT16:
1522   case AMDGPU::OPERAND_REG_IMM_FP16:
1523   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1524   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1525   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1526   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1527   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1528   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1529   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1530   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1531   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1532   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1533     return &APFloat::IEEEhalf();
1534   default:
1535     llvm_unreachable("unsupported fp type");
1536   }
1537 }
1538 
1539 //===----------------------------------------------------------------------===//
1540 // Operand
1541 //===----------------------------------------------------------------------===//
1542 
1543 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1544   bool Lost;
1545 
1546   // Convert literal to single precision
1547   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1548                                                APFloat::rmNearestTiesToEven,
1549                                                &Lost);
1550   // We allow precision lost but not overflow or underflow
1551   if (Status != APFloat::opOK &&
1552       Lost &&
1553       ((Status & APFloat::opOverflow)  != 0 ||
1554        (Status & APFloat::opUnderflow) != 0)) {
1555     return false;
1556   }
1557 
1558   return true;
1559 }
1560 
1561 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1562   return isUIntN(Size, Val) || isIntN(Size, Val);
1563 }
1564 
1565 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1566   if (VT.getScalarType() == MVT::i16) {
1567     // FP immediate values are broken.
1568     return isInlinableIntLiteral(Val);
1569   }
1570 
1571   // f16/v2f16 operands work correctly for all values.
1572   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1573 }
1574 
1575 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1576 
1577   // This is a hack to enable named inline values like
1578   // shared_base with both 32-bit and 64-bit operands.
1579   // Note that these values are defined as
1580   // 32-bit operands only.
1581   if (isInlineValue()) {
1582     return true;
1583   }
1584 
1585   if (!isImmTy(ImmTyNone)) {
1586     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1587     return false;
1588   }
1589   // TODO: We should avoid using host float here. It would be better to
1590   // check the float bit values which is what a few other places do.
1591   // We've had bot failures before due to weird NaN support on mips hosts.
1592 
1593   APInt Literal(64, Imm.Val);
1594 
1595   if (Imm.IsFPImm) { // We got fp literal token
1596     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1597       return AMDGPU::isInlinableLiteral64(Imm.Val,
1598                                           AsmParser->hasInv2PiInlineImm());
1599     }
1600 
1601     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1602     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1603       return false;
1604 
1605     if (type.getScalarSizeInBits() == 16) {
1606       return isInlineableLiteralOp16(
1607         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1608         type, AsmParser->hasInv2PiInlineImm());
1609     }
1610 
1611     // Check if single precision literal is inlinable
1612     return AMDGPU::isInlinableLiteral32(
1613       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1614       AsmParser->hasInv2PiInlineImm());
1615   }
1616 
1617   // We got int literal token.
1618   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1619     return AMDGPU::isInlinableLiteral64(Imm.Val,
1620                                         AsmParser->hasInv2PiInlineImm());
1621   }
1622 
1623   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1624     return false;
1625   }
1626 
1627   if (type.getScalarSizeInBits() == 16) {
1628     return isInlineableLiteralOp16(
1629       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1630       type, AsmParser->hasInv2PiInlineImm());
1631   }
1632 
1633   return AMDGPU::isInlinableLiteral32(
1634     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1635     AsmParser->hasInv2PiInlineImm());
1636 }
1637 
1638 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1639   // Check that this immediate can be added as literal
1640   if (!isImmTy(ImmTyNone)) {
1641     return false;
1642   }
1643 
1644   if (!Imm.IsFPImm) {
1645     // We got int literal token.
1646 
1647     if (type == MVT::f64 && hasFPModifiers()) {
1648       // Cannot apply fp modifiers to int literals preserving the same semantics
1649       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1650       // disable these cases.
1651       return false;
1652     }
1653 
1654     unsigned Size = type.getSizeInBits();
1655     if (Size == 64)
1656       Size = 32;
1657 
1658     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1659     // types.
1660     return isSafeTruncation(Imm.Val, Size);
1661   }
1662 
1663   // We got fp literal token
1664   if (type == MVT::f64) { // Expected 64-bit fp operand
1665     // We would set low 64-bits of literal to zeroes but we accept this literals
1666     return true;
1667   }
1668 
1669   if (type == MVT::i64) { // Expected 64-bit int operand
1670     // We don't allow fp literals in 64-bit integer instructions. It is
1671     // unclear how we should encode them.
1672     return false;
1673   }
1674 
1675   // We allow fp literals with f16x2 operands assuming that the specified
1676   // literal goes into the lower half and the upper half is zero. We also
1677   // require that the literal may be losslesly converted to f16.
1678   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1679                      (type == MVT::v2i16)? MVT::i16 : type;
1680 
1681   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1682   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1683 }
1684 
1685 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1686   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1687 }
1688 
1689 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1690   if (AsmParser->isVI())
1691     return isVReg32();
1692   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1693     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1694   else
1695     return false;
1696 }
1697 
1698 bool AMDGPUOperand::isSDWAFP16Operand() const {
1699   return isSDWAOperand(MVT::f16);
1700 }
1701 
1702 bool AMDGPUOperand::isSDWAFP32Operand() const {
1703   return isSDWAOperand(MVT::f32);
1704 }
1705 
1706 bool AMDGPUOperand::isSDWAInt16Operand() const {
1707   return isSDWAOperand(MVT::i16);
1708 }
1709 
1710 bool AMDGPUOperand::isSDWAInt32Operand() const {
1711   return isSDWAOperand(MVT::i32);
1712 }
1713 
1714 bool AMDGPUOperand::isBoolReg() const {
1715   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1716          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1717 }
1718 
1719 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1720 {
1721   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1722   assert(Size == 2 || Size == 4 || Size == 8);
1723 
1724   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1725 
1726   if (Imm.Mods.Abs) {
1727     Val &= ~FpSignMask;
1728   }
1729   if (Imm.Mods.Neg) {
1730     Val ^= FpSignMask;
1731   }
1732 
1733   return Val;
1734 }
1735 
1736 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1737   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1738                              Inst.getNumOperands())) {
1739     addLiteralImmOperand(Inst, Imm.Val,
1740                          ApplyModifiers &
1741                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1742   } else {
1743     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1744     Inst.addOperand(MCOperand::createImm(Imm.Val));
1745   }
1746 }
1747 
1748 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1749   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1750   auto OpNum = Inst.getNumOperands();
1751   // Check that this operand accepts literals
1752   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1753 
1754   if (ApplyModifiers) {
1755     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1756     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1757     Val = applyInputFPModifiers(Val, Size);
1758   }
1759 
1760   APInt Literal(64, Val);
1761   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1762 
1763   if (Imm.IsFPImm) { // We got fp literal token
1764     switch (OpTy) {
1765     case AMDGPU::OPERAND_REG_IMM_INT64:
1766     case AMDGPU::OPERAND_REG_IMM_FP64:
1767     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1768     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1769       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1770                                        AsmParser->hasInv2PiInlineImm())) {
1771         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1772         return;
1773       }
1774 
1775       // Non-inlineable
1776       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1777         // For fp operands we check if low 32 bits are zeros
1778         if (Literal.getLoBits(32) != 0) {
1779           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1780           "Can't encode literal as exact 64-bit floating-point operand. "
1781           "Low 32-bits will be set to zero");
1782         }
1783 
1784         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1785         return;
1786       }
1787 
1788       // We don't allow fp literals in 64-bit integer instructions. It is
1789       // unclear how we should encode them. This case should be checked earlier
1790       // in predicate methods (isLiteralImm())
1791       llvm_unreachable("fp literal in 64-bit integer instruction.");
1792 
1793     case AMDGPU::OPERAND_REG_IMM_INT32:
1794     case AMDGPU::OPERAND_REG_IMM_FP32:
1795     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1796     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1797     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1798     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1799     case AMDGPU::OPERAND_REG_IMM_INT16:
1800     case AMDGPU::OPERAND_REG_IMM_FP16:
1801     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1802     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1803     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1804     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1805     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1806     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1807     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1808     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1809     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1810     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1811       bool lost;
1812       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1813       // Convert literal to single precision
1814       FPLiteral.convert(*getOpFltSemantics(OpTy),
1815                         APFloat::rmNearestTiesToEven, &lost);
1816       // We allow precision lost but not overflow or underflow. This should be
1817       // checked earlier in isLiteralImm()
1818 
1819       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1820       Inst.addOperand(MCOperand::createImm(ImmVal));
1821       return;
1822     }
1823     default:
1824       llvm_unreachable("invalid operand size");
1825     }
1826 
1827     return;
1828   }
1829 
1830   // We got int literal token.
1831   // Only sign extend inline immediates.
1832   switch (OpTy) {
1833   case AMDGPU::OPERAND_REG_IMM_INT32:
1834   case AMDGPU::OPERAND_REG_IMM_FP32:
1835   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1836   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1838   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1839   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1840   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1841     if (isSafeTruncation(Val, 32) &&
1842         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1843                                      AsmParser->hasInv2PiInlineImm())) {
1844       Inst.addOperand(MCOperand::createImm(Val));
1845       return;
1846     }
1847 
1848     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1849     return;
1850 
1851   case AMDGPU::OPERAND_REG_IMM_INT64:
1852   case AMDGPU::OPERAND_REG_IMM_FP64:
1853   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1854   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1855     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1856       Inst.addOperand(MCOperand::createImm(Val));
1857       return;
1858     }
1859 
1860     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1861     return;
1862 
1863   case AMDGPU::OPERAND_REG_IMM_INT16:
1864   case AMDGPU::OPERAND_REG_IMM_FP16:
1865   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1866   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1868   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1869     if (isSafeTruncation(Val, 16) &&
1870         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1871                                      AsmParser->hasInv2PiInlineImm())) {
1872       Inst.addOperand(MCOperand::createImm(Val));
1873       return;
1874     }
1875 
1876     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1877     return;
1878 
1879   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1880   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1882   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1883     assert(isSafeTruncation(Val, 16));
1884     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1885                                         AsmParser->hasInv2PiInlineImm()));
1886 
1887     Inst.addOperand(MCOperand::createImm(Val));
1888     return;
1889   }
1890   default:
1891     llvm_unreachable("invalid operand size");
1892   }
1893 }
1894 
1895 template <unsigned Bitwidth>
1896 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1897   APInt Literal(64, Imm.Val);
1898 
1899   if (!Imm.IsFPImm) {
1900     // We got int literal token.
1901     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1902     return;
1903   }
1904 
1905   bool Lost;
1906   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1907   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1908                     APFloat::rmNearestTiesToEven, &Lost);
1909   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1910 }
1911 
1912 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1913   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1914 }
1915 
1916 static bool isInlineValue(unsigned Reg) {
1917   switch (Reg) {
1918   case AMDGPU::SRC_SHARED_BASE:
1919   case AMDGPU::SRC_SHARED_LIMIT:
1920   case AMDGPU::SRC_PRIVATE_BASE:
1921   case AMDGPU::SRC_PRIVATE_LIMIT:
1922   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1923     return true;
1924   case AMDGPU::SRC_VCCZ:
1925   case AMDGPU::SRC_EXECZ:
1926   case AMDGPU::SRC_SCC:
1927     return true;
1928   case AMDGPU::SGPR_NULL:
1929     return true;
1930   default:
1931     return false;
1932   }
1933 }
1934 
1935 bool AMDGPUOperand::isInlineValue() const {
1936   return isRegKind() && ::isInlineValue(getReg());
1937 }
1938 
1939 //===----------------------------------------------------------------------===//
1940 // AsmParser
1941 //===----------------------------------------------------------------------===//
1942 
1943 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1944   if (Is == IS_VGPR) {
1945     switch (RegWidth) {
1946       default: return -1;
1947       case 1: return AMDGPU::VGPR_32RegClassID;
1948       case 2: return AMDGPU::VReg_64RegClassID;
1949       case 3: return AMDGPU::VReg_96RegClassID;
1950       case 4: return AMDGPU::VReg_128RegClassID;
1951       case 5: return AMDGPU::VReg_160RegClassID;
1952       case 6: return AMDGPU::VReg_192RegClassID;
1953       case 8: return AMDGPU::VReg_256RegClassID;
1954       case 16: return AMDGPU::VReg_512RegClassID;
1955       case 32: return AMDGPU::VReg_1024RegClassID;
1956     }
1957   } else if (Is == IS_TTMP) {
1958     switch (RegWidth) {
1959       default: return -1;
1960       case 1: return AMDGPU::TTMP_32RegClassID;
1961       case 2: return AMDGPU::TTMP_64RegClassID;
1962       case 4: return AMDGPU::TTMP_128RegClassID;
1963       case 8: return AMDGPU::TTMP_256RegClassID;
1964       case 16: return AMDGPU::TTMP_512RegClassID;
1965     }
1966   } else if (Is == IS_SGPR) {
1967     switch (RegWidth) {
1968       default: return -1;
1969       case 1: return AMDGPU::SGPR_32RegClassID;
1970       case 2: return AMDGPU::SGPR_64RegClassID;
1971       case 3: return AMDGPU::SGPR_96RegClassID;
1972       case 4: return AMDGPU::SGPR_128RegClassID;
1973       case 5: return AMDGPU::SGPR_160RegClassID;
1974       case 6: return AMDGPU::SGPR_192RegClassID;
1975       case 8: return AMDGPU::SGPR_256RegClassID;
1976       case 16: return AMDGPU::SGPR_512RegClassID;
1977     }
1978   } else if (Is == IS_AGPR) {
1979     switch (RegWidth) {
1980       default: return -1;
1981       case 1: return AMDGPU::AGPR_32RegClassID;
1982       case 2: return AMDGPU::AReg_64RegClassID;
1983       case 3: return AMDGPU::AReg_96RegClassID;
1984       case 4: return AMDGPU::AReg_128RegClassID;
1985       case 5: return AMDGPU::AReg_160RegClassID;
1986       case 6: return AMDGPU::AReg_192RegClassID;
1987       case 8: return AMDGPU::AReg_256RegClassID;
1988       case 16: return AMDGPU::AReg_512RegClassID;
1989       case 32: return AMDGPU::AReg_1024RegClassID;
1990     }
1991   }
1992   return -1;
1993 }
1994 
1995 static unsigned getSpecialRegForName(StringRef RegName) {
1996   return StringSwitch<unsigned>(RegName)
1997     .Case("exec", AMDGPU::EXEC)
1998     .Case("vcc", AMDGPU::VCC)
1999     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2000     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2001     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2002     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2003     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2004     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2005     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2006     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2007     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2008     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2009     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2010     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2011     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2012     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2013     .Case("m0", AMDGPU::M0)
2014     .Case("vccz", AMDGPU::SRC_VCCZ)
2015     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2016     .Case("execz", AMDGPU::SRC_EXECZ)
2017     .Case("src_execz", AMDGPU::SRC_EXECZ)
2018     .Case("scc", AMDGPU::SRC_SCC)
2019     .Case("src_scc", AMDGPU::SRC_SCC)
2020     .Case("tba", AMDGPU::TBA)
2021     .Case("tma", AMDGPU::TMA)
2022     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2023     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2024     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2025     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2026     .Case("vcc_lo", AMDGPU::VCC_LO)
2027     .Case("vcc_hi", AMDGPU::VCC_HI)
2028     .Case("exec_lo", AMDGPU::EXEC_LO)
2029     .Case("exec_hi", AMDGPU::EXEC_HI)
2030     .Case("tma_lo", AMDGPU::TMA_LO)
2031     .Case("tma_hi", AMDGPU::TMA_HI)
2032     .Case("tba_lo", AMDGPU::TBA_LO)
2033     .Case("tba_hi", AMDGPU::TBA_HI)
2034     .Case("pc", AMDGPU::PC_REG)
2035     .Case("null", AMDGPU::SGPR_NULL)
2036     .Default(AMDGPU::NoRegister);
2037 }
2038 
2039 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2040                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2041   auto R = parseRegister();
2042   if (!R) return true;
2043   assert(R->isReg());
2044   RegNo = R->getReg();
2045   StartLoc = R->getStartLoc();
2046   EndLoc = R->getEndLoc();
2047   return false;
2048 }
2049 
2050 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2051                                     SMLoc &EndLoc) {
2052   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2053 }
2054 
2055 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2056                                                        SMLoc &StartLoc,
2057                                                        SMLoc &EndLoc) {
2058   bool Result =
2059       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2060   bool PendingErrors = getParser().hasPendingError();
2061   getParser().clearPendingErrors();
2062   if (PendingErrors)
2063     return MatchOperand_ParseFail;
2064   if (Result)
2065     return MatchOperand_NoMatch;
2066   return MatchOperand_Success;
2067 }
2068 
2069 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2070                                             RegisterKind RegKind, unsigned Reg1,
2071                                             SMLoc Loc) {
2072   switch (RegKind) {
2073   case IS_SPECIAL:
2074     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2075       Reg = AMDGPU::EXEC;
2076       RegWidth = 2;
2077       return true;
2078     }
2079     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2080       Reg = AMDGPU::FLAT_SCR;
2081       RegWidth = 2;
2082       return true;
2083     }
2084     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2085       Reg = AMDGPU::XNACK_MASK;
2086       RegWidth = 2;
2087       return true;
2088     }
2089     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2090       Reg = AMDGPU::VCC;
2091       RegWidth = 2;
2092       return true;
2093     }
2094     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2095       Reg = AMDGPU::TBA;
2096       RegWidth = 2;
2097       return true;
2098     }
2099     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2100       Reg = AMDGPU::TMA;
2101       RegWidth = 2;
2102       return true;
2103     }
2104     Error(Loc, "register does not fit in the list");
2105     return false;
2106   case IS_VGPR:
2107   case IS_SGPR:
2108   case IS_AGPR:
2109   case IS_TTMP:
2110     if (Reg1 != Reg + RegWidth) {
2111       Error(Loc, "registers in a list must have consecutive indices");
2112       return false;
2113     }
2114     RegWidth++;
2115     return true;
2116   default:
2117     llvm_unreachable("unexpected register kind");
2118   }
2119 }
2120 
2121 struct RegInfo {
2122   StringLiteral Name;
2123   RegisterKind Kind;
2124 };
2125 
2126 static constexpr RegInfo RegularRegisters[] = {
2127   {{"v"},    IS_VGPR},
2128   {{"s"},    IS_SGPR},
2129   {{"ttmp"}, IS_TTMP},
2130   {{"acc"},  IS_AGPR},
2131   {{"a"},    IS_AGPR},
2132 };
2133 
2134 static bool isRegularReg(RegisterKind Kind) {
2135   return Kind == IS_VGPR ||
2136          Kind == IS_SGPR ||
2137          Kind == IS_TTMP ||
2138          Kind == IS_AGPR;
2139 }
2140 
2141 static const RegInfo* getRegularRegInfo(StringRef Str) {
2142   for (const RegInfo &Reg : RegularRegisters)
2143     if (Str.startswith(Reg.Name))
2144       return &Reg;
2145   return nullptr;
2146 }
2147 
2148 static bool getRegNum(StringRef Str, unsigned& Num) {
2149   return !Str.getAsInteger(10, Num);
2150 }
2151 
2152 bool
2153 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2154                             const AsmToken &NextToken) const {
2155 
2156   // A list of consecutive registers: [s0,s1,s2,s3]
2157   if (Token.is(AsmToken::LBrac))
2158     return true;
2159 
2160   if (!Token.is(AsmToken::Identifier))
2161     return false;
2162 
2163   // A single register like s0 or a range of registers like s[0:1]
2164 
2165   StringRef Str = Token.getString();
2166   const RegInfo *Reg = getRegularRegInfo(Str);
2167   if (Reg) {
2168     StringRef RegName = Reg->Name;
2169     StringRef RegSuffix = Str.substr(RegName.size());
2170     if (!RegSuffix.empty()) {
2171       unsigned Num;
2172       // A single register with an index: rXX
2173       if (getRegNum(RegSuffix, Num))
2174         return true;
2175     } else {
2176       // A range of registers: r[XX:YY].
2177       if (NextToken.is(AsmToken::LBrac))
2178         return true;
2179     }
2180   }
2181 
2182   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2183 }
2184 
2185 bool
2186 AMDGPUAsmParser::isRegister()
2187 {
2188   return isRegister(getToken(), peekToken());
2189 }
2190 
2191 unsigned
2192 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2193                                unsigned RegNum,
2194                                unsigned RegWidth,
2195                                SMLoc Loc) {
2196 
2197   assert(isRegularReg(RegKind));
2198 
2199   unsigned AlignSize = 1;
2200   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2201     // SGPR and TTMP registers must be aligned.
2202     // Max required alignment is 4 dwords.
2203     AlignSize = std::min(RegWidth, 4u);
2204   }
2205 
2206   if (RegNum % AlignSize != 0) {
2207     Error(Loc, "invalid register alignment");
2208     return AMDGPU::NoRegister;
2209   }
2210 
2211   unsigned RegIdx = RegNum / AlignSize;
2212   int RCID = getRegClass(RegKind, RegWidth);
2213   if (RCID == -1) {
2214     Error(Loc, "invalid or unsupported register size");
2215     return AMDGPU::NoRegister;
2216   }
2217 
2218   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2219   const MCRegisterClass RC = TRI->getRegClass(RCID);
2220   if (RegIdx >= RC.getNumRegs()) {
2221     Error(Loc, "register index is out of range");
2222     return AMDGPU::NoRegister;
2223   }
2224 
2225   return RC.getRegister(RegIdx);
2226 }
2227 
2228 bool
2229 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2230   int64_t RegLo, RegHi;
2231   if (!skipToken(AsmToken::LBrac, "missing register index"))
2232     return false;
2233 
2234   SMLoc FirstIdxLoc = getLoc();
2235   SMLoc SecondIdxLoc;
2236 
2237   if (!parseExpr(RegLo))
2238     return false;
2239 
2240   if (trySkipToken(AsmToken::Colon)) {
2241     SecondIdxLoc = getLoc();
2242     if (!parseExpr(RegHi))
2243       return false;
2244   } else {
2245     RegHi = RegLo;
2246   }
2247 
2248   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2249     return false;
2250 
2251   if (!isUInt<32>(RegLo)) {
2252     Error(FirstIdxLoc, "invalid register index");
2253     return false;
2254   }
2255 
2256   if (!isUInt<32>(RegHi)) {
2257     Error(SecondIdxLoc, "invalid register index");
2258     return false;
2259   }
2260 
2261   if (RegLo > RegHi) {
2262     Error(FirstIdxLoc, "first register index should not exceed second index");
2263     return false;
2264   }
2265 
2266   Num = static_cast<unsigned>(RegLo);
2267   Width = (RegHi - RegLo) + 1;
2268   return true;
2269 }
2270 
2271 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2272                                           unsigned &RegNum, unsigned &RegWidth,
2273                                           SmallVectorImpl<AsmToken> &Tokens) {
2274   assert(isToken(AsmToken::Identifier));
2275   unsigned Reg = getSpecialRegForName(getTokenStr());
2276   if (Reg) {
2277     RegNum = 0;
2278     RegWidth = 1;
2279     RegKind = IS_SPECIAL;
2280     Tokens.push_back(getToken());
2281     lex(); // skip register name
2282   }
2283   return Reg;
2284 }
2285 
2286 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2287                                           unsigned &RegNum, unsigned &RegWidth,
2288                                           SmallVectorImpl<AsmToken> &Tokens) {
2289   assert(isToken(AsmToken::Identifier));
2290   StringRef RegName = getTokenStr();
2291   auto Loc = getLoc();
2292 
2293   const RegInfo *RI = getRegularRegInfo(RegName);
2294   if (!RI) {
2295     Error(Loc, "invalid register name");
2296     return AMDGPU::NoRegister;
2297   }
2298 
2299   Tokens.push_back(getToken());
2300   lex(); // skip register name
2301 
2302   RegKind = RI->Kind;
2303   StringRef RegSuffix = RegName.substr(RI->Name.size());
2304   if (!RegSuffix.empty()) {
2305     // Single 32-bit register: vXX.
2306     if (!getRegNum(RegSuffix, RegNum)) {
2307       Error(Loc, "invalid register index");
2308       return AMDGPU::NoRegister;
2309     }
2310     RegWidth = 1;
2311   } else {
2312     // Range of registers: v[XX:YY]. ":YY" is optional.
2313     if (!ParseRegRange(RegNum, RegWidth))
2314       return AMDGPU::NoRegister;
2315   }
2316 
2317   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2318 }
2319 
2320 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2321                                        unsigned &RegWidth,
2322                                        SmallVectorImpl<AsmToken> &Tokens) {
2323   unsigned Reg = AMDGPU::NoRegister;
2324   auto ListLoc = getLoc();
2325 
2326   if (!skipToken(AsmToken::LBrac,
2327                  "expected a register or a list of registers")) {
2328     return AMDGPU::NoRegister;
2329   }
2330 
2331   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2332 
2333   auto Loc = getLoc();
2334   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2335     return AMDGPU::NoRegister;
2336   if (RegWidth != 1) {
2337     Error(Loc, "expected a single 32-bit register");
2338     return AMDGPU::NoRegister;
2339   }
2340 
2341   for (; trySkipToken(AsmToken::Comma); ) {
2342     RegisterKind NextRegKind;
2343     unsigned NextReg, NextRegNum, NextRegWidth;
2344     Loc = getLoc();
2345 
2346     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2347                              NextRegNum, NextRegWidth,
2348                              Tokens)) {
2349       return AMDGPU::NoRegister;
2350     }
2351     if (NextRegWidth != 1) {
2352       Error(Loc, "expected a single 32-bit register");
2353       return AMDGPU::NoRegister;
2354     }
2355     if (NextRegKind != RegKind) {
2356       Error(Loc, "registers in a list must be of the same kind");
2357       return AMDGPU::NoRegister;
2358     }
2359     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2360       return AMDGPU::NoRegister;
2361   }
2362 
2363   if (!skipToken(AsmToken::RBrac,
2364                  "expected a comma or a closing square bracket")) {
2365     return AMDGPU::NoRegister;
2366   }
2367 
2368   if (isRegularReg(RegKind))
2369     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2370 
2371   return Reg;
2372 }
2373 
2374 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2375                                           unsigned &RegNum, unsigned &RegWidth,
2376                                           SmallVectorImpl<AsmToken> &Tokens) {
2377   auto Loc = getLoc();
2378   Reg = AMDGPU::NoRegister;
2379 
2380   if (isToken(AsmToken::Identifier)) {
2381     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2382     if (Reg == AMDGPU::NoRegister)
2383       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2384   } else {
2385     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2386   }
2387 
2388   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2389   if (Reg == AMDGPU::NoRegister) {
2390     assert(Parser.hasPendingError());
2391     return false;
2392   }
2393 
2394   if (!subtargetHasRegister(*TRI, Reg)) {
2395     if (Reg == AMDGPU::SGPR_NULL) {
2396       Error(Loc, "'null' operand is not supported on this GPU");
2397     } else {
2398       Error(Loc, "register not available on this GPU");
2399     }
2400     return false;
2401   }
2402 
2403   return true;
2404 }
2405 
2406 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2407                                           unsigned &RegNum, unsigned &RegWidth,
2408                                           bool RestoreOnFailure /*=false*/) {
2409   Reg = AMDGPU::NoRegister;
2410 
2411   SmallVector<AsmToken, 1> Tokens;
2412   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2413     if (RestoreOnFailure) {
2414       while (!Tokens.empty()) {
2415         getLexer().UnLex(Tokens.pop_back_val());
2416       }
2417     }
2418     return true;
2419   }
2420   return false;
2421 }
2422 
2423 Optional<StringRef>
2424 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2425   switch (RegKind) {
2426   case IS_VGPR:
2427     return StringRef(".amdgcn.next_free_vgpr");
2428   case IS_SGPR:
2429     return StringRef(".amdgcn.next_free_sgpr");
2430   default:
2431     return None;
2432   }
2433 }
2434 
2435 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2436   auto SymbolName = getGprCountSymbolName(RegKind);
2437   assert(SymbolName && "initializing invalid register kind");
2438   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2439   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2440 }
2441 
2442 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2443                                             unsigned DwordRegIndex,
2444                                             unsigned RegWidth) {
2445   // Symbols are only defined for GCN targets
2446   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2447     return true;
2448 
2449   auto SymbolName = getGprCountSymbolName(RegKind);
2450   if (!SymbolName)
2451     return true;
2452   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2453 
2454   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2455   int64_t OldCount;
2456 
2457   if (!Sym->isVariable())
2458     return !Error(getParser().getTok().getLoc(),
2459                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2460   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2461     return !Error(
2462         getParser().getTok().getLoc(),
2463         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2464 
2465   if (OldCount <= NewMax)
2466     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2467 
2468   return true;
2469 }
2470 
2471 std::unique_ptr<AMDGPUOperand>
2472 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2473   const auto &Tok = Parser.getTok();
2474   SMLoc StartLoc = Tok.getLoc();
2475   SMLoc EndLoc = Tok.getEndLoc();
2476   RegisterKind RegKind;
2477   unsigned Reg, RegNum, RegWidth;
2478 
2479   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2480     return nullptr;
2481   }
2482   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2483     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2484       return nullptr;
2485   } else
2486     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2487   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2488 }
2489 
2490 OperandMatchResultTy
2491 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2492   // TODO: add syntactic sugar for 1/(2*PI)
2493 
2494   assert(!isRegister());
2495   assert(!isModifier());
2496 
2497   const auto& Tok = getToken();
2498   const auto& NextTok = peekToken();
2499   bool IsReal = Tok.is(AsmToken::Real);
2500   SMLoc S = getLoc();
2501   bool Negate = false;
2502 
2503   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2504     lex();
2505     IsReal = true;
2506     Negate = true;
2507   }
2508 
2509   if (IsReal) {
2510     // Floating-point expressions are not supported.
2511     // Can only allow floating-point literals with an
2512     // optional sign.
2513 
2514     StringRef Num = getTokenStr();
2515     lex();
2516 
2517     APFloat RealVal(APFloat::IEEEdouble());
2518     auto roundMode = APFloat::rmNearestTiesToEven;
2519     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2520       return MatchOperand_ParseFail;
2521     }
2522     if (Negate)
2523       RealVal.changeSign();
2524 
2525     Operands.push_back(
2526       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2527                                AMDGPUOperand::ImmTyNone, true));
2528 
2529     return MatchOperand_Success;
2530 
2531   } else {
2532     int64_t IntVal;
2533     const MCExpr *Expr;
2534     SMLoc S = getLoc();
2535 
2536     if (HasSP3AbsModifier) {
2537       // This is a workaround for handling expressions
2538       // as arguments of SP3 'abs' modifier, for example:
2539       //     |1.0|
2540       //     |-1|
2541       //     |1+x|
2542       // This syntax is not compatible with syntax of standard
2543       // MC expressions (due to the trailing '|').
2544       SMLoc EndLoc;
2545       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2546         return MatchOperand_ParseFail;
2547     } else {
2548       if (Parser.parseExpression(Expr))
2549         return MatchOperand_ParseFail;
2550     }
2551 
2552     if (Expr->evaluateAsAbsolute(IntVal)) {
2553       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2554     } else {
2555       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2556     }
2557 
2558     return MatchOperand_Success;
2559   }
2560 
2561   return MatchOperand_NoMatch;
2562 }
2563 
2564 OperandMatchResultTy
2565 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2566   if (!isRegister())
2567     return MatchOperand_NoMatch;
2568 
2569   if (auto R = parseRegister()) {
2570     assert(R->isReg());
2571     Operands.push_back(std::move(R));
2572     return MatchOperand_Success;
2573   }
2574   return MatchOperand_ParseFail;
2575 }
2576 
2577 OperandMatchResultTy
2578 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2579   auto res = parseReg(Operands);
2580   if (res != MatchOperand_NoMatch) {
2581     return res;
2582   } else if (isModifier()) {
2583     return MatchOperand_NoMatch;
2584   } else {
2585     return parseImm(Operands, HasSP3AbsMod);
2586   }
2587 }
2588 
2589 bool
2590 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2591   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2592     const auto &str = Token.getString();
2593     return str == "abs" || str == "neg" || str == "sext";
2594   }
2595   return false;
2596 }
2597 
2598 bool
2599 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2600   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2601 }
2602 
2603 bool
2604 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2605   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2606 }
2607 
2608 bool
2609 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2610   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2611 }
2612 
2613 // Check if this is an operand modifier or an opcode modifier
2614 // which may look like an expression but it is not. We should
2615 // avoid parsing these modifiers as expressions. Currently
2616 // recognized sequences are:
2617 //   |...|
2618 //   abs(...)
2619 //   neg(...)
2620 //   sext(...)
2621 //   -reg
2622 //   -|...|
2623 //   -abs(...)
2624 //   name:...
2625 // Note that simple opcode modifiers like 'gds' may be parsed as
2626 // expressions; this is a special case. See getExpressionAsToken.
2627 //
2628 bool
2629 AMDGPUAsmParser::isModifier() {
2630 
2631   AsmToken Tok = getToken();
2632   AsmToken NextToken[2];
2633   peekTokens(NextToken);
2634 
2635   return isOperandModifier(Tok, NextToken[0]) ||
2636          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2637          isOpcodeModifierWithVal(Tok, NextToken[0]);
2638 }
2639 
2640 // Check if the current token is an SP3 'neg' modifier.
2641 // Currently this modifier is allowed in the following context:
2642 //
2643 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2644 // 2. Before an 'abs' modifier: -abs(...)
2645 // 3. Before an SP3 'abs' modifier: -|...|
2646 //
2647 // In all other cases "-" is handled as a part
2648 // of an expression that follows the sign.
2649 //
2650 // Note: When "-" is followed by an integer literal,
2651 // this is interpreted as integer negation rather
2652 // than a floating-point NEG modifier applied to N.
2653 // Beside being contr-intuitive, such use of floating-point
2654 // NEG modifier would have resulted in different meaning
2655 // of integer literals used with VOP1/2/C and VOP3,
2656 // for example:
2657 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2658 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2659 // Negative fp literals with preceding "-" are
2660 // handled likewise for unifomtity
2661 //
2662 bool
2663 AMDGPUAsmParser::parseSP3NegModifier() {
2664 
2665   AsmToken NextToken[2];
2666   peekTokens(NextToken);
2667 
2668   if (isToken(AsmToken::Minus) &&
2669       (isRegister(NextToken[0], NextToken[1]) ||
2670        NextToken[0].is(AsmToken::Pipe) ||
2671        isId(NextToken[0], "abs"))) {
2672     lex();
2673     return true;
2674   }
2675 
2676   return false;
2677 }
2678 
2679 OperandMatchResultTy
2680 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2681                                               bool AllowImm) {
2682   bool Neg, SP3Neg;
2683   bool Abs, SP3Abs;
2684   SMLoc Loc;
2685 
2686   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2687   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2688     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2689     return MatchOperand_ParseFail;
2690   }
2691 
2692   SP3Neg = parseSP3NegModifier();
2693 
2694   Loc = getLoc();
2695   Neg = trySkipId("neg");
2696   if (Neg && SP3Neg) {
2697     Error(Loc, "expected register or immediate");
2698     return MatchOperand_ParseFail;
2699   }
2700   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2701     return MatchOperand_ParseFail;
2702 
2703   Abs = trySkipId("abs");
2704   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2705     return MatchOperand_ParseFail;
2706 
2707   Loc = getLoc();
2708   SP3Abs = trySkipToken(AsmToken::Pipe);
2709   if (Abs && SP3Abs) {
2710     Error(Loc, "expected register or immediate");
2711     return MatchOperand_ParseFail;
2712   }
2713 
2714   OperandMatchResultTy Res;
2715   if (AllowImm) {
2716     Res = parseRegOrImm(Operands, SP3Abs);
2717   } else {
2718     Res = parseReg(Operands);
2719   }
2720   if (Res != MatchOperand_Success) {
2721     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2722   }
2723 
2724   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2725     return MatchOperand_ParseFail;
2726   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2727     return MatchOperand_ParseFail;
2728   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2729     return MatchOperand_ParseFail;
2730 
2731   AMDGPUOperand::Modifiers Mods;
2732   Mods.Abs = Abs || SP3Abs;
2733   Mods.Neg = Neg || SP3Neg;
2734 
2735   if (Mods.hasFPModifiers()) {
2736     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2737     if (Op.isExpr()) {
2738       Error(Op.getStartLoc(), "expected an absolute expression");
2739       return MatchOperand_ParseFail;
2740     }
2741     Op.setModifiers(Mods);
2742   }
2743   return MatchOperand_Success;
2744 }
2745 
2746 OperandMatchResultTy
2747 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2748                                                bool AllowImm) {
2749   bool Sext = trySkipId("sext");
2750   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2751     return MatchOperand_ParseFail;
2752 
2753   OperandMatchResultTy Res;
2754   if (AllowImm) {
2755     Res = parseRegOrImm(Operands);
2756   } else {
2757     Res = parseReg(Operands);
2758   }
2759   if (Res != MatchOperand_Success) {
2760     return Sext? MatchOperand_ParseFail : Res;
2761   }
2762 
2763   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2764     return MatchOperand_ParseFail;
2765 
2766   AMDGPUOperand::Modifiers Mods;
2767   Mods.Sext = Sext;
2768 
2769   if (Mods.hasIntModifiers()) {
2770     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2771     if (Op.isExpr()) {
2772       Error(Op.getStartLoc(), "expected an absolute expression");
2773       return MatchOperand_ParseFail;
2774     }
2775     Op.setModifiers(Mods);
2776   }
2777 
2778   return MatchOperand_Success;
2779 }
2780 
2781 OperandMatchResultTy
2782 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2783   return parseRegOrImmWithFPInputMods(Operands, false);
2784 }
2785 
2786 OperandMatchResultTy
2787 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2788   return parseRegOrImmWithIntInputMods(Operands, false);
2789 }
2790 
2791 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2792   auto Loc = getLoc();
2793   if (trySkipId("off")) {
2794     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2795                                                 AMDGPUOperand::ImmTyOff, false));
2796     return MatchOperand_Success;
2797   }
2798 
2799   if (!isRegister())
2800     return MatchOperand_NoMatch;
2801 
2802   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2803   if (Reg) {
2804     Operands.push_back(std::move(Reg));
2805     return MatchOperand_Success;
2806   }
2807 
2808   return MatchOperand_ParseFail;
2809 
2810 }
2811 
2812 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2813   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2814 
2815   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2816       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2817       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2818       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2819     return Match_InvalidOperand;
2820 
2821   if ((TSFlags & SIInstrFlags::VOP3) &&
2822       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2823       getForcedEncodingSize() != 64)
2824     return Match_PreferE32;
2825 
2826   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2827       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2828     // v_mac_f32/16 allow only dst_sel == DWORD;
2829     auto OpNum =
2830         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2831     const auto &Op = Inst.getOperand(OpNum);
2832     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2833       return Match_InvalidOperand;
2834     }
2835   }
2836 
2837   return Match_Success;
2838 }
2839 
2840 // What asm variants we should check
2841 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2842   if (getForcedEncodingSize() == 32) {
2843     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2844     return makeArrayRef(Variants);
2845   }
2846 
2847   if (isForcedVOP3()) {
2848     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2849     return makeArrayRef(Variants);
2850   }
2851 
2852   if (isForcedSDWA()) {
2853     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2854                                         AMDGPUAsmVariants::SDWA9};
2855     return makeArrayRef(Variants);
2856   }
2857 
2858   if (isForcedDPP()) {
2859     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2860     return makeArrayRef(Variants);
2861   }
2862 
2863   static const unsigned Variants[] = {
2864     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2865     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2866   };
2867 
2868   return makeArrayRef(Variants);
2869 }
2870 
2871 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2872   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2873   const unsigned Num = Desc.getNumImplicitUses();
2874   for (unsigned i = 0; i < Num; ++i) {
2875     unsigned Reg = Desc.ImplicitUses[i];
2876     switch (Reg) {
2877     case AMDGPU::FLAT_SCR:
2878     case AMDGPU::VCC:
2879     case AMDGPU::VCC_LO:
2880     case AMDGPU::VCC_HI:
2881     case AMDGPU::M0:
2882       return Reg;
2883     default:
2884       break;
2885     }
2886   }
2887   return AMDGPU::NoRegister;
2888 }
2889 
2890 // NB: This code is correct only when used to check constant
2891 // bus limitations because GFX7 support no f16 inline constants.
2892 // Note that there are no cases when a GFX7 opcode violates
2893 // constant bus limitations due to the use of an f16 constant.
2894 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2895                                        unsigned OpIdx) const {
2896   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2897 
2898   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2899     return false;
2900   }
2901 
2902   const MCOperand &MO = Inst.getOperand(OpIdx);
2903 
2904   int64_t Val = MO.getImm();
2905   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2906 
2907   switch (OpSize) { // expected operand size
2908   case 8:
2909     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2910   case 4:
2911     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2912   case 2: {
2913     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2914     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2915         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2916         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2917       return AMDGPU::isInlinableIntLiteral(Val);
2918 
2919     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2920         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2921         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2922       return AMDGPU::isInlinableIntLiteralV216(Val);
2923 
2924     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2925         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2926         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2927       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2928 
2929     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2930   }
2931   default:
2932     llvm_unreachable("invalid operand size");
2933   }
2934 }
2935 
2936 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2937   if (!isGFX10())
2938     return 1;
2939 
2940   switch (Opcode) {
2941   // 64-bit shift instructions can use only one scalar value input
2942   case AMDGPU::V_LSHLREV_B64:
2943   case AMDGPU::V_LSHLREV_B64_gfx10:
2944   case AMDGPU::V_LSHL_B64:
2945   case AMDGPU::V_LSHRREV_B64:
2946   case AMDGPU::V_LSHRREV_B64_gfx10:
2947   case AMDGPU::V_LSHR_B64:
2948   case AMDGPU::V_ASHRREV_I64:
2949   case AMDGPU::V_ASHRREV_I64_gfx10:
2950   case AMDGPU::V_ASHR_I64:
2951     return 1;
2952   default:
2953     return 2;
2954   }
2955 }
2956 
2957 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2958   const MCOperand &MO = Inst.getOperand(OpIdx);
2959   if (MO.isImm()) {
2960     return !isInlineConstant(Inst, OpIdx);
2961   } else if (MO.isReg()) {
2962     auto Reg = MO.getReg();
2963     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2964     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2965   } else {
2966     return true;
2967   }
2968 }
2969 
2970 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2971   const unsigned Opcode = Inst.getOpcode();
2972   const MCInstrDesc &Desc = MII.get(Opcode);
2973   unsigned ConstantBusUseCount = 0;
2974   unsigned NumLiterals = 0;
2975   unsigned LiteralSize;
2976 
2977   if (Desc.TSFlags &
2978       (SIInstrFlags::VOPC |
2979        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2980        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2981        SIInstrFlags::SDWA)) {
2982     // Check special imm operands (used by madmk, etc)
2983     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2984       ++ConstantBusUseCount;
2985     }
2986 
2987     SmallDenseSet<unsigned> SGPRsUsed;
2988     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2989     if (SGPRUsed != AMDGPU::NoRegister) {
2990       SGPRsUsed.insert(SGPRUsed);
2991       ++ConstantBusUseCount;
2992     }
2993 
2994     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2995     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2996     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2997 
2998     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2999 
3000     for (int OpIdx : OpIndices) {
3001       if (OpIdx == -1) break;
3002 
3003       const MCOperand &MO = Inst.getOperand(OpIdx);
3004       if (usesConstantBus(Inst, OpIdx)) {
3005         if (MO.isReg()) {
3006           const unsigned Reg = mc2PseudoReg(MO.getReg());
3007           // Pairs of registers with a partial intersections like these
3008           //   s0, s[0:1]
3009           //   flat_scratch_lo, flat_scratch
3010           //   flat_scratch_lo, flat_scratch_hi
3011           // are theoretically valid but they are disabled anyway.
3012           // Note that this code mimics SIInstrInfo::verifyInstruction
3013           if (!SGPRsUsed.count(Reg)) {
3014             SGPRsUsed.insert(Reg);
3015             ++ConstantBusUseCount;
3016           }
3017         } else { // Expression or a literal
3018 
3019           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3020             continue; // special operand like VINTERP attr_chan
3021 
3022           // An instruction may use only one literal.
3023           // This has been validated on the previous step.
3024           // See validateVOP3Literal.
3025           // This literal may be used as more than one operand.
3026           // If all these operands are of the same size,
3027           // this literal counts as one scalar value.
3028           // Otherwise it counts as 2 scalar values.
3029           // See "GFX10 Shader Programming", section 3.6.2.3.
3030 
3031           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3032           if (Size < 4) Size = 4;
3033 
3034           if (NumLiterals == 0) {
3035             NumLiterals = 1;
3036             LiteralSize = Size;
3037           } else if (LiteralSize != Size) {
3038             NumLiterals = 2;
3039           }
3040         }
3041       }
3042     }
3043   }
3044   ConstantBusUseCount += NumLiterals;
3045 
3046   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3047 }
3048 
3049 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3050   const unsigned Opcode = Inst.getOpcode();
3051   const MCInstrDesc &Desc = MII.get(Opcode);
3052 
3053   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3054   if (DstIdx == -1 ||
3055       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3056     return true;
3057   }
3058 
3059   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3060 
3061   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3062   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3063   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3064 
3065   assert(DstIdx != -1);
3066   const MCOperand &Dst = Inst.getOperand(DstIdx);
3067   assert(Dst.isReg());
3068   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3069 
3070   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3071 
3072   for (int SrcIdx : SrcIndices) {
3073     if (SrcIdx == -1) break;
3074     const MCOperand &Src = Inst.getOperand(SrcIdx);
3075     if (Src.isReg()) {
3076       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3077       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3078         return false;
3079       }
3080     }
3081   }
3082 
3083   return true;
3084 }
3085 
3086 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3087 
3088   const unsigned Opc = Inst.getOpcode();
3089   const MCInstrDesc &Desc = MII.get(Opc);
3090 
3091   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3092     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3093     assert(ClampIdx != -1);
3094     return Inst.getOperand(ClampIdx).getImm() == 0;
3095   }
3096 
3097   return true;
3098 }
3099 
3100 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3101 
3102   const unsigned Opc = Inst.getOpcode();
3103   const MCInstrDesc &Desc = MII.get(Opc);
3104 
3105   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3106     return true;
3107 
3108   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3109   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3110   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3111 
3112   assert(VDataIdx != -1);
3113 
3114   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3115     return true;
3116 
3117   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3118   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3119   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3120   if (DMask == 0)
3121     DMask = 1;
3122 
3123   unsigned DataSize =
3124     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3125   if (hasPackedD16()) {
3126     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3127     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3128       DataSize = (DataSize + 1) / 2;
3129   }
3130 
3131   return (VDataSize / 4) == DataSize + TFESize;
3132 }
3133 
3134 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3135   const unsigned Opc = Inst.getOpcode();
3136   const MCInstrDesc &Desc = MII.get(Opc);
3137 
3138   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3139     return true;
3140 
3141   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3142 
3143   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3144       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3145   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3146   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3147   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3148 
3149   assert(VAddr0Idx != -1);
3150   assert(SrsrcIdx != -1);
3151   assert(SrsrcIdx > VAddr0Idx);
3152 
3153   if (DimIdx == -1)
3154     return true; // intersect_ray
3155 
3156   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3157   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3158   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3159   unsigned VAddrSize =
3160       IsNSA ? SrsrcIdx - VAddr0Idx
3161             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3162 
3163   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3164                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3165                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3166                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3167   if (!IsNSA) {
3168     if (AddrSize > 8)
3169       AddrSize = 16;
3170     else if (AddrSize > 4)
3171       AddrSize = 8;
3172   }
3173 
3174   return VAddrSize == AddrSize;
3175 }
3176 
3177 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3178 
3179   const unsigned Opc = Inst.getOpcode();
3180   const MCInstrDesc &Desc = MII.get(Opc);
3181 
3182   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3183     return true;
3184   if (!Desc.mayLoad() || !Desc.mayStore())
3185     return true; // Not atomic
3186 
3187   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3188   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3189 
3190   // This is an incomplete check because image_atomic_cmpswap
3191   // may only use 0x3 and 0xf while other atomic operations
3192   // may use 0x1 and 0x3. However these limitations are
3193   // verified when we check that dmask matches dst size.
3194   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3195 }
3196 
3197 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3198 
3199   const unsigned Opc = Inst.getOpcode();
3200   const MCInstrDesc &Desc = MII.get(Opc);
3201 
3202   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3203     return true;
3204 
3205   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3206   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3207 
3208   // GATHER4 instructions use dmask in a different fashion compared to
3209   // other MIMG instructions. The only useful DMASK values are
3210   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3211   // (red,red,red,red) etc.) The ISA document doesn't mention
3212   // this.
3213   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3214 }
3215 
3216 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3217 {
3218   switch (Opcode) {
3219   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3220   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3221   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3222     return true;
3223   default:
3224     return false;
3225   }
3226 }
3227 
3228 // movrels* opcodes should only allow VGPRS as src0.
3229 // This is specified in .td description for vop1/vop3,
3230 // but sdwa is handled differently. See isSDWAOperand.
3231 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3232 
3233   const unsigned Opc = Inst.getOpcode();
3234   const MCInstrDesc &Desc = MII.get(Opc);
3235 
3236   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3237     return true;
3238 
3239   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3240   assert(Src0Idx != -1);
3241 
3242   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3243   if (!Src0.isReg())
3244     return false;
3245 
3246   auto Reg = Src0.getReg();
3247   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3248   return !isSGPR(mc2PseudoReg(Reg), TRI);
3249 }
3250 
3251 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
3252 
3253   const unsigned Opc = Inst.getOpcode();
3254 
3255   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3256     return true;
3257 
3258   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3259   assert(Src0Idx != -1);
3260 
3261   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3262   if (!Src0.isReg())
3263     return true;
3264 
3265   auto Reg = Src0.getReg();
3266   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3267   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
3268     Error(getLoc(), "source operand must be either a VGPR or an inline constant");
3269     return false;
3270   }
3271 
3272   return true;
3273 }
3274 
3275 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3276 
3277   const unsigned Opc = Inst.getOpcode();
3278   const MCInstrDesc &Desc = MII.get(Opc);
3279 
3280   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3281     return true;
3282 
3283   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3284   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3285     if (isCI() || isSI())
3286       return false;
3287   }
3288 
3289   return true;
3290 }
3291 
3292 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3293   const unsigned Opc = Inst.getOpcode();
3294   const MCInstrDesc &Desc = MII.get(Opc);
3295 
3296   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3297     return true;
3298 
3299   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3300   if (DimIdx < 0)
3301     return true;
3302 
3303   long Imm = Inst.getOperand(DimIdx).getImm();
3304   if (Imm < 0 || Imm >= 8)
3305     return false;
3306 
3307   return true;
3308 }
3309 
3310 static bool IsRevOpcode(const unsigned Opcode)
3311 {
3312   switch (Opcode) {
3313   case AMDGPU::V_SUBREV_F32_e32:
3314   case AMDGPU::V_SUBREV_F32_e64:
3315   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3316   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3317   case AMDGPU::V_SUBREV_F32_e32_vi:
3318   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3319   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3320   case AMDGPU::V_SUBREV_F32_e64_vi:
3321 
3322   case AMDGPU::V_SUBREV_CO_U32_e32:
3323   case AMDGPU::V_SUBREV_CO_U32_e64:
3324   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3325   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3326 
3327   case AMDGPU::V_SUBBREV_U32_e32:
3328   case AMDGPU::V_SUBBREV_U32_e64:
3329   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3330   case AMDGPU::V_SUBBREV_U32_e32_vi:
3331   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3332   case AMDGPU::V_SUBBREV_U32_e64_vi:
3333 
3334   case AMDGPU::V_SUBREV_U32_e32:
3335   case AMDGPU::V_SUBREV_U32_e64:
3336   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3337   case AMDGPU::V_SUBREV_U32_e32_vi:
3338   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3339   case AMDGPU::V_SUBREV_U32_e64_vi:
3340 
3341   case AMDGPU::V_SUBREV_F16_e32:
3342   case AMDGPU::V_SUBREV_F16_e64:
3343   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3344   case AMDGPU::V_SUBREV_F16_e32_vi:
3345   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3346   case AMDGPU::V_SUBREV_F16_e64_vi:
3347 
3348   case AMDGPU::V_SUBREV_U16_e32:
3349   case AMDGPU::V_SUBREV_U16_e64:
3350   case AMDGPU::V_SUBREV_U16_e32_vi:
3351   case AMDGPU::V_SUBREV_U16_e64_vi:
3352 
3353   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3354   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3355   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3356 
3357   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3358   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3359 
3360   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3361   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3362 
3363   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3364   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3365 
3366   case AMDGPU::V_LSHRREV_B32_e32:
3367   case AMDGPU::V_LSHRREV_B32_e64:
3368   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3369   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3370   case AMDGPU::V_LSHRREV_B32_e32_vi:
3371   case AMDGPU::V_LSHRREV_B32_e64_vi:
3372   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3373   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3374 
3375   case AMDGPU::V_ASHRREV_I32_e32:
3376   case AMDGPU::V_ASHRREV_I32_e64:
3377   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3378   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3379   case AMDGPU::V_ASHRREV_I32_e32_vi:
3380   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3381   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3382   case AMDGPU::V_ASHRREV_I32_e64_vi:
3383 
3384   case AMDGPU::V_LSHLREV_B32_e32:
3385   case AMDGPU::V_LSHLREV_B32_e64:
3386   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3387   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3388   case AMDGPU::V_LSHLREV_B32_e32_vi:
3389   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3390   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3391   case AMDGPU::V_LSHLREV_B32_e64_vi:
3392 
3393   case AMDGPU::V_LSHLREV_B16_e32:
3394   case AMDGPU::V_LSHLREV_B16_e64:
3395   case AMDGPU::V_LSHLREV_B16_e32_vi:
3396   case AMDGPU::V_LSHLREV_B16_e64_vi:
3397   case AMDGPU::V_LSHLREV_B16_gfx10:
3398 
3399   case AMDGPU::V_LSHRREV_B16_e32:
3400   case AMDGPU::V_LSHRREV_B16_e64:
3401   case AMDGPU::V_LSHRREV_B16_e32_vi:
3402   case AMDGPU::V_LSHRREV_B16_e64_vi:
3403   case AMDGPU::V_LSHRREV_B16_gfx10:
3404 
3405   case AMDGPU::V_ASHRREV_I16_e32:
3406   case AMDGPU::V_ASHRREV_I16_e64:
3407   case AMDGPU::V_ASHRREV_I16_e32_vi:
3408   case AMDGPU::V_ASHRREV_I16_e64_vi:
3409   case AMDGPU::V_ASHRREV_I16_gfx10:
3410 
3411   case AMDGPU::V_LSHLREV_B64:
3412   case AMDGPU::V_LSHLREV_B64_gfx10:
3413   case AMDGPU::V_LSHLREV_B64_vi:
3414 
3415   case AMDGPU::V_LSHRREV_B64:
3416   case AMDGPU::V_LSHRREV_B64_gfx10:
3417   case AMDGPU::V_LSHRREV_B64_vi:
3418 
3419   case AMDGPU::V_ASHRREV_I64:
3420   case AMDGPU::V_ASHRREV_I64_gfx10:
3421   case AMDGPU::V_ASHRREV_I64_vi:
3422 
3423   case AMDGPU::V_PK_LSHLREV_B16:
3424   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3425   case AMDGPU::V_PK_LSHLREV_B16_vi:
3426 
3427   case AMDGPU::V_PK_LSHRREV_B16:
3428   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3429   case AMDGPU::V_PK_LSHRREV_B16_vi:
3430   case AMDGPU::V_PK_ASHRREV_I16:
3431   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3432   case AMDGPU::V_PK_ASHRREV_I16_vi:
3433     return true;
3434   default:
3435     return false;
3436   }
3437 }
3438 
3439 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3440 
3441   using namespace SIInstrFlags;
3442   const unsigned Opcode = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opcode);
3444 
3445   // lds_direct register is defined so that it can be used
3446   // with 9-bit operands only. Ignore encodings which do not accept these.
3447   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3448     return true;
3449 
3450   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3451   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3452   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3453 
3454   const int SrcIndices[] = { Src1Idx, Src2Idx };
3455 
3456   // lds_direct cannot be specified as either src1 or src2.
3457   for (int SrcIdx : SrcIndices) {
3458     if (SrcIdx == -1) break;
3459     const MCOperand &Src = Inst.getOperand(SrcIdx);
3460     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3461       return false;
3462     }
3463   }
3464 
3465   if (Src0Idx == -1)
3466     return true;
3467 
3468   const MCOperand &Src = Inst.getOperand(Src0Idx);
3469   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3470     return true;
3471 
3472   // lds_direct is specified as src0. Check additional limitations.
3473   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3474 }
3475 
3476 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3477   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3478     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3479     if (Op.isFlatOffset())
3480       return Op.getStartLoc();
3481   }
3482   return getLoc();
3483 }
3484 
3485 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3486                                          const OperandVector &Operands) {
3487   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3488   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3489     return true;
3490 
3491   auto Opcode = Inst.getOpcode();
3492   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3493   assert(OpNum != -1);
3494 
3495   const auto &Op = Inst.getOperand(OpNum);
3496   if (!hasFlatOffsets() && Op.getImm() != 0) {
3497     Error(getFlatOffsetLoc(Operands),
3498           "flat offset modifier is not supported on this GPU");
3499     return false;
3500   }
3501 
3502   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3503   // For FLAT segment the offset must be positive;
3504   // MSB is ignored and forced to zero.
3505   unsigned OffsetSize = isGFX9() ? 13 : 12;
3506   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3507     if (!isIntN(OffsetSize, Op.getImm())) {
3508       Error(getFlatOffsetLoc(Operands),
3509             isGFX9() ? "expected a 13-bit signed offset" :
3510                        "expected a 12-bit signed offset");
3511       return false;
3512     }
3513   } else {
3514     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3515       Error(getFlatOffsetLoc(Operands),
3516             isGFX9() ? "expected a 12-bit unsigned offset" :
3517                        "expected an 11-bit unsigned offset");
3518       return false;
3519     }
3520   }
3521 
3522   return true;
3523 }
3524 
3525 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3526   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3527     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3528     if (Op.isSMEMOffset())
3529       return Op.getStartLoc();
3530   }
3531   return getLoc();
3532 }
3533 
3534 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3535                                          const OperandVector &Operands) {
3536   if (isCI() || isSI())
3537     return true;
3538 
3539   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3540   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3541     return true;
3542 
3543   auto Opcode = Inst.getOpcode();
3544   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3545   if (OpNum == -1)
3546     return true;
3547 
3548   const auto &Op = Inst.getOperand(OpNum);
3549   if (!Op.isImm())
3550     return true;
3551 
3552   uint64_t Offset = Op.getImm();
3553   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3554   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3555       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3556     return true;
3557 
3558   Error(getSMEMOffsetLoc(Operands),
3559         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3560                                "expected a 21-bit signed offset");
3561 
3562   return false;
3563 }
3564 
3565 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3566   unsigned Opcode = Inst.getOpcode();
3567   const MCInstrDesc &Desc = MII.get(Opcode);
3568   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3569     return true;
3570 
3571   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3572   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3573 
3574   const int OpIndices[] = { Src0Idx, Src1Idx };
3575 
3576   unsigned NumExprs = 0;
3577   unsigned NumLiterals = 0;
3578   uint32_t LiteralValue;
3579 
3580   for (int OpIdx : OpIndices) {
3581     if (OpIdx == -1) break;
3582 
3583     const MCOperand &MO = Inst.getOperand(OpIdx);
3584     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3585     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3586       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3587         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3588         if (NumLiterals == 0 || LiteralValue != Value) {
3589           LiteralValue = Value;
3590           ++NumLiterals;
3591         }
3592       } else if (MO.isExpr()) {
3593         ++NumExprs;
3594       }
3595     }
3596   }
3597 
3598   return NumLiterals + NumExprs <= 1;
3599 }
3600 
3601 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3602   const unsigned Opc = Inst.getOpcode();
3603   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3604       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3605     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3606     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3607 
3608     if (OpSel & ~3)
3609       return false;
3610   }
3611   return true;
3612 }
3613 
3614 // Check if VCC register matches wavefront size
3615 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3616   auto FB = getFeatureBits();
3617   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3618     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3619 }
3620 
3621 // VOP3 literal is only allowed in GFX10+ and only one can be used
3622 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3623   unsigned Opcode = Inst.getOpcode();
3624   const MCInstrDesc &Desc = MII.get(Opcode);
3625   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3626     return true;
3627 
3628   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3629   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3630   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3631 
3632   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3633 
3634   unsigned NumExprs = 0;
3635   unsigned NumLiterals = 0;
3636   uint32_t LiteralValue;
3637 
3638   for (int OpIdx : OpIndices) {
3639     if (OpIdx == -1) break;
3640 
3641     const MCOperand &MO = Inst.getOperand(OpIdx);
3642     if (!MO.isImm() && !MO.isExpr())
3643       continue;
3644     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3645       continue;
3646 
3647     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3648         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3649       return false;
3650 
3651     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3652       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3653       if (NumLiterals == 0 || LiteralValue != Value) {
3654         LiteralValue = Value;
3655         ++NumLiterals;
3656       }
3657     } else if (MO.isExpr()) {
3658       ++NumExprs;
3659     }
3660   }
3661   NumLiterals += NumExprs;
3662 
3663   return !NumLiterals ||
3664          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3665 }
3666 
3667 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3668                                           const SMLoc &IDLoc,
3669                                           const OperandVector &Operands) {
3670   if (!validateLdsDirect(Inst)) {
3671     Error(IDLoc,
3672       "invalid use of lds_direct");
3673     return false;
3674   }
3675   if (!validateSOPLiteral(Inst)) {
3676     Error(IDLoc,
3677       "only one literal operand is allowed");
3678     return false;
3679   }
3680   if (!validateVOP3Literal(Inst)) {
3681     Error(IDLoc,
3682       "invalid literal operand");
3683     return false;
3684   }
3685   if (!validateConstantBusLimitations(Inst)) {
3686     Error(IDLoc,
3687       "invalid operand (violates constant bus restrictions)");
3688     return false;
3689   }
3690   if (!validateEarlyClobberLimitations(Inst)) {
3691     Error(IDLoc,
3692       "destination must be different than all sources");
3693     return false;
3694   }
3695   if (!validateIntClampSupported(Inst)) {
3696     Error(IDLoc,
3697       "integer clamping is not supported on this GPU");
3698     return false;
3699   }
3700   if (!validateOpSel(Inst)) {
3701     Error(IDLoc,
3702       "invalid op_sel operand");
3703     return false;
3704   }
3705   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3706   if (!validateMIMGD16(Inst)) {
3707     Error(IDLoc,
3708       "d16 modifier is not supported on this GPU");
3709     return false;
3710   }
3711   if (!validateMIMGDim(Inst)) {
3712     Error(IDLoc, "dim modifier is required on this GPU");
3713     return false;
3714   }
3715   if (!validateMIMGDataSize(Inst)) {
3716     Error(IDLoc,
3717       "image data size does not match dmask and tfe");
3718     return false;
3719   }
3720   if (!validateMIMGAddrSize(Inst)) {
3721     Error(IDLoc,
3722       "image address size does not match dim and a16");
3723     return false;
3724   }
3725   if (!validateMIMGAtomicDMask(Inst)) {
3726     Error(IDLoc,
3727       "invalid atomic image dmask");
3728     return false;
3729   }
3730   if (!validateMIMGGatherDMask(Inst)) {
3731     Error(IDLoc,
3732       "invalid image_gather dmask: only one bit must be set");
3733     return false;
3734   }
3735   if (!validateMovrels(Inst)) {
3736     Error(IDLoc, "source operand must be a VGPR");
3737     return false;
3738   }
3739   if (!validateFlatOffset(Inst, Operands)) {
3740     return false;
3741   }
3742   if (!validateSMEMOffset(Inst, Operands)) {
3743     return false;
3744   }
3745   if (!validateMAIAccWrite(Inst)) {
3746     return false;
3747   }
3748 
3749   return true;
3750 }
3751 
3752 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3753                                             const FeatureBitset &FBS,
3754                                             unsigned VariantID = 0);
3755 
3756 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3757                                               OperandVector &Operands,
3758                                               MCStreamer &Out,
3759                                               uint64_t &ErrorInfo,
3760                                               bool MatchingInlineAsm) {
3761   MCInst Inst;
3762   unsigned Result = Match_Success;
3763   for (auto Variant : getMatchedVariants()) {
3764     uint64_t EI;
3765     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3766                                   Variant);
3767     // We order match statuses from least to most specific. We use most specific
3768     // status as resulting
3769     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3770     if ((R == Match_Success) ||
3771         (R == Match_PreferE32) ||
3772         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3773         (R == Match_InvalidOperand && Result != Match_MissingFeature
3774                                    && Result != Match_PreferE32) ||
3775         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3776                                    && Result != Match_MissingFeature
3777                                    && Result != Match_PreferE32)) {
3778       Result = R;
3779       ErrorInfo = EI;
3780     }
3781     if (R == Match_Success)
3782       break;
3783   }
3784 
3785   switch (Result) {
3786   default: break;
3787   case Match_Success:
3788     if (!validateInstruction(Inst, IDLoc, Operands)) {
3789       return true;
3790     }
3791     Inst.setLoc(IDLoc);
3792     Out.emitInstruction(Inst, getSTI());
3793     return false;
3794 
3795   case Match_MissingFeature:
3796     return Error(IDLoc, "instruction not supported on this GPU");
3797 
3798   case Match_MnemonicFail: {
3799     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3800     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3801         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3802     return Error(IDLoc, "invalid instruction" + Suggestion,
3803                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3804   }
3805 
3806   case Match_InvalidOperand: {
3807     SMLoc ErrorLoc = IDLoc;
3808     if (ErrorInfo != ~0ULL) {
3809       if (ErrorInfo >= Operands.size()) {
3810         return Error(IDLoc, "too few operands for instruction");
3811       }
3812       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3813       if (ErrorLoc == SMLoc())
3814         ErrorLoc = IDLoc;
3815     }
3816     return Error(ErrorLoc, "invalid operand for instruction");
3817   }
3818 
3819   case Match_PreferE32:
3820     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3821                         "should be encoded as e32");
3822   }
3823   llvm_unreachable("Implement any new match types added!");
3824 }
3825 
3826 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3827   int64_t Tmp = -1;
3828   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3829     return true;
3830   }
3831   if (getParser().parseAbsoluteExpression(Tmp)) {
3832     return true;
3833   }
3834   Ret = static_cast<uint32_t>(Tmp);
3835   return false;
3836 }
3837 
3838 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3839                                                uint32_t &Minor) {
3840   if (ParseAsAbsoluteExpression(Major))
3841     return TokError("invalid major version");
3842 
3843   if (getLexer().isNot(AsmToken::Comma))
3844     return TokError("minor version number required, comma expected");
3845   Lex();
3846 
3847   if (ParseAsAbsoluteExpression(Minor))
3848     return TokError("invalid minor version");
3849 
3850   return false;
3851 }
3852 
3853 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3854   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3855     return TokError("directive only supported for amdgcn architecture");
3856 
3857   std::string Target;
3858 
3859   SMLoc TargetStart = getTok().getLoc();
3860   if (getParser().parseEscapedString(Target))
3861     return true;
3862   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3863 
3864   std::string ExpectedTarget;
3865   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3866   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3867 
3868   if (Target != ExpectedTargetOS.str())
3869     return getParser().Error(TargetRange.Start, "target must match options",
3870                              TargetRange);
3871 
3872   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3873   return false;
3874 }
3875 
3876 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3877   return getParser().Error(Range.Start, "value out of range", Range);
3878 }
3879 
3880 bool AMDGPUAsmParser::calculateGPRBlocks(
3881     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3882     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3883     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3884     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3885   // TODO(scott.linder): These calculations are duplicated from
3886   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3887   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3888 
3889   unsigned NumVGPRs = NextFreeVGPR;
3890   unsigned NumSGPRs = NextFreeSGPR;
3891 
3892   if (Version.Major >= 10)
3893     NumSGPRs = 0;
3894   else {
3895     unsigned MaxAddressableNumSGPRs =
3896         IsaInfo::getAddressableNumSGPRs(&getSTI());
3897 
3898     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3899         NumSGPRs > MaxAddressableNumSGPRs)
3900       return OutOfRangeError(SGPRRange);
3901 
3902     NumSGPRs +=
3903         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3904 
3905     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3906         NumSGPRs > MaxAddressableNumSGPRs)
3907       return OutOfRangeError(SGPRRange);
3908 
3909     if (Features.test(FeatureSGPRInitBug))
3910       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3911   }
3912 
3913   VGPRBlocks =
3914       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3915   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3916 
3917   return false;
3918 }
3919 
3920 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3921   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3922     return TokError("directive only supported for amdgcn architecture");
3923 
3924   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3925     return TokError("directive only supported for amdhsa OS");
3926 
3927   StringRef KernelName;
3928   if (getParser().parseIdentifier(KernelName))
3929     return true;
3930 
3931   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3932 
3933   StringSet<> Seen;
3934 
3935   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3936 
3937   SMRange VGPRRange;
3938   uint64_t NextFreeVGPR = 0;
3939   SMRange SGPRRange;
3940   uint64_t NextFreeSGPR = 0;
3941   unsigned UserSGPRCount = 0;
3942   bool ReserveVCC = true;
3943   bool ReserveFlatScr = true;
3944   bool ReserveXNACK = hasXNACK();
3945   Optional<bool> EnableWavefrontSize32;
3946 
3947   while (true) {
3948     while (getLexer().is(AsmToken::EndOfStatement))
3949       Lex();
3950 
3951     if (getLexer().isNot(AsmToken::Identifier))
3952       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3953 
3954     StringRef ID = getTok().getIdentifier();
3955     SMRange IDRange = getTok().getLocRange();
3956     Lex();
3957 
3958     if (ID == ".end_amdhsa_kernel")
3959       break;
3960 
3961     if (Seen.find(ID) != Seen.end())
3962       return TokError(".amdhsa_ directives cannot be repeated");
3963     Seen.insert(ID);
3964 
3965     SMLoc ValStart = getTok().getLoc();
3966     int64_t IVal;
3967     if (getParser().parseAbsoluteExpression(IVal))
3968       return true;
3969     SMLoc ValEnd = getTok().getLoc();
3970     SMRange ValRange = SMRange(ValStart, ValEnd);
3971 
3972     if (IVal < 0)
3973       return OutOfRangeError(ValRange);
3974 
3975     uint64_t Val = IVal;
3976 
3977 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3978   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3979     return OutOfRangeError(RANGE);                                             \
3980   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3981 
3982     if (ID == ".amdhsa_group_segment_fixed_size") {
3983       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3984         return OutOfRangeError(ValRange);
3985       KD.group_segment_fixed_size = Val;
3986     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3987       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3988         return OutOfRangeError(ValRange);
3989       KD.private_segment_fixed_size = Val;
3990     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3991       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3992                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3993                        Val, ValRange);
3994       if (Val)
3995         UserSGPRCount += 4;
3996     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3997       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3998                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3999                        ValRange);
4000       if (Val)
4001         UserSGPRCount += 2;
4002     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4003       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4004                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4005                        ValRange);
4006       if (Val)
4007         UserSGPRCount += 2;
4008     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4009       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4010                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4011                        Val, ValRange);
4012       if (Val)
4013         UserSGPRCount += 2;
4014     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4015       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4016                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4017                        ValRange);
4018       if (Val)
4019         UserSGPRCount += 2;
4020     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4021       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4022                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4023                        ValRange);
4024       if (Val)
4025         UserSGPRCount += 2;
4026     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4027       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4028                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4029                        Val, ValRange);
4030       if (Val)
4031         UserSGPRCount += 1;
4032     } else if (ID == ".amdhsa_wavefront_size32") {
4033       if (IVersion.Major < 10)
4034         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4035                                  IDRange);
4036       EnableWavefrontSize32 = Val;
4037       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4038                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4039                        Val, ValRange);
4040     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4041       PARSE_BITS_ENTRY(
4042           KD.compute_pgm_rsrc2,
4043           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4044           ValRange);
4045     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4046       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4047                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4048                        ValRange);
4049     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4050       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4051                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4052                        ValRange);
4053     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4054       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4055                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4056                        ValRange);
4057     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4059                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4060                        ValRange);
4061     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4062       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4063                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4064                        ValRange);
4065     } else if (ID == ".amdhsa_next_free_vgpr") {
4066       VGPRRange = ValRange;
4067       NextFreeVGPR = Val;
4068     } else if (ID == ".amdhsa_next_free_sgpr") {
4069       SGPRRange = ValRange;
4070       NextFreeSGPR = Val;
4071     } else if (ID == ".amdhsa_reserve_vcc") {
4072       if (!isUInt<1>(Val))
4073         return OutOfRangeError(ValRange);
4074       ReserveVCC = Val;
4075     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4076       if (IVersion.Major < 7)
4077         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4078                                  IDRange);
4079       if (!isUInt<1>(Val))
4080         return OutOfRangeError(ValRange);
4081       ReserveFlatScr = Val;
4082     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4083       if (IVersion.Major < 8)
4084         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4085                                  IDRange);
4086       if (!isUInt<1>(Val))
4087         return OutOfRangeError(ValRange);
4088       ReserveXNACK = Val;
4089     } else if (ID == ".amdhsa_float_round_mode_32") {
4090       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4091                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4092     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4094                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4095     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4096       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4097                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4098     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4099       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4100                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4101                        ValRange);
4102     } else if (ID == ".amdhsa_dx10_clamp") {
4103       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4104                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4105     } else if (ID == ".amdhsa_ieee_mode") {
4106       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4107                        Val, ValRange);
4108     } else if (ID == ".amdhsa_fp16_overflow") {
4109       if (IVersion.Major < 9)
4110         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4111                                  IDRange);
4112       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4113                        ValRange);
4114     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4115       if (IVersion.Major < 10)
4116         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4117                                  IDRange);
4118       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4119                        ValRange);
4120     } else if (ID == ".amdhsa_memory_ordered") {
4121       if (IVersion.Major < 10)
4122         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4123                                  IDRange);
4124       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4125                        ValRange);
4126     } else if (ID == ".amdhsa_forward_progress") {
4127       if (IVersion.Major < 10)
4128         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4129                                  IDRange);
4130       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4131                        ValRange);
4132     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4133       PARSE_BITS_ENTRY(
4134           KD.compute_pgm_rsrc2,
4135           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4136           ValRange);
4137     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4138       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4139                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4140                        Val, ValRange);
4141     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4142       PARSE_BITS_ENTRY(
4143           KD.compute_pgm_rsrc2,
4144           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4145           ValRange);
4146     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4147       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4148                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4149                        Val, ValRange);
4150     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4151       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4152                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4153                        Val, ValRange);
4154     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4155       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4156                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4157                        Val, ValRange);
4158     } else if (ID == ".amdhsa_exception_int_div_zero") {
4159       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4160                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4161                        Val, ValRange);
4162     } else {
4163       return getParser().Error(IDRange.Start,
4164                                "unknown .amdhsa_kernel directive", IDRange);
4165     }
4166 
4167 #undef PARSE_BITS_ENTRY
4168   }
4169 
4170   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4171     return TokError(".amdhsa_next_free_vgpr directive is required");
4172 
4173   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4174     return TokError(".amdhsa_next_free_sgpr directive is required");
4175 
4176   unsigned VGPRBlocks;
4177   unsigned SGPRBlocks;
4178   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4179                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4180                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4181                          SGPRBlocks))
4182     return true;
4183 
4184   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4185           VGPRBlocks))
4186     return OutOfRangeError(VGPRRange);
4187   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4188                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4189 
4190   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4191           SGPRBlocks))
4192     return OutOfRangeError(SGPRRange);
4193   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4194                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4195                   SGPRBlocks);
4196 
4197   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4198     return TokError("too many user SGPRs enabled");
4199   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4200                   UserSGPRCount);
4201 
4202   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4203       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4204       ReserveFlatScr, ReserveXNACK);
4205   return false;
4206 }
4207 
4208 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4209   uint32_t Major;
4210   uint32_t Minor;
4211 
4212   if (ParseDirectiveMajorMinor(Major, Minor))
4213     return true;
4214 
4215   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4216   return false;
4217 }
4218 
4219 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4220   uint32_t Major;
4221   uint32_t Minor;
4222   uint32_t Stepping;
4223   StringRef VendorName;
4224   StringRef ArchName;
4225 
4226   // If this directive has no arguments, then use the ISA version for the
4227   // targeted GPU.
4228   if (getLexer().is(AsmToken::EndOfStatement)) {
4229     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4230     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4231                                                       ISA.Stepping,
4232                                                       "AMD", "AMDGPU");
4233     return false;
4234   }
4235 
4236   if (ParseDirectiveMajorMinor(Major, Minor))
4237     return true;
4238 
4239   if (getLexer().isNot(AsmToken::Comma))
4240     return TokError("stepping version number required, comma expected");
4241   Lex();
4242 
4243   if (ParseAsAbsoluteExpression(Stepping))
4244     return TokError("invalid stepping version");
4245 
4246   if (getLexer().isNot(AsmToken::Comma))
4247     return TokError("vendor name required, comma expected");
4248   Lex();
4249 
4250   if (getLexer().isNot(AsmToken::String))
4251     return TokError("invalid vendor name");
4252 
4253   VendorName = getLexer().getTok().getStringContents();
4254   Lex();
4255 
4256   if (getLexer().isNot(AsmToken::Comma))
4257     return TokError("arch name required, comma expected");
4258   Lex();
4259 
4260   if (getLexer().isNot(AsmToken::String))
4261     return TokError("invalid arch name");
4262 
4263   ArchName = getLexer().getTok().getStringContents();
4264   Lex();
4265 
4266   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4267                                                     VendorName, ArchName);
4268   return false;
4269 }
4270 
4271 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4272                                                amd_kernel_code_t &Header) {
4273   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4274   // assembly for backwards compatibility.
4275   if (ID == "max_scratch_backing_memory_byte_size") {
4276     Parser.eatToEndOfStatement();
4277     return false;
4278   }
4279 
4280   SmallString<40> ErrStr;
4281   raw_svector_ostream Err(ErrStr);
4282   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4283     return TokError(Err.str());
4284   }
4285   Lex();
4286 
4287   if (ID == "enable_wavefront_size32") {
4288     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4289       if (!isGFX10())
4290         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4291       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4292         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4293     } else {
4294       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4295         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4296     }
4297   }
4298 
4299   if (ID == "wavefront_size") {
4300     if (Header.wavefront_size == 5) {
4301       if (!isGFX10())
4302         return TokError("wavefront_size=5 is only allowed on GFX10+");
4303       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4304         return TokError("wavefront_size=5 requires +WavefrontSize32");
4305     } else if (Header.wavefront_size == 6) {
4306       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4307         return TokError("wavefront_size=6 requires +WavefrontSize64");
4308     }
4309   }
4310 
4311   if (ID == "enable_wgp_mode") {
4312     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4313       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4314   }
4315 
4316   if (ID == "enable_mem_ordered") {
4317     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4318       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4319   }
4320 
4321   if (ID == "enable_fwd_progress") {
4322     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4323       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4324   }
4325 
4326   return false;
4327 }
4328 
4329 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4330   amd_kernel_code_t Header;
4331   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4332 
4333   while (true) {
4334     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4335     // will set the current token to EndOfStatement.
4336     while(getLexer().is(AsmToken::EndOfStatement))
4337       Lex();
4338 
4339     if (getLexer().isNot(AsmToken::Identifier))
4340       return TokError("expected value identifier or .end_amd_kernel_code_t");
4341 
4342     StringRef ID = getLexer().getTok().getIdentifier();
4343     Lex();
4344 
4345     if (ID == ".end_amd_kernel_code_t")
4346       break;
4347 
4348     if (ParseAMDKernelCodeTValue(ID, Header))
4349       return true;
4350   }
4351 
4352   getTargetStreamer().EmitAMDKernelCodeT(Header);
4353 
4354   return false;
4355 }
4356 
4357 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4358   if (getLexer().isNot(AsmToken::Identifier))
4359     return TokError("expected symbol name");
4360 
4361   StringRef KernelName = Parser.getTok().getString();
4362 
4363   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4364                                            ELF::STT_AMDGPU_HSA_KERNEL);
4365   Lex();
4366   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4367     KernelScope.initialize(getContext());
4368   return false;
4369 }
4370 
4371 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4372   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4373     return Error(getParser().getTok().getLoc(),
4374                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4375                  "architectures");
4376   }
4377 
4378   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4379 
4380   std::string ISAVersionStringFromSTI;
4381   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4382   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4383 
4384   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4385     return Error(getParser().getTok().getLoc(),
4386                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4387                  "arguments specified through the command line");
4388   }
4389 
4390   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4391   Lex();
4392 
4393   return false;
4394 }
4395 
4396 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4397   const char *AssemblerDirectiveBegin;
4398   const char *AssemblerDirectiveEnd;
4399   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4400       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4401           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4402                             HSAMD::V3::AssemblerDirectiveEnd)
4403           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4404                             HSAMD::AssemblerDirectiveEnd);
4405 
4406   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4407     return Error(getParser().getTok().getLoc(),
4408                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4409                  "not available on non-amdhsa OSes")).str());
4410   }
4411 
4412   std::string HSAMetadataString;
4413   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4414                           HSAMetadataString))
4415     return true;
4416 
4417   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4418     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4419       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4420   } else {
4421     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4422       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4423   }
4424 
4425   return false;
4426 }
4427 
4428 /// Common code to parse out a block of text (typically YAML) between start and
4429 /// end directives.
4430 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4431                                           const char *AssemblerDirectiveEnd,
4432                                           std::string &CollectString) {
4433 
4434   raw_string_ostream CollectStream(CollectString);
4435 
4436   getLexer().setSkipSpace(false);
4437 
4438   bool FoundEnd = false;
4439   while (!getLexer().is(AsmToken::Eof)) {
4440     while (getLexer().is(AsmToken::Space)) {
4441       CollectStream << getLexer().getTok().getString();
4442       Lex();
4443     }
4444 
4445     if (getLexer().is(AsmToken::Identifier)) {
4446       StringRef ID = getLexer().getTok().getIdentifier();
4447       if (ID == AssemblerDirectiveEnd) {
4448         Lex();
4449         FoundEnd = true;
4450         break;
4451       }
4452     }
4453 
4454     CollectStream << Parser.parseStringToEndOfStatement()
4455                   << getContext().getAsmInfo()->getSeparatorString();
4456 
4457     Parser.eatToEndOfStatement();
4458   }
4459 
4460   getLexer().setSkipSpace(true);
4461 
4462   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4463     return TokError(Twine("expected directive ") +
4464                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4465   }
4466 
4467   CollectStream.flush();
4468   return false;
4469 }
4470 
4471 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4472 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4473   std::string String;
4474   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4475                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4476     return true;
4477 
4478   auto PALMetadata = getTargetStreamer().getPALMetadata();
4479   if (!PALMetadata->setFromString(String))
4480     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4481   return false;
4482 }
4483 
4484 /// Parse the assembler directive for old linear-format PAL metadata.
4485 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4486   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4487     return Error(getParser().getTok().getLoc(),
4488                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4489                  "not available on non-amdpal OSes")).str());
4490   }
4491 
4492   auto PALMetadata = getTargetStreamer().getPALMetadata();
4493   PALMetadata->setLegacy();
4494   for (;;) {
4495     uint32_t Key, Value;
4496     if (ParseAsAbsoluteExpression(Key)) {
4497       return TokError(Twine("invalid value in ") +
4498                       Twine(PALMD::AssemblerDirective));
4499     }
4500     if (getLexer().isNot(AsmToken::Comma)) {
4501       return TokError(Twine("expected an even number of values in ") +
4502                       Twine(PALMD::AssemblerDirective));
4503     }
4504     Lex();
4505     if (ParseAsAbsoluteExpression(Value)) {
4506       return TokError(Twine("invalid value in ") +
4507                       Twine(PALMD::AssemblerDirective));
4508     }
4509     PALMetadata->setRegister(Key, Value);
4510     if (getLexer().isNot(AsmToken::Comma))
4511       break;
4512     Lex();
4513   }
4514   return false;
4515 }
4516 
4517 /// ParseDirectiveAMDGPULDS
4518 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4519 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4520   if (getParser().checkForValidSection())
4521     return true;
4522 
4523   StringRef Name;
4524   SMLoc NameLoc = getLexer().getLoc();
4525   if (getParser().parseIdentifier(Name))
4526     return TokError("expected identifier in directive");
4527 
4528   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4529   if (parseToken(AsmToken::Comma, "expected ','"))
4530     return true;
4531 
4532   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4533 
4534   int64_t Size;
4535   SMLoc SizeLoc = getLexer().getLoc();
4536   if (getParser().parseAbsoluteExpression(Size))
4537     return true;
4538   if (Size < 0)
4539     return Error(SizeLoc, "size must be non-negative");
4540   if (Size > LocalMemorySize)
4541     return Error(SizeLoc, "size is too large");
4542 
4543   int64_t Alignment = 4;
4544   if (getLexer().is(AsmToken::Comma)) {
4545     Lex();
4546     SMLoc AlignLoc = getLexer().getLoc();
4547     if (getParser().parseAbsoluteExpression(Alignment))
4548       return true;
4549     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4550       return Error(AlignLoc, "alignment must be a power of two");
4551 
4552     // Alignment larger than the size of LDS is possible in theory, as long
4553     // as the linker manages to place to symbol at address 0, but we do want
4554     // to make sure the alignment fits nicely into a 32-bit integer.
4555     if (Alignment >= 1u << 31)
4556       return Error(AlignLoc, "alignment is too large");
4557   }
4558 
4559   if (parseToken(AsmToken::EndOfStatement,
4560                  "unexpected token in '.amdgpu_lds' directive"))
4561     return true;
4562 
4563   Symbol->redefineIfPossible();
4564   if (!Symbol->isUndefined())
4565     return Error(NameLoc, "invalid symbol redefinition");
4566 
4567   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4568   return false;
4569 }
4570 
4571 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4572   StringRef IDVal = DirectiveID.getString();
4573 
4574   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4575     if (IDVal == ".amdgcn_target")
4576       return ParseDirectiveAMDGCNTarget();
4577 
4578     if (IDVal == ".amdhsa_kernel")
4579       return ParseDirectiveAMDHSAKernel();
4580 
4581     // TODO: Restructure/combine with PAL metadata directive.
4582     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4583       return ParseDirectiveHSAMetadata();
4584   } else {
4585     if (IDVal == ".hsa_code_object_version")
4586       return ParseDirectiveHSACodeObjectVersion();
4587 
4588     if (IDVal == ".hsa_code_object_isa")
4589       return ParseDirectiveHSACodeObjectISA();
4590 
4591     if (IDVal == ".amd_kernel_code_t")
4592       return ParseDirectiveAMDKernelCodeT();
4593 
4594     if (IDVal == ".amdgpu_hsa_kernel")
4595       return ParseDirectiveAMDGPUHsaKernel();
4596 
4597     if (IDVal == ".amd_amdgpu_isa")
4598       return ParseDirectiveISAVersion();
4599 
4600     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4601       return ParseDirectiveHSAMetadata();
4602   }
4603 
4604   if (IDVal == ".amdgpu_lds")
4605     return ParseDirectiveAMDGPULDS();
4606 
4607   if (IDVal == PALMD::AssemblerDirectiveBegin)
4608     return ParseDirectivePALMetadataBegin();
4609 
4610   if (IDVal == PALMD::AssemblerDirective)
4611     return ParseDirectivePALMetadata();
4612 
4613   return true;
4614 }
4615 
4616 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4617                                            unsigned RegNo) const {
4618 
4619   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4620        R.isValid(); ++R) {
4621     if (*R == RegNo)
4622       return isGFX9() || isGFX10();
4623   }
4624 
4625   // GFX10 has 2 more SGPRs 104 and 105.
4626   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4627        R.isValid(); ++R) {
4628     if (*R == RegNo)
4629       return hasSGPR104_SGPR105();
4630   }
4631 
4632   switch (RegNo) {
4633   case AMDGPU::SRC_SHARED_BASE:
4634   case AMDGPU::SRC_SHARED_LIMIT:
4635   case AMDGPU::SRC_PRIVATE_BASE:
4636   case AMDGPU::SRC_PRIVATE_LIMIT:
4637   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4638     return !isCI() && !isSI() && !isVI();
4639   case AMDGPU::TBA:
4640   case AMDGPU::TBA_LO:
4641   case AMDGPU::TBA_HI:
4642   case AMDGPU::TMA:
4643   case AMDGPU::TMA_LO:
4644   case AMDGPU::TMA_HI:
4645     return !isGFX9() && !isGFX10();
4646   case AMDGPU::XNACK_MASK:
4647   case AMDGPU::XNACK_MASK_LO:
4648   case AMDGPU::XNACK_MASK_HI:
4649     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4650   case AMDGPU::SGPR_NULL:
4651     return isGFX10();
4652   default:
4653     break;
4654   }
4655 
4656   if (isCI())
4657     return true;
4658 
4659   if (isSI() || isGFX10()) {
4660     // No flat_scr on SI.
4661     // On GFX10 flat scratch is not a valid register operand and can only be
4662     // accessed with s_setreg/s_getreg.
4663     switch (RegNo) {
4664     case AMDGPU::FLAT_SCR:
4665     case AMDGPU::FLAT_SCR_LO:
4666     case AMDGPU::FLAT_SCR_HI:
4667       return false;
4668     default:
4669       return true;
4670     }
4671   }
4672 
4673   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4674   // SI/CI have.
4675   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4676        R.isValid(); ++R) {
4677     if (*R == RegNo)
4678       return hasSGPR102_SGPR103();
4679   }
4680 
4681   return true;
4682 }
4683 
4684 OperandMatchResultTy
4685 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4686                               OperandMode Mode) {
4687   // Try to parse with a custom parser
4688   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4689 
4690   // If we successfully parsed the operand or if there as an error parsing,
4691   // we are done.
4692   //
4693   // If we are parsing after we reach EndOfStatement then this means we
4694   // are appending default values to the Operands list.  This is only done
4695   // by custom parser, so we shouldn't continue on to the generic parsing.
4696   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4697       getLexer().is(AsmToken::EndOfStatement))
4698     return ResTy;
4699 
4700   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4701     unsigned Prefix = Operands.size();
4702     SMLoc LBraceLoc = getTok().getLoc();
4703     Parser.Lex(); // eat the '['
4704 
4705     for (;;) {
4706       ResTy = parseReg(Operands);
4707       if (ResTy != MatchOperand_Success)
4708         return ResTy;
4709 
4710       if (getLexer().is(AsmToken::RBrac))
4711         break;
4712 
4713       if (getLexer().isNot(AsmToken::Comma))
4714         return MatchOperand_ParseFail;
4715       Parser.Lex();
4716     }
4717 
4718     if (Operands.size() - Prefix > 1) {
4719       Operands.insert(Operands.begin() + Prefix,
4720                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4721       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4722                                                     getTok().getLoc()));
4723     }
4724 
4725     Parser.Lex(); // eat the ']'
4726     return MatchOperand_Success;
4727   }
4728 
4729   return parseRegOrImm(Operands);
4730 }
4731 
4732 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4733   // Clear any forced encodings from the previous instruction.
4734   setForcedEncodingSize(0);
4735   setForcedDPP(false);
4736   setForcedSDWA(false);
4737 
4738   if (Name.endswith("_e64")) {
4739     setForcedEncodingSize(64);
4740     return Name.substr(0, Name.size() - 4);
4741   } else if (Name.endswith("_e32")) {
4742     setForcedEncodingSize(32);
4743     return Name.substr(0, Name.size() - 4);
4744   } else if (Name.endswith("_dpp")) {
4745     setForcedDPP(true);
4746     return Name.substr(0, Name.size() - 4);
4747   } else if (Name.endswith("_sdwa")) {
4748     setForcedSDWA(true);
4749     return Name.substr(0, Name.size() - 5);
4750   }
4751   return Name;
4752 }
4753 
4754 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4755                                        StringRef Name,
4756                                        SMLoc NameLoc, OperandVector &Operands) {
4757   // Add the instruction mnemonic
4758   Name = parseMnemonicSuffix(Name);
4759   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4760 
4761   bool IsMIMG = Name.startswith("image_");
4762 
4763   while (!getLexer().is(AsmToken::EndOfStatement)) {
4764     OperandMode Mode = OperandMode_Default;
4765     if (IsMIMG && isGFX10() && Operands.size() == 2)
4766       Mode = OperandMode_NSA;
4767     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4768 
4769     // Eat the comma or space if there is one.
4770     if (getLexer().is(AsmToken::Comma))
4771       Parser.Lex();
4772 
4773     if (Res != MatchOperand_Success) {
4774       if (!Parser.hasPendingError()) {
4775         // FIXME: use real operand location rather than the current location.
4776         StringRef Msg =
4777           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4778                                             "not a valid operand.";
4779         Error(getLexer().getLoc(), Msg);
4780       }
4781       while (!getLexer().is(AsmToken::EndOfStatement)) {
4782         Parser.Lex();
4783       }
4784       return true;
4785     }
4786   }
4787 
4788   return false;
4789 }
4790 
4791 //===----------------------------------------------------------------------===//
4792 // Utility functions
4793 //===----------------------------------------------------------------------===//
4794 
4795 OperandMatchResultTy
4796 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4797 
4798   if (!trySkipId(Prefix, AsmToken::Colon))
4799     return MatchOperand_NoMatch;
4800 
4801   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4802 }
4803 
4804 OperandMatchResultTy
4805 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4806                                     AMDGPUOperand::ImmTy ImmTy,
4807                                     bool (*ConvertResult)(int64_t&)) {
4808   SMLoc S = getLoc();
4809   int64_t Value = 0;
4810 
4811   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4812   if (Res != MatchOperand_Success)
4813     return Res;
4814 
4815   if (ConvertResult && !ConvertResult(Value)) {
4816     Error(S, "invalid " + StringRef(Prefix) + " value.");
4817   }
4818 
4819   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4820   return MatchOperand_Success;
4821 }
4822 
4823 OperandMatchResultTy
4824 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4825                                              OperandVector &Operands,
4826                                              AMDGPUOperand::ImmTy ImmTy,
4827                                              bool (*ConvertResult)(int64_t&)) {
4828   SMLoc S = getLoc();
4829   if (!trySkipId(Prefix, AsmToken::Colon))
4830     return MatchOperand_NoMatch;
4831 
4832   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4833     return MatchOperand_ParseFail;
4834 
4835   unsigned Val = 0;
4836   const unsigned MaxSize = 4;
4837 
4838   // FIXME: How to verify the number of elements matches the number of src
4839   // operands?
4840   for (int I = 0; ; ++I) {
4841     int64_t Op;
4842     SMLoc Loc = getLoc();
4843     if (!parseExpr(Op))
4844       return MatchOperand_ParseFail;
4845 
4846     if (Op != 0 && Op != 1) {
4847       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4848       return MatchOperand_ParseFail;
4849     }
4850 
4851     Val |= (Op << I);
4852 
4853     if (trySkipToken(AsmToken::RBrac))
4854       break;
4855 
4856     if (I + 1 == MaxSize) {
4857       Error(getLoc(), "expected a closing square bracket");
4858       return MatchOperand_ParseFail;
4859     }
4860 
4861     if (!skipToken(AsmToken::Comma, "expected a comma"))
4862       return MatchOperand_ParseFail;
4863   }
4864 
4865   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4866   return MatchOperand_Success;
4867 }
4868 
4869 OperandMatchResultTy
4870 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4871                                AMDGPUOperand::ImmTy ImmTy) {
4872   int64_t Bit = 0;
4873   SMLoc S = Parser.getTok().getLoc();
4874 
4875   // We are at the end of the statement, and this is a default argument, so
4876   // use a default value.
4877   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4878     switch(getLexer().getKind()) {
4879       case AsmToken::Identifier: {
4880         StringRef Tok = Parser.getTok().getString();
4881         if (Tok == Name) {
4882           if (Tok == "r128" && !hasMIMG_R128())
4883             Error(S, "r128 modifier is not supported on this GPU");
4884           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4885             Error(S, "a16 modifier is not supported on this GPU");
4886           Bit = 1;
4887           Parser.Lex();
4888         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4889           Bit = 0;
4890           Parser.Lex();
4891         } else {
4892           return MatchOperand_NoMatch;
4893         }
4894         break;
4895       }
4896       default:
4897         return MatchOperand_NoMatch;
4898     }
4899   }
4900 
4901   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4902     return MatchOperand_ParseFail;
4903 
4904   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4905     ImmTy = AMDGPUOperand::ImmTyR128A16;
4906 
4907   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4908   return MatchOperand_Success;
4909 }
4910 
4911 static void addOptionalImmOperand(
4912   MCInst& Inst, const OperandVector& Operands,
4913   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4914   AMDGPUOperand::ImmTy ImmT,
4915   int64_t Default = 0) {
4916   auto i = OptionalIdx.find(ImmT);
4917   if (i != OptionalIdx.end()) {
4918     unsigned Idx = i->second;
4919     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4920   } else {
4921     Inst.addOperand(MCOperand::createImm(Default));
4922   }
4923 }
4924 
4925 OperandMatchResultTy
4926 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4927   if (getLexer().isNot(AsmToken::Identifier)) {
4928     return MatchOperand_NoMatch;
4929   }
4930   StringRef Tok = Parser.getTok().getString();
4931   if (Tok != Prefix) {
4932     return MatchOperand_NoMatch;
4933   }
4934 
4935   Parser.Lex();
4936   if (getLexer().isNot(AsmToken::Colon)) {
4937     return MatchOperand_ParseFail;
4938   }
4939 
4940   Parser.Lex();
4941   if (getLexer().isNot(AsmToken::Identifier)) {
4942     return MatchOperand_ParseFail;
4943   }
4944 
4945   Value = Parser.getTok().getString();
4946   return MatchOperand_Success;
4947 }
4948 
4949 //===----------------------------------------------------------------------===//
4950 // MTBUF format
4951 //===----------------------------------------------------------------------===//
4952 
4953 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
4954                                   int64_t MaxVal,
4955                                   int64_t &Fmt) {
4956   int64_t Val;
4957   SMLoc Loc = getLoc();
4958 
4959   auto Res = parseIntWithPrefix(Pref, Val);
4960   if (Res == MatchOperand_ParseFail)
4961     return false;
4962   if (Res == MatchOperand_NoMatch)
4963     return true;
4964 
4965   if (Val < 0 || Val > MaxVal) {
4966     Error(Loc, Twine("out of range ", StringRef(Pref)));
4967     return false;
4968   }
4969 
4970   Fmt = Val;
4971   return true;
4972 }
4973 
4974 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4975 // values to live in a joint format operand in the MCInst encoding.
4976 OperandMatchResultTy
4977 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
4978   using namespace llvm::AMDGPU::MTBUFFormat;
4979 
4980   int64_t Dfmt = DFMT_UNDEF;
4981   int64_t Nfmt = NFMT_UNDEF;
4982 
4983   // dfmt and nfmt can appear in either order, and each is optional.
4984   for (int I = 0; I < 2; ++I) {
4985     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
4986       return MatchOperand_ParseFail;
4987 
4988     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
4989       return MatchOperand_ParseFail;
4990     }
4991     // Skip optional comma between dfmt/nfmt
4992     // but guard against 2 commas following each other.
4993     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
4994         !peekToken().is(AsmToken::Comma)) {
4995       trySkipToken(AsmToken::Comma);
4996     }
4997   }
4998 
4999   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5000     return MatchOperand_NoMatch;
5001 
5002   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5003   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5004 
5005   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5006   return MatchOperand_Success;
5007 }
5008 
5009 OperandMatchResultTy
5010 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5011   using namespace llvm::AMDGPU::MTBUFFormat;
5012 
5013   int64_t Fmt = UFMT_UNDEF;
5014 
5015   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5016     return MatchOperand_ParseFail;
5017 
5018   if (Fmt == UFMT_UNDEF)
5019     return MatchOperand_NoMatch;
5020 
5021   Format = Fmt;
5022   return MatchOperand_Success;
5023 }
5024 
5025 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5026                                     int64_t &Nfmt,
5027                                     StringRef FormatStr,
5028                                     SMLoc Loc) {
5029   using namespace llvm::AMDGPU::MTBUFFormat;
5030   int64_t Format;
5031 
5032   Format = getDfmt(FormatStr);
5033   if (Format != DFMT_UNDEF) {
5034     Dfmt = Format;
5035     return true;
5036   }
5037 
5038   Format = getNfmt(FormatStr, getSTI());
5039   if (Format != NFMT_UNDEF) {
5040     Nfmt = Format;
5041     return true;
5042   }
5043 
5044   Error(Loc, "unsupported format");
5045   return false;
5046 }
5047 
5048 OperandMatchResultTy
5049 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5050                                           SMLoc FormatLoc,
5051                                           int64_t &Format) {
5052   using namespace llvm::AMDGPU::MTBUFFormat;
5053 
5054   int64_t Dfmt = DFMT_UNDEF;
5055   int64_t Nfmt = NFMT_UNDEF;
5056   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5057     return MatchOperand_ParseFail;
5058 
5059   if (trySkipToken(AsmToken::Comma)) {
5060     StringRef Str;
5061     SMLoc Loc = getLoc();
5062     if (!parseId(Str, "expected a format string") ||
5063         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5064       return MatchOperand_ParseFail;
5065     }
5066     if (Dfmt == DFMT_UNDEF) {
5067       Error(Loc, "duplicate numeric format");
5068       return MatchOperand_ParseFail;
5069     } else if (Nfmt == NFMT_UNDEF) {
5070       Error(Loc, "duplicate data format");
5071       return MatchOperand_ParseFail;
5072     }
5073   }
5074 
5075   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5076   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5077 
5078   if (isGFX10()) {
5079     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5080     if (Ufmt == UFMT_UNDEF) {
5081       Error(FormatLoc, "unsupported format");
5082       return MatchOperand_ParseFail;
5083     }
5084     Format = Ufmt;
5085   } else {
5086     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5087   }
5088 
5089   return MatchOperand_Success;
5090 }
5091 
5092 OperandMatchResultTy
5093 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5094                                             SMLoc Loc,
5095                                             int64_t &Format) {
5096   using namespace llvm::AMDGPU::MTBUFFormat;
5097 
5098   auto Id = getUnifiedFormat(FormatStr);
5099   if (Id == UFMT_UNDEF)
5100     return MatchOperand_NoMatch;
5101 
5102   if (!isGFX10()) {
5103     Error(Loc, "unified format is not supported on this GPU");
5104     return MatchOperand_ParseFail;
5105   }
5106 
5107   Format = Id;
5108   return MatchOperand_Success;
5109 }
5110 
5111 OperandMatchResultTy
5112 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5113   using namespace llvm::AMDGPU::MTBUFFormat;
5114   SMLoc Loc = getLoc();
5115 
5116   if (!parseExpr(Format))
5117     return MatchOperand_ParseFail;
5118   if (!isValidFormatEncoding(Format, getSTI())) {
5119     Error(Loc, "out of range format");
5120     return MatchOperand_ParseFail;
5121   }
5122 
5123   return MatchOperand_Success;
5124 }
5125 
5126 OperandMatchResultTy
5127 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5128   using namespace llvm::AMDGPU::MTBUFFormat;
5129 
5130   if (!trySkipId("format", AsmToken::Colon))
5131     return MatchOperand_NoMatch;
5132 
5133   if (trySkipToken(AsmToken::LBrac)) {
5134     StringRef FormatStr;
5135     SMLoc Loc = getLoc();
5136     if (!parseId(FormatStr, "expected a format string"))
5137       return MatchOperand_ParseFail;
5138 
5139     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5140     if (Res == MatchOperand_NoMatch)
5141       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5142     if (Res != MatchOperand_Success)
5143       return Res;
5144 
5145     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5146       return MatchOperand_ParseFail;
5147 
5148     return MatchOperand_Success;
5149   }
5150 
5151   return parseNumericFormat(Format);
5152 }
5153 
5154 OperandMatchResultTy
5155 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5156   using namespace llvm::AMDGPU::MTBUFFormat;
5157 
5158   int64_t Format = getDefaultFormatEncoding(getSTI());
5159   OperandMatchResultTy Res;
5160   SMLoc Loc = getLoc();
5161 
5162   // Parse legacy format syntax.
5163   Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5164   if (Res == MatchOperand_ParseFail)
5165     return Res;
5166 
5167   bool FormatFound = (Res == MatchOperand_Success);
5168 
5169   Operands.push_back(
5170     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5171 
5172   if (FormatFound)
5173     trySkipToken(AsmToken::Comma);
5174 
5175   if (isToken(AsmToken::EndOfStatement)) {
5176     // We are expecting an soffset operand,
5177     // but let matcher handle the error.
5178     return MatchOperand_Success;
5179   }
5180 
5181   // Parse soffset.
5182   Res = parseRegOrImm(Operands);
5183   if (Res != MatchOperand_Success)
5184     return Res;
5185 
5186   trySkipToken(AsmToken::Comma);
5187 
5188   if (!FormatFound) {
5189     Res = parseSymbolicOrNumericFormat(Format);
5190     if (Res == MatchOperand_ParseFail)
5191       return Res;
5192     if (Res == MatchOperand_Success) {
5193       auto Size = Operands.size();
5194       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5195       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5196       Op.setImm(Format);
5197     }
5198     return MatchOperand_Success;
5199   }
5200 
5201   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5202     Error(getLoc(), "duplicate format");
5203     return MatchOperand_ParseFail;
5204   }
5205   return MatchOperand_Success;
5206 }
5207 
5208 //===----------------------------------------------------------------------===//
5209 // ds
5210 //===----------------------------------------------------------------------===//
5211 
5212 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5213                                     const OperandVector &Operands) {
5214   OptionalImmIndexMap OptionalIdx;
5215 
5216   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5217     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5218 
5219     // Add the register arguments
5220     if (Op.isReg()) {
5221       Op.addRegOperands(Inst, 1);
5222       continue;
5223     }
5224 
5225     // Handle optional arguments
5226     OptionalIdx[Op.getImmTy()] = i;
5227   }
5228 
5229   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5230   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5231   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5232 
5233   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5234 }
5235 
5236 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5237                                 bool IsGdsHardcoded) {
5238   OptionalImmIndexMap OptionalIdx;
5239 
5240   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5241     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5242 
5243     // Add the register arguments
5244     if (Op.isReg()) {
5245       Op.addRegOperands(Inst, 1);
5246       continue;
5247     }
5248 
5249     if (Op.isToken() && Op.getToken() == "gds") {
5250       IsGdsHardcoded = true;
5251       continue;
5252     }
5253 
5254     // Handle optional arguments
5255     OptionalIdx[Op.getImmTy()] = i;
5256   }
5257 
5258   AMDGPUOperand::ImmTy OffsetType =
5259     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5260      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5261      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5262                                                       AMDGPUOperand::ImmTyOffset;
5263 
5264   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5265 
5266   if (!IsGdsHardcoded) {
5267     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5268   }
5269   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5270 }
5271 
5272 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5273   OptionalImmIndexMap OptionalIdx;
5274 
5275   unsigned OperandIdx[4];
5276   unsigned EnMask = 0;
5277   int SrcIdx = 0;
5278 
5279   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5280     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5281 
5282     // Add the register arguments
5283     if (Op.isReg()) {
5284       assert(SrcIdx < 4);
5285       OperandIdx[SrcIdx] = Inst.size();
5286       Op.addRegOperands(Inst, 1);
5287       ++SrcIdx;
5288       continue;
5289     }
5290 
5291     if (Op.isOff()) {
5292       assert(SrcIdx < 4);
5293       OperandIdx[SrcIdx] = Inst.size();
5294       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5295       ++SrcIdx;
5296       continue;
5297     }
5298 
5299     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5300       Op.addImmOperands(Inst, 1);
5301       continue;
5302     }
5303 
5304     if (Op.isToken() && Op.getToken() == "done")
5305       continue;
5306 
5307     // Handle optional arguments
5308     OptionalIdx[Op.getImmTy()] = i;
5309   }
5310 
5311   assert(SrcIdx == 4);
5312 
5313   bool Compr = false;
5314   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5315     Compr = true;
5316     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5317     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5318     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5319   }
5320 
5321   for (auto i = 0; i < SrcIdx; ++i) {
5322     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5323       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5324     }
5325   }
5326 
5327   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5328   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5329 
5330   Inst.addOperand(MCOperand::createImm(EnMask));
5331 }
5332 
5333 //===----------------------------------------------------------------------===//
5334 // s_waitcnt
5335 //===----------------------------------------------------------------------===//
5336 
5337 static bool
5338 encodeCnt(
5339   const AMDGPU::IsaVersion ISA,
5340   int64_t &IntVal,
5341   int64_t CntVal,
5342   bool Saturate,
5343   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5344   unsigned (*decode)(const IsaVersion &Version, unsigned))
5345 {
5346   bool Failed = false;
5347 
5348   IntVal = encode(ISA, IntVal, CntVal);
5349   if (CntVal != decode(ISA, IntVal)) {
5350     if (Saturate) {
5351       IntVal = encode(ISA, IntVal, -1);
5352     } else {
5353       Failed = true;
5354     }
5355   }
5356   return Failed;
5357 }
5358 
5359 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5360 
5361   SMLoc CntLoc = getLoc();
5362   StringRef CntName = getTokenStr();
5363 
5364   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5365       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5366     return false;
5367 
5368   int64_t CntVal;
5369   SMLoc ValLoc = getLoc();
5370   if (!parseExpr(CntVal))
5371     return false;
5372 
5373   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5374 
5375   bool Failed = true;
5376   bool Sat = CntName.endswith("_sat");
5377 
5378   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5379     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5380   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5381     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5382   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5383     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5384   } else {
5385     Error(CntLoc, "invalid counter name " + CntName);
5386     return false;
5387   }
5388 
5389   if (Failed) {
5390     Error(ValLoc, "too large value for " + CntName);
5391     return false;
5392   }
5393 
5394   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5395     return false;
5396 
5397   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5398     if (isToken(AsmToken::EndOfStatement)) {
5399       Error(getLoc(), "expected a counter name");
5400       return false;
5401     }
5402   }
5403 
5404   return true;
5405 }
5406 
5407 OperandMatchResultTy
5408 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5409   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5410   int64_t Waitcnt = getWaitcntBitMask(ISA);
5411   SMLoc S = getLoc();
5412 
5413   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5414     while (!isToken(AsmToken::EndOfStatement)) {
5415       if (!parseCnt(Waitcnt))
5416         return MatchOperand_ParseFail;
5417     }
5418   } else {
5419     if (!parseExpr(Waitcnt))
5420       return MatchOperand_ParseFail;
5421   }
5422 
5423   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5424   return MatchOperand_Success;
5425 }
5426 
5427 bool
5428 AMDGPUOperand::isSWaitCnt() const {
5429   return isImm();
5430 }
5431 
5432 //===----------------------------------------------------------------------===//
5433 // hwreg
5434 //===----------------------------------------------------------------------===//
5435 
5436 bool
5437 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5438                                 int64_t &Offset,
5439                                 int64_t &Width) {
5440   using namespace llvm::AMDGPU::Hwreg;
5441 
5442   // The register may be specified by name or using a numeric code
5443   if (isToken(AsmToken::Identifier) &&
5444       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5445     HwReg.IsSymbolic = true;
5446     lex(); // skip message name
5447   } else if (!parseExpr(HwReg.Id)) {
5448     return false;
5449   }
5450 
5451   if (trySkipToken(AsmToken::RParen))
5452     return true;
5453 
5454   // parse optional params
5455   return
5456     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5457     parseExpr(Offset) &&
5458     skipToken(AsmToken::Comma, "expected a comma") &&
5459     parseExpr(Width) &&
5460     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5461 }
5462 
5463 bool
5464 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5465                                const int64_t Offset,
5466                                const int64_t Width,
5467                                const SMLoc Loc) {
5468 
5469   using namespace llvm::AMDGPU::Hwreg;
5470 
5471   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5472     Error(Loc, "specified hardware register is not supported on this GPU");
5473     return false;
5474   } else if (!isValidHwreg(HwReg.Id)) {
5475     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5476     return false;
5477   } else if (!isValidHwregOffset(Offset)) {
5478     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5479     return false;
5480   } else if (!isValidHwregWidth(Width)) {
5481     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5482     return false;
5483   }
5484   return true;
5485 }
5486 
5487 OperandMatchResultTy
5488 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5489   using namespace llvm::AMDGPU::Hwreg;
5490 
5491   int64_t ImmVal = 0;
5492   SMLoc Loc = getLoc();
5493 
5494   if (trySkipId("hwreg", AsmToken::LParen)) {
5495     OperandInfoTy HwReg(ID_UNKNOWN_);
5496     int64_t Offset = OFFSET_DEFAULT_;
5497     int64_t Width = WIDTH_DEFAULT_;
5498     if (parseHwregBody(HwReg, Offset, Width) &&
5499         validateHwreg(HwReg, Offset, Width, Loc)) {
5500       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5501     } else {
5502       return MatchOperand_ParseFail;
5503     }
5504   } else if (parseExpr(ImmVal)) {
5505     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5506       Error(Loc, "invalid immediate: only 16-bit values are legal");
5507       return MatchOperand_ParseFail;
5508     }
5509   } else {
5510     return MatchOperand_ParseFail;
5511   }
5512 
5513   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5514   return MatchOperand_Success;
5515 }
5516 
5517 bool AMDGPUOperand::isHwreg() const {
5518   return isImmTy(ImmTyHwreg);
5519 }
5520 
5521 //===----------------------------------------------------------------------===//
5522 // sendmsg
5523 //===----------------------------------------------------------------------===//
5524 
5525 bool
5526 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5527                                   OperandInfoTy &Op,
5528                                   OperandInfoTy &Stream) {
5529   using namespace llvm::AMDGPU::SendMsg;
5530 
5531   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5532     Msg.IsSymbolic = true;
5533     lex(); // skip message name
5534   } else if (!parseExpr(Msg.Id)) {
5535     return false;
5536   }
5537 
5538   if (trySkipToken(AsmToken::Comma)) {
5539     Op.IsDefined = true;
5540     if (isToken(AsmToken::Identifier) &&
5541         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5542       lex(); // skip operation name
5543     } else if (!parseExpr(Op.Id)) {
5544       return false;
5545     }
5546 
5547     if (trySkipToken(AsmToken::Comma)) {
5548       Stream.IsDefined = true;
5549       if (!parseExpr(Stream.Id))
5550         return false;
5551     }
5552   }
5553 
5554   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5555 }
5556 
5557 bool
5558 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5559                                  const OperandInfoTy &Op,
5560                                  const OperandInfoTy &Stream,
5561                                  const SMLoc S) {
5562   using namespace llvm::AMDGPU::SendMsg;
5563 
5564   // Validation strictness depends on whether message is specified
5565   // in a symbolc or in a numeric form. In the latter case
5566   // only encoding possibility is checked.
5567   bool Strict = Msg.IsSymbolic;
5568 
5569   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5570     Error(S, "invalid message id");
5571     return false;
5572   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5573     Error(S, Op.IsDefined ?
5574              "message does not support operations" :
5575              "missing message operation");
5576     return false;
5577   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5578     Error(S, "invalid operation id");
5579     return false;
5580   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5581     Error(S, "message operation does not support streams");
5582     return false;
5583   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5584     Error(S, "invalid message stream id");
5585     return false;
5586   }
5587   return true;
5588 }
5589 
5590 OperandMatchResultTy
5591 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5592   using namespace llvm::AMDGPU::SendMsg;
5593 
5594   int64_t ImmVal = 0;
5595   SMLoc Loc = getLoc();
5596 
5597   if (trySkipId("sendmsg", AsmToken::LParen)) {
5598     OperandInfoTy Msg(ID_UNKNOWN_);
5599     OperandInfoTy Op(OP_NONE_);
5600     OperandInfoTy Stream(STREAM_ID_NONE_);
5601     if (parseSendMsgBody(Msg, Op, Stream) &&
5602         validateSendMsg(Msg, Op, Stream, Loc)) {
5603       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5604     } else {
5605       return MatchOperand_ParseFail;
5606     }
5607   } else if (parseExpr(ImmVal)) {
5608     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5609       Error(Loc, "invalid immediate: only 16-bit values are legal");
5610       return MatchOperand_ParseFail;
5611     }
5612   } else {
5613     return MatchOperand_ParseFail;
5614   }
5615 
5616   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5617   return MatchOperand_Success;
5618 }
5619 
5620 bool AMDGPUOperand::isSendMsg() const {
5621   return isImmTy(ImmTySendMsg);
5622 }
5623 
5624 //===----------------------------------------------------------------------===//
5625 // v_interp
5626 //===----------------------------------------------------------------------===//
5627 
5628 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5629   if (getLexer().getKind() != AsmToken::Identifier)
5630     return MatchOperand_NoMatch;
5631 
5632   StringRef Str = Parser.getTok().getString();
5633   int Slot = StringSwitch<int>(Str)
5634     .Case("p10", 0)
5635     .Case("p20", 1)
5636     .Case("p0", 2)
5637     .Default(-1);
5638 
5639   SMLoc S = Parser.getTok().getLoc();
5640   if (Slot == -1)
5641     return MatchOperand_ParseFail;
5642 
5643   Parser.Lex();
5644   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5645                                               AMDGPUOperand::ImmTyInterpSlot));
5646   return MatchOperand_Success;
5647 }
5648 
5649 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5650   if (getLexer().getKind() != AsmToken::Identifier)
5651     return MatchOperand_NoMatch;
5652 
5653   StringRef Str = Parser.getTok().getString();
5654   if (!Str.startswith("attr"))
5655     return MatchOperand_NoMatch;
5656 
5657   StringRef Chan = Str.take_back(2);
5658   int AttrChan = StringSwitch<int>(Chan)
5659     .Case(".x", 0)
5660     .Case(".y", 1)
5661     .Case(".z", 2)
5662     .Case(".w", 3)
5663     .Default(-1);
5664   if (AttrChan == -1)
5665     return MatchOperand_ParseFail;
5666 
5667   Str = Str.drop_back(2).drop_front(4);
5668 
5669   uint8_t Attr;
5670   if (Str.getAsInteger(10, Attr))
5671     return MatchOperand_ParseFail;
5672 
5673   SMLoc S = Parser.getTok().getLoc();
5674   Parser.Lex();
5675   if (Attr > 63) {
5676     Error(S, "out of bounds attr");
5677     return MatchOperand_ParseFail;
5678   }
5679 
5680   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5681 
5682   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5683                                               AMDGPUOperand::ImmTyInterpAttr));
5684   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5685                                               AMDGPUOperand::ImmTyAttrChan));
5686   return MatchOperand_Success;
5687 }
5688 
5689 //===----------------------------------------------------------------------===//
5690 // exp
5691 //===----------------------------------------------------------------------===//
5692 
5693 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5694                                                       uint8_t &Val) {
5695   if (Str == "null") {
5696     Val = 9;
5697     return MatchOperand_Success;
5698   }
5699 
5700   if (Str.startswith("mrt")) {
5701     Str = Str.drop_front(3);
5702     if (Str == "z") { // == mrtz
5703       Val = 8;
5704       return MatchOperand_Success;
5705     }
5706 
5707     if (Str.getAsInteger(10, Val))
5708       return MatchOperand_ParseFail;
5709 
5710     if (Val > 7) {
5711       Error(getLoc(), "invalid exp target");
5712       return MatchOperand_ParseFail;
5713     }
5714 
5715     return MatchOperand_Success;
5716   }
5717 
5718   if (Str.startswith("pos")) {
5719     Str = Str.drop_front(3);
5720     if (Str.getAsInteger(10, Val))
5721       return MatchOperand_ParseFail;
5722 
5723     if (Val > 4 || (Val == 4 && !isGFX10())) {
5724       Error(getLoc(), "invalid exp target");
5725       return MatchOperand_ParseFail;
5726     }
5727 
5728     Val += 12;
5729     return MatchOperand_Success;
5730   }
5731 
5732   if (isGFX10() && Str == "prim") {
5733     Val = 20;
5734     return MatchOperand_Success;
5735   }
5736 
5737   if (Str.startswith("param")) {
5738     Str = Str.drop_front(5);
5739     if (Str.getAsInteger(10, Val))
5740       return MatchOperand_ParseFail;
5741 
5742     if (Val >= 32) {
5743       Error(getLoc(), "invalid exp target");
5744       return MatchOperand_ParseFail;
5745     }
5746 
5747     Val += 32;
5748     return MatchOperand_Success;
5749   }
5750 
5751   if (Str.startswith("invalid_target_")) {
5752     Str = Str.drop_front(15);
5753     if (Str.getAsInteger(10, Val))
5754       return MatchOperand_ParseFail;
5755 
5756     Error(getLoc(), "invalid exp target");
5757     return MatchOperand_ParseFail;
5758   }
5759 
5760   return MatchOperand_NoMatch;
5761 }
5762 
5763 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5764   uint8_t Val;
5765   StringRef Str = Parser.getTok().getString();
5766 
5767   auto Res = parseExpTgtImpl(Str, Val);
5768   if (Res != MatchOperand_Success)
5769     return Res;
5770 
5771   SMLoc S = Parser.getTok().getLoc();
5772   Parser.Lex();
5773 
5774   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5775                                               AMDGPUOperand::ImmTyExpTgt));
5776   return MatchOperand_Success;
5777 }
5778 
5779 //===----------------------------------------------------------------------===//
5780 // parser helpers
5781 //===----------------------------------------------------------------------===//
5782 
5783 bool
5784 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5785   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5786 }
5787 
5788 bool
5789 AMDGPUAsmParser::isId(const StringRef Id) const {
5790   return isId(getToken(), Id);
5791 }
5792 
5793 bool
5794 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5795   return getTokenKind() == Kind;
5796 }
5797 
5798 bool
5799 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5800   if (isId(Id)) {
5801     lex();
5802     return true;
5803   }
5804   return false;
5805 }
5806 
5807 bool
5808 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5809   if (isId(Id) && peekToken().is(Kind)) {
5810     lex();
5811     lex();
5812     return true;
5813   }
5814   return false;
5815 }
5816 
5817 bool
5818 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5819   if (isToken(Kind)) {
5820     lex();
5821     return true;
5822   }
5823   return false;
5824 }
5825 
5826 bool
5827 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5828                            const StringRef ErrMsg) {
5829   if (!trySkipToken(Kind)) {
5830     Error(getLoc(), ErrMsg);
5831     return false;
5832   }
5833   return true;
5834 }
5835 
5836 bool
5837 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5838   return !getParser().parseAbsoluteExpression(Imm);
5839 }
5840 
5841 bool
5842 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5843   SMLoc S = getLoc();
5844 
5845   const MCExpr *Expr;
5846   if (Parser.parseExpression(Expr))
5847     return false;
5848 
5849   int64_t IntVal;
5850   if (Expr->evaluateAsAbsolute(IntVal)) {
5851     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5852   } else {
5853     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5854   }
5855   return true;
5856 }
5857 
5858 bool
5859 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5860   if (isToken(AsmToken::String)) {
5861     Val = getToken().getStringContents();
5862     lex();
5863     return true;
5864   } else {
5865     Error(getLoc(), ErrMsg);
5866     return false;
5867   }
5868 }
5869 
5870 bool
5871 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
5872   if (isToken(AsmToken::Identifier)) {
5873     Val = getTokenStr();
5874     lex();
5875     return true;
5876   } else {
5877     Error(getLoc(), ErrMsg);
5878     return false;
5879   }
5880 }
5881 
5882 AsmToken
5883 AMDGPUAsmParser::getToken() const {
5884   return Parser.getTok();
5885 }
5886 
5887 AsmToken
5888 AMDGPUAsmParser::peekToken() {
5889   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
5890 }
5891 
5892 void
5893 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5894   auto TokCount = getLexer().peekTokens(Tokens);
5895 
5896   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5897     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5898 }
5899 
5900 AsmToken::TokenKind
5901 AMDGPUAsmParser::getTokenKind() const {
5902   return getLexer().getKind();
5903 }
5904 
5905 SMLoc
5906 AMDGPUAsmParser::getLoc() const {
5907   return getToken().getLoc();
5908 }
5909 
5910 StringRef
5911 AMDGPUAsmParser::getTokenStr() const {
5912   return getToken().getString();
5913 }
5914 
5915 void
5916 AMDGPUAsmParser::lex() {
5917   Parser.Lex();
5918 }
5919 
5920 //===----------------------------------------------------------------------===//
5921 // swizzle
5922 //===----------------------------------------------------------------------===//
5923 
5924 LLVM_READNONE
5925 static unsigned
5926 encodeBitmaskPerm(const unsigned AndMask,
5927                   const unsigned OrMask,
5928                   const unsigned XorMask) {
5929   using namespace llvm::AMDGPU::Swizzle;
5930 
5931   return BITMASK_PERM_ENC |
5932          (AndMask << BITMASK_AND_SHIFT) |
5933          (OrMask  << BITMASK_OR_SHIFT)  |
5934          (XorMask << BITMASK_XOR_SHIFT);
5935 }
5936 
5937 bool
5938 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5939                                       const unsigned MinVal,
5940                                       const unsigned MaxVal,
5941                                       const StringRef ErrMsg) {
5942   for (unsigned i = 0; i < OpNum; ++i) {
5943     if (!skipToken(AsmToken::Comma, "expected a comma")){
5944       return false;
5945     }
5946     SMLoc ExprLoc = Parser.getTok().getLoc();
5947     if (!parseExpr(Op[i])) {
5948       return false;
5949     }
5950     if (Op[i] < MinVal || Op[i] > MaxVal) {
5951       Error(ExprLoc, ErrMsg);
5952       return false;
5953     }
5954   }
5955 
5956   return true;
5957 }
5958 
5959 bool
5960 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5961   using namespace llvm::AMDGPU::Swizzle;
5962 
5963   int64_t Lane[LANE_NUM];
5964   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5965                            "expected a 2-bit lane id")) {
5966     Imm = QUAD_PERM_ENC;
5967     for (unsigned I = 0; I < LANE_NUM; ++I) {
5968       Imm |= Lane[I] << (LANE_SHIFT * I);
5969     }
5970     return true;
5971   }
5972   return false;
5973 }
5974 
5975 bool
5976 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5977   using namespace llvm::AMDGPU::Swizzle;
5978 
5979   SMLoc S = Parser.getTok().getLoc();
5980   int64_t GroupSize;
5981   int64_t LaneIdx;
5982 
5983   if (!parseSwizzleOperands(1, &GroupSize,
5984                             2, 32,
5985                             "group size must be in the interval [2,32]")) {
5986     return false;
5987   }
5988   if (!isPowerOf2_64(GroupSize)) {
5989     Error(S, "group size must be a power of two");
5990     return false;
5991   }
5992   if (parseSwizzleOperands(1, &LaneIdx,
5993                            0, GroupSize - 1,
5994                            "lane id must be in the interval [0,group size - 1]")) {
5995     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5996     return true;
5997   }
5998   return false;
5999 }
6000 
6001 bool
6002 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6003   using namespace llvm::AMDGPU::Swizzle;
6004 
6005   SMLoc S = Parser.getTok().getLoc();
6006   int64_t GroupSize;
6007 
6008   if (!parseSwizzleOperands(1, &GroupSize,
6009       2, 32, "group size must be in the interval [2,32]")) {
6010     return false;
6011   }
6012   if (!isPowerOf2_64(GroupSize)) {
6013     Error(S, "group size must be a power of two");
6014     return false;
6015   }
6016 
6017   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6018   return true;
6019 }
6020 
6021 bool
6022 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6023   using namespace llvm::AMDGPU::Swizzle;
6024 
6025   SMLoc S = Parser.getTok().getLoc();
6026   int64_t GroupSize;
6027 
6028   if (!parseSwizzleOperands(1, &GroupSize,
6029       1, 16, "group size must be in the interval [1,16]")) {
6030     return false;
6031   }
6032   if (!isPowerOf2_64(GroupSize)) {
6033     Error(S, "group size must be a power of two");
6034     return false;
6035   }
6036 
6037   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6038   return true;
6039 }
6040 
6041 bool
6042 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6043   using namespace llvm::AMDGPU::Swizzle;
6044 
6045   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6046     return false;
6047   }
6048 
6049   StringRef Ctl;
6050   SMLoc StrLoc = Parser.getTok().getLoc();
6051   if (!parseString(Ctl)) {
6052     return false;
6053   }
6054   if (Ctl.size() != BITMASK_WIDTH) {
6055     Error(StrLoc, "expected a 5-character mask");
6056     return false;
6057   }
6058 
6059   unsigned AndMask = 0;
6060   unsigned OrMask = 0;
6061   unsigned XorMask = 0;
6062 
6063   for (size_t i = 0; i < Ctl.size(); ++i) {
6064     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6065     switch(Ctl[i]) {
6066     default:
6067       Error(StrLoc, "invalid mask");
6068       return false;
6069     case '0':
6070       break;
6071     case '1':
6072       OrMask |= Mask;
6073       break;
6074     case 'p':
6075       AndMask |= Mask;
6076       break;
6077     case 'i':
6078       AndMask |= Mask;
6079       XorMask |= Mask;
6080       break;
6081     }
6082   }
6083 
6084   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6085   return true;
6086 }
6087 
6088 bool
6089 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6090 
6091   SMLoc OffsetLoc = Parser.getTok().getLoc();
6092 
6093   if (!parseExpr(Imm)) {
6094     return false;
6095   }
6096   if (!isUInt<16>(Imm)) {
6097     Error(OffsetLoc, "expected a 16-bit offset");
6098     return false;
6099   }
6100   return true;
6101 }
6102 
6103 bool
6104 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6105   using namespace llvm::AMDGPU::Swizzle;
6106 
6107   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6108 
6109     SMLoc ModeLoc = Parser.getTok().getLoc();
6110     bool Ok = false;
6111 
6112     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6113       Ok = parseSwizzleQuadPerm(Imm);
6114     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6115       Ok = parseSwizzleBitmaskPerm(Imm);
6116     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6117       Ok = parseSwizzleBroadcast(Imm);
6118     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6119       Ok = parseSwizzleSwap(Imm);
6120     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6121       Ok = parseSwizzleReverse(Imm);
6122     } else {
6123       Error(ModeLoc, "expected a swizzle mode");
6124     }
6125 
6126     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6127   }
6128 
6129   return false;
6130 }
6131 
6132 OperandMatchResultTy
6133 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6134   SMLoc S = Parser.getTok().getLoc();
6135   int64_t Imm = 0;
6136 
6137   if (trySkipId("offset")) {
6138 
6139     bool Ok = false;
6140     if (skipToken(AsmToken::Colon, "expected a colon")) {
6141       if (trySkipId("swizzle")) {
6142         Ok = parseSwizzleMacro(Imm);
6143       } else {
6144         Ok = parseSwizzleOffset(Imm);
6145       }
6146     }
6147 
6148     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6149 
6150     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6151   } else {
6152     // Swizzle "offset" operand is optional.
6153     // If it is omitted, try parsing other optional operands.
6154     return parseOptionalOpr(Operands);
6155   }
6156 }
6157 
6158 bool
6159 AMDGPUOperand::isSwizzle() const {
6160   return isImmTy(ImmTySwizzle);
6161 }
6162 
6163 //===----------------------------------------------------------------------===//
6164 // VGPR Index Mode
6165 //===----------------------------------------------------------------------===//
6166 
6167 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6168 
6169   using namespace llvm::AMDGPU::VGPRIndexMode;
6170 
6171   if (trySkipToken(AsmToken::RParen)) {
6172     return OFF;
6173   }
6174 
6175   int64_t Imm = 0;
6176 
6177   while (true) {
6178     unsigned Mode = 0;
6179     SMLoc S = Parser.getTok().getLoc();
6180 
6181     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6182       if (trySkipId(IdSymbolic[ModeId])) {
6183         Mode = 1 << ModeId;
6184         break;
6185       }
6186     }
6187 
6188     if (Mode == 0) {
6189       Error(S, (Imm == 0)?
6190                "expected a VGPR index mode or a closing parenthesis" :
6191                "expected a VGPR index mode");
6192       return UNDEF;
6193     }
6194 
6195     if (Imm & Mode) {
6196       Error(S, "duplicate VGPR index mode");
6197       return UNDEF;
6198     }
6199     Imm |= Mode;
6200 
6201     if (trySkipToken(AsmToken::RParen))
6202       break;
6203     if (!skipToken(AsmToken::Comma,
6204                    "expected a comma or a closing parenthesis"))
6205       return UNDEF;
6206   }
6207 
6208   return Imm;
6209 }
6210 
6211 OperandMatchResultTy
6212 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6213 
6214   using namespace llvm::AMDGPU::VGPRIndexMode;
6215 
6216   int64_t Imm = 0;
6217   SMLoc S = Parser.getTok().getLoc();
6218 
6219   if (getLexer().getKind() == AsmToken::Identifier &&
6220       Parser.getTok().getString() == "gpr_idx" &&
6221       getLexer().peekTok().is(AsmToken::LParen)) {
6222 
6223     Parser.Lex();
6224     Parser.Lex();
6225 
6226     Imm = parseGPRIdxMacro();
6227     if (Imm == UNDEF)
6228       return MatchOperand_ParseFail;
6229 
6230   } else {
6231     if (getParser().parseAbsoluteExpression(Imm))
6232       return MatchOperand_ParseFail;
6233     if (Imm < 0 || !isUInt<4>(Imm)) {
6234       Error(S, "invalid immediate: only 4-bit values are legal");
6235       return MatchOperand_ParseFail;
6236     }
6237   }
6238 
6239   Operands.push_back(
6240       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6241   return MatchOperand_Success;
6242 }
6243 
6244 bool AMDGPUOperand::isGPRIdxMode() const {
6245   return isImmTy(ImmTyGprIdxMode);
6246 }
6247 
6248 //===----------------------------------------------------------------------===//
6249 // sopp branch targets
6250 //===----------------------------------------------------------------------===//
6251 
6252 OperandMatchResultTy
6253 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6254 
6255   // Make sure we are not parsing something
6256   // that looks like a label or an expression but is not.
6257   // This will improve error messages.
6258   if (isRegister() || isModifier())
6259     return MatchOperand_NoMatch;
6260 
6261   if (!parseExpr(Operands))
6262     return MatchOperand_ParseFail;
6263 
6264   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6265   assert(Opr.isImm() || Opr.isExpr());
6266   SMLoc Loc = Opr.getStartLoc();
6267 
6268   // Currently we do not support arbitrary expressions as branch targets.
6269   // Only labels and absolute expressions are accepted.
6270   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6271     Error(Loc, "expected an absolute expression or a label");
6272   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6273     Error(Loc, "expected a 16-bit signed jump offset");
6274   }
6275 
6276   return MatchOperand_Success;
6277 }
6278 
6279 //===----------------------------------------------------------------------===//
6280 // Boolean holding registers
6281 //===----------------------------------------------------------------------===//
6282 
6283 OperandMatchResultTy
6284 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6285   return parseReg(Operands);
6286 }
6287 
6288 //===----------------------------------------------------------------------===//
6289 // mubuf
6290 //===----------------------------------------------------------------------===//
6291 
6292 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6293   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6294 }
6295 
6296 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6297   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6298 }
6299 
6300 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6301   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6302 }
6303 
6304 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6305                                const OperandVector &Operands,
6306                                bool IsAtomic,
6307                                bool IsAtomicReturn,
6308                                bool IsLds) {
6309   bool IsLdsOpcode = IsLds;
6310   bool HasLdsModifier = false;
6311   OptionalImmIndexMap OptionalIdx;
6312   assert(IsAtomicReturn ? IsAtomic : true);
6313   unsigned FirstOperandIdx = 1;
6314 
6315   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6316     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6317 
6318     // Add the register arguments
6319     if (Op.isReg()) {
6320       Op.addRegOperands(Inst, 1);
6321       // Insert a tied src for atomic return dst.
6322       // This cannot be postponed as subsequent calls to
6323       // addImmOperands rely on correct number of MC operands.
6324       if (IsAtomicReturn && i == FirstOperandIdx)
6325         Op.addRegOperands(Inst, 1);
6326       continue;
6327     }
6328 
6329     // Handle the case where soffset is an immediate
6330     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6331       Op.addImmOperands(Inst, 1);
6332       continue;
6333     }
6334 
6335     HasLdsModifier |= Op.isLDS();
6336 
6337     // Handle tokens like 'offen' which are sometimes hard-coded into the
6338     // asm string.  There are no MCInst operands for these.
6339     if (Op.isToken()) {
6340       continue;
6341     }
6342     assert(Op.isImm());
6343 
6344     // Handle optional arguments
6345     OptionalIdx[Op.getImmTy()] = i;
6346   }
6347 
6348   // This is a workaround for an llvm quirk which may result in an
6349   // incorrect instruction selection. Lds and non-lds versions of
6350   // MUBUF instructions are identical except that lds versions
6351   // have mandatory 'lds' modifier. However this modifier follows
6352   // optional modifiers and llvm asm matcher regards this 'lds'
6353   // modifier as an optional one. As a result, an lds version
6354   // of opcode may be selected even if it has no 'lds' modifier.
6355   if (IsLdsOpcode && !HasLdsModifier) {
6356     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6357     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6358       Inst.setOpcode(NoLdsOpcode);
6359       IsLdsOpcode = false;
6360     }
6361   }
6362 
6363   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6364   if (!IsAtomic) { // glc is hard-coded.
6365     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6366   }
6367   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6368 
6369   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6370     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6371   }
6372 
6373   if (isGFX10())
6374     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6375 }
6376 
6377 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6378   OptionalImmIndexMap OptionalIdx;
6379 
6380   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6381     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6382 
6383     // Add the register arguments
6384     if (Op.isReg()) {
6385       Op.addRegOperands(Inst, 1);
6386       continue;
6387     }
6388 
6389     // Handle the case where soffset is an immediate
6390     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6391       Op.addImmOperands(Inst, 1);
6392       continue;
6393     }
6394 
6395     // Handle tokens like 'offen' which are sometimes hard-coded into the
6396     // asm string.  There are no MCInst operands for these.
6397     if (Op.isToken()) {
6398       continue;
6399     }
6400     assert(Op.isImm());
6401 
6402     // Handle optional arguments
6403     OptionalIdx[Op.getImmTy()] = i;
6404   }
6405 
6406   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6407                         AMDGPUOperand::ImmTyOffset);
6408   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6409   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6410   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6411   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6412 
6413   if (isGFX10())
6414     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6415 }
6416 
6417 //===----------------------------------------------------------------------===//
6418 // mimg
6419 //===----------------------------------------------------------------------===//
6420 
6421 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6422                               bool IsAtomic) {
6423   unsigned I = 1;
6424   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6425   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6426     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6427   }
6428 
6429   if (IsAtomic) {
6430     // Add src, same as dst
6431     assert(Desc.getNumDefs() == 1);
6432     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6433   }
6434 
6435   OptionalImmIndexMap OptionalIdx;
6436 
6437   for (unsigned E = Operands.size(); I != E; ++I) {
6438     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6439 
6440     // Add the register arguments
6441     if (Op.isReg()) {
6442       Op.addRegOperands(Inst, 1);
6443     } else if (Op.isImmModifier()) {
6444       OptionalIdx[Op.getImmTy()] = I;
6445     } else if (!Op.isToken()) {
6446       llvm_unreachable("unexpected operand type");
6447     }
6448   }
6449 
6450   bool IsGFX10 = isGFX10();
6451 
6452   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6453   if (IsGFX10)
6454     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6455   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6456   if (IsGFX10)
6457     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6458   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6459   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6460   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6461   if (IsGFX10)
6462     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6463   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6464   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6465   if (!IsGFX10)
6466     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6467   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6468 }
6469 
6470 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6471   cvtMIMG(Inst, Operands, true);
6472 }
6473 
6474 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6475                                       const OperandVector &Operands) {
6476   for (unsigned I = 1; I < Operands.size(); ++I) {
6477     auto &Operand = (AMDGPUOperand &)*Operands[I];
6478     if (Operand.isReg())
6479       Operand.addRegOperands(Inst, 1);
6480   }
6481 
6482   Inst.addOperand(MCOperand::createImm(1)); // a16
6483 }
6484 
6485 //===----------------------------------------------------------------------===//
6486 // smrd
6487 //===----------------------------------------------------------------------===//
6488 
6489 bool AMDGPUOperand::isSMRDOffset8() const {
6490   return isImm() && isUInt<8>(getImm());
6491 }
6492 
6493 bool AMDGPUOperand::isSMEMOffset() const {
6494   return isImm(); // Offset range is checked later by validator.
6495 }
6496 
6497 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6498   // 32-bit literals are only supported on CI and we only want to use them
6499   // when the offset is > 8-bits.
6500   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6501 }
6502 
6503 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6504   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6505 }
6506 
6507 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6508   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6509 }
6510 
6511 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6512   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6513 }
6514 
6515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6516   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6517 }
6518 
6519 //===----------------------------------------------------------------------===//
6520 // vop3
6521 //===----------------------------------------------------------------------===//
6522 
6523 static bool ConvertOmodMul(int64_t &Mul) {
6524   if (Mul != 1 && Mul != 2 && Mul != 4)
6525     return false;
6526 
6527   Mul >>= 1;
6528   return true;
6529 }
6530 
6531 static bool ConvertOmodDiv(int64_t &Div) {
6532   if (Div == 1) {
6533     Div = 0;
6534     return true;
6535   }
6536 
6537   if (Div == 2) {
6538     Div = 3;
6539     return true;
6540   }
6541 
6542   return false;
6543 }
6544 
6545 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6546   if (BoundCtrl == 0) {
6547     BoundCtrl = 1;
6548     return true;
6549   }
6550 
6551   if (BoundCtrl == -1) {
6552     BoundCtrl = 0;
6553     return true;
6554   }
6555 
6556   return false;
6557 }
6558 
6559 // Note: the order in this table matches the order of operands in AsmString.
6560 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6561   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6562   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6563   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6564   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6565   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6566   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6567   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6568   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6569   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6570   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6571   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6572   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6573   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6574   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6575   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6576   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6577   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6578   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6579   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6580   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6581   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6582   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6583   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6584   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6585   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6586   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6587   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6588   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6589   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6590   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6591   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6592   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6593   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6594   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6595   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6596   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6597   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6598   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6599   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6600   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6601   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6602   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6603   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6604 };
6605 
6606 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6607 
6608   OperandMatchResultTy res = parseOptionalOpr(Operands);
6609 
6610   // This is a hack to enable hardcoded mandatory operands which follow
6611   // optional operands.
6612   //
6613   // Current design assumes that all operands after the first optional operand
6614   // are also optional. However implementation of some instructions violates
6615   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6616   //
6617   // To alleviate this problem, we have to (implicitly) parse extra operands
6618   // to make sure autogenerated parser of custom operands never hit hardcoded
6619   // mandatory operands.
6620 
6621   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6622     if (res != MatchOperand_Success ||
6623         isToken(AsmToken::EndOfStatement))
6624       break;
6625 
6626     trySkipToken(AsmToken::Comma);
6627     res = parseOptionalOpr(Operands);
6628   }
6629 
6630   return res;
6631 }
6632 
6633 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6634   OperandMatchResultTy res;
6635   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6636     // try to parse any optional operand here
6637     if (Op.IsBit) {
6638       res = parseNamedBit(Op.Name, Operands, Op.Type);
6639     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6640       res = parseOModOperand(Operands);
6641     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6642                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6643                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6644       res = parseSDWASel(Operands, Op.Name, Op.Type);
6645     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6646       res = parseSDWADstUnused(Operands);
6647     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6648                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6649                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6650                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6651       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6652                                         Op.ConvertResult);
6653     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6654       res = parseDim(Operands);
6655     } else {
6656       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6657     }
6658     if (res != MatchOperand_NoMatch) {
6659       return res;
6660     }
6661   }
6662   return MatchOperand_NoMatch;
6663 }
6664 
6665 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6666   StringRef Name = Parser.getTok().getString();
6667   if (Name == "mul") {
6668     return parseIntWithPrefix("mul", Operands,
6669                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6670   }
6671 
6672   if (Name == "div") {
6673     return parseIntWithPrefix("div", Operands,
6674                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6675   }
6676 
6677   return MatchOperand_NoMatch;
6678 }
6679 
6680 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6681   cvtVOP3P(Inst, Operands);
6682 
6683   int Opc = Inst.getOpcode();
6684 
6685   int SrcNum;
6686   const int Ops[] = { AMDGPU::OpName::src0,
6687                       AMDGPU::OpName::src1,
6688                       AMDGPU::OpName::src2 };
6689   for (SrcNum = 0;
6690        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6691        ++SrcNum);
6692   assert(SrcNum > 0);
6693 
6694   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6695   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6696 
6697   if ((OpSel & (1 << SrcNum)) != 0) {
6698     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6699     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6700     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6701   }
6702 }
6703 
6704 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6705       // 1. This operand is input modifiers
6706   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6707       // 2. This is not last operand
6708       && Desc.NumOperands > (OpNum + 1)
6709       // 3. Next operand is register class
6710       && Desc.OpInfo[OpNum + 1].RegClass != -1
6711       // 4. Next register is not tied to any other operand
6712       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6713 }
6714 
6715 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6716 {
6717   OptionalImmIndexMap OptionalIdx;
6718   unsigned Opc = Inst.getOpcode();
6719 
6720   unsigned I = 1;
6721   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6722   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6723     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6724   }
6725 
6726   for (unsigned E = Operands.size(); I != E; ++I) {
6727     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6728     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6729       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6730     } else if (Op.isInterpSlot() ||
6731                Op.isInterpAttr() ||
6732                Op.isAttrChan()) {
6733       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6734     } else if (Op.isImmModifier()) {
6735       OptionalIdx[Op.getImmTy()] = I;
6736     } else {
6737       llvm_unreachable("unhandled operand type");
6738     }
6739   }
6740 
6741   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6742     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6743   }
6744 
6745   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6746     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6747   }
6748 
6749   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6750     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6751   }
6752 }
6753 
6754 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6755                               OptionalImmIndexMap &OptionalIdx) {
6756   unsigned Opc = Inst.getOpcode();
6757 
6758   unsigned I = 1;
6759   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6760   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6761     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6762   }
6763 
6764   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6765     // This instruction has src modifiers
6766     for (unsigned E = Operands.size(); I != E; ++I) {
6767       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6768       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6769         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6770       } else if (Op.isImmModifier()) {
6771         OptionalIdx[Op.getImmTy()] = I;
6772       } else if (Op.isRegOrImm()) {
6773         Op.addRegOrImmOperands(Inst, 1);
6774       } else {
6775         llvm_unreachable("unhandled operand type");
6776       }
6777     }
6778   } else {
6779     // No src modifiers
6780     for (unsigned E = Operands.size(); I != E; ++I) {
6781       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6782       if (Op.isMod()) {
6783         OptionalIdx[Op.getImmTy()] = I;
6784       } else {
6785         Op.addRegOrImmOperands(Inst, 1);
6786       }
6787     }
6788   }
6789 
6790   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6791     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6792   }
6793 
6794   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6795     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6796   }
6797 
6798   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6799   // it has src2 register operand that is tied to dst operand
6800   // we don't allow modifiers for this operand in assembler so src2_modifiers
6801   // should be 0.
6802   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6803       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6804       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6805       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6806       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6807       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6808       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6809     auto it = Inst.begin();
6810     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6811     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6812     ++it;
6813     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6814   }
6815 }
6816 
6817 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6818   OptionalImmIndexMap OptionalIdx;
6819   cvtVOP3(Inst, Operands, OptionalIdx);
6820 }
6821 
6822 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6823                                const OperandVector &Operands) {
6824   OptionalImmIndexMap OptIdx;
6825   const int Opc = Inst.getOpcode();
6826   const MCInstrDesc &Desc = MII.get(Opc);
6827 
6828   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6829 
6830   cvtVOP3(Inst, Operands, OptIdx);
6831 
6832   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6833     assert(!IsPacked);
6834     Inst.addOperand(Inst.getOperand(0));
6835   }
6836 
6837   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6838   // instruction, and then figure out where to actually put the modifiers
6839 
6840   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6841 
6842   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6843   if (OpSelHiIdx != -1) {
6844     int DefaultVal = IsPacked ? -1 : 0;
6845     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6846                           DefaultVal);
6847   }
6848 
6849   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6850   if (NegLoIdx != -1) {
6851     assert(IsPacked);
6852     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6853     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6854   }
6855 
6856   const int Ops[] = { AMDGPU::OpName::src0,
6857                       AMDGPU::OpName::src1,
6858                       AMDGPU::OpName::src2 };
6859   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6860                          AMDGPU::OpName::src1_modifiers,
6861                          AMDGPU::OpName::src2_modifiers };
6862 
6863   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6864 
6865   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6866   unsigned OpSelHi = 0;
6867   unsigned NegLo = 0;
6868   unsigned NegHi = 0;
6869 
6870   if (OpSelHiIdx != -1) {
6871     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6872   }
6873 
6874   if (NegLoIdx != -1) {
6875     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6876     NegLo = Inst.getOperand(NegLoIdx).getImm();
6877     NegHi = Inst.getOperand(NegHiIdx).getImm();
6878   }
6879 
6880   for (int J = 0; J < 3; ++J) {
6881     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6882     if (OpIdx == -1)
6883       break;
6884 
6885     uint32_t ModVal = 0;
6886 
6887     if ((OpSel & (1 << J)) != 0)
6888       ModVal |= SISrcMods::OP_SEL_0;
6889 
6890     if ((OpSelHi & (1 << J)) != 0)
6891       ModVal |= SISrcMods::OP_SEL_1;
6892 
6893     if ((NegLo & (1 << J)) != 0)
6894       ModVal |= SISrcMods::NEG;
6895 
6896     if ((NegHi & (1 << J)) != 0)
6897       ModVal |= SISrcMods::NEG_HI;
6898 
6899     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6900 
6901     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6902   }
6903 }
6904 
6905 //===----------------------------------------------------------------------===//
6906 // dpp
6907 //===----------------------------------------------------------------------===//
6908 
6909 bool AMDGPUOperand::isDPP8() const {
6910   return isImmTy(ImmTyDPP8);
6911 }
6912 
6913 bool AMDGPUOperand::isDPPCtrl() const {
6914   using namespace AMDGPU::DPP;
6915 
6916   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6917   if (result) {
6918     int64_t Imm = getImm();
6919     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6920            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6921            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6922            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6923            (Imm == DppCtrl::WAVE_SHL1) ||
6924            (Imm == DppCtrl::WAVE_ROL1) ||
6925            (Imm == DppCtrl::WAVE_SHR1) ||
6926            (Imm == DppCtrl::WAVE_ROR1) ||
6927            (Imm == DppCtrl::ROW_MIRROR) ||
6928            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6929            (Imm == DppCtrl::BCAST15) ||
6930            (Imm == DppCtrl::BCAST31) ||
6931            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6932            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6933   }
6934   return false;
6935 }
6936 
6937 //===----------------------------------------------------------------------===//
6938 // mAI
6939 //===----------------------------------------------------------------------===//
6940 
6941 bool AMDGPUOperand::isBLGP() const {
6942   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6943 }
6944 
6945 bool AMDGPUOperand::isCBSZ() const {
6946   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6947 }
6948 
6949 bool AMDGPUOperand::isABID() const {
6950   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6951 }
6952 
6953 bool AMDGPUOperand::isS16Imm() const {
6954   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6955 }
6956 
6957 bool AMDGPUOperand::isU16Imm() const {
6958   return isImm() && isUInt<16>(getImm());
6959 }
6960 
6961 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6962   if (!isGFX10())
6963     return MatchOperand_NoMatch;
6964 
6965   SMLoc S = Parser.getTok().getLoc();
6966 
6967   if (getLexer().isNot(AsmToken::Identifier))
6968     return MatchOperand_NoMatch;
6969   if (getLexer().getTok().getString() != "dim")
6970     return MatchOperand_NoMatch;
6971 
6972   Parser.Lex();
6973   if (getLexer().isNot(AsmToken::Colon))
6974     return MatchOperand_ParseFail;
6975 
6976   Parser.Lex();
6977 
6978   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6979   // integer.
6980   std::string Token;
6981   if (getLexer().is(AsmToken::Integer)) {
6982     SMLoc Loc = getLexer().getTok().getEndLoc();
6983     Token = std::string(getLexer().getTok().getString());
6984     Parser.Lex();
6985     if (getLexer().getTok().getLoc() != Loc)
6986       return MatchOperand_ParseFail;
6987   }
6988   if (getLexer().isNot(AsmToken::Identifier))
6989     return MatchOperand_ParseFail;
6990   Token += getLexer().getTok().getString();
6991 
6992   StringRef DimId = Token;
6993   if (DimId.startswith("SQ_RSRC_IMG_"))
6994     DimId = DimId.substr(12);
6995 
6996   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6997   if (!DimInfo)
6998     return MatchOperand_ParseFail;
6999 
7000   Parser.Lex();
7001 
7002   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7003                                               AMDGPUOperand::ImmTyDim));
7004   return MatchOperand_Success;
7005 }
7006 
7007 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7008   SMLoc S = Parser.getTok().getLoc();
7009   StringRef Prefix;
7010 
7011   if (getLexer().getKind() == AsmToken::Identifier) {
7012     Prefix = Parser.getTok().getString();
7013   } else {
7014     return MatchOperand_NoMatch;
7015   }
7016 
7017   if (Prefix != "dpp8")
7018     return parseDPPCtrl(Operands);
7019   if (!isGFX10())
7020     return MatchOperand_NoMatch;
7021 
7022   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7023 
7024   int64_t Sels[8];
7025 
7026   Parser.Lex();
7027   if (getLexer().isNot(AsmToken::Colon))
7028     return MatchOperand_ParseFail;
7029 
7030   Parser.Lex();
7031   if (getLexer().isNot(AsmToken::LBrac))
7032     return MatchOperand_ParseFail;
7033 
7034   Parser.Lex();
7035   if (getParser().parseAbsoluteExpression(Sels[0]))
7036     return MatchOperand_ParseFail;
7037   if (0 > Sels[0] || 7 < Sels[0])
7038     return MatchOperand_ParseFail;
7039 
7040   for (size_t i = 1; i < 8; ++i) {
7041     if (getLexer().isNot(AsmToken::Comma))
7042       return MatchOperand_ParseFail;
7043 
7044     Parser.Lex();
7045     if (getParser().parseAbsoluteExpression(Sels[i]))
7046       return MatchOperand_ParseFail;
7047     if (0 > Sels[i] || 7 < Sels[i])
7048       return MatchOperand_ParseFail;
7049   }
7050 
7051   if (getLexer().isNot(AsmToken::RBrac))
7052     return MatchOperand_ParseFail;
7053   Parser.Lex();
7054 
7055   unsigned DPP8 = 0;
7056   for (size_t i = 0; i < 8; ++i)
7057     DPP8 |= (Sels[i] << (i * 3));
7058 
7059   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7060   return MatchOperand_Success;
7061 }
7062 
7063 OperandMatchResultTy
7064 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7065   using namespace AMDGPU::DPP;
7066 
7067   SMLoc S = Parser.getTok().getLoc();
7068   StringRef Prefix;
7069   int64_t Int;
7070 
7071   if (getLexer().getKind() == AsmToken::Identifier) {
7072     Prefix = Parser.getTok().getString();
7073   } else {
7074     return MatchOperand_NoMatch;
7075   }
7076 
7077   if (Prefix == "row_mirror") {
7078     Int = DppCtrl::ROW_MIRROR;
7079     Parser.Lex();
7080   } else if (Prefix == "row_half_mirror") {
7081     Int = DppCtrl::ROW_HALF_MIRROR;
7082     Parser.Lex();
7083   } else {
7084     // Check to prevent parseDPPCtrlOps from eating invalid tokens
7085     if (Prefix != "quad_perm"
7086         && Prefix != "row_shl"
7087         && Prefix != "row_shr"
7088         && Prefix != "row_ror"
7089         && Prefix != "wave_shl"
7090         && Prefix != "wave_rol"
7091         && Prefix != "wave_shr"
7092         && Prefix != "wave_ror"
7093         && Prefix != "row_bcast"
7094         && Prefix != "row_share"
7095         && Prefix != "row_xmask") {
7096       return MatchOperand_NoMatch;
7097     }
7098 
7099     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
7100       return MatchOperand_NoMatch;
7101 
7102     if (!isVI() && !isGFX9() &&
7103         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7104          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7105          Prefix == "row_bcast"))
7106       return MatchOperand_NoMatch;
7107 
7108     Parser.Lex();
7109     if (getLexer().isNot(AsmToken::Colon))
7110       return MatchOperand_ParseFail;
7111 
7112     if (Prefix == "quad_perm") {
7113       // quad_perm:[%d,%d,%d,%d]
7114       Parser.Lex();
7115       if (getLexer().isNot(AsmToken::LBrac))
7116         return MatchOperand_ParseFail;
7117       Parser.Lex();
7118 
7119       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7120         return MatchOperand_ParseFail;
7121 
7122       for (int i = 0; i < 3; ++i) {
7123         if (getLexer().isNot(AsmToken::Comma))
7124           return MatchOperand_ParseFail;
7125         Parser.Lex();
7126 
7127         int64_t Temp;
7128         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7129           return MatchOperand_ParseFail;
7130         const int shift = i*2 + 2;
7131         Int += (Temp << shift);
7132       }
7133 
7134       if (getLexer().isNot(AsmToken::RBrac))
7135         return MatchOperand_ParseFail;
7136       Parser.Lex();
7137     } else {
7138       // sel:%d
7139       Parser.Lex();
7140       if (getParser().parseAbsoluteExpression(Int))
7141         return MatchOperand_ParseFail;
7142 
7143       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7144         Int |= DppCtrl::ROW_SHL0;
7145       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7146         Int |= DppCtrl::ROW_SHR0;
7147       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7148         Int |= DppCtrl::ROW_ROR0;
7149       } else if (Prefix == "wave_shl" && 1 == Int) {
7150         Int = DppCtrl::WAVE_SHL1;
7151       } else if (Prefix == "wave_rol" && 1 == Int) {
7152         Int = DppCtrl::WAVE_ROL1;
7153       } else if (Prefix == "wave_shr" && 1 == Int) {
7154         Int = DppCtrl::WAVE_SHR1;
7155       } else if (Prefix == "wave_ror" && 1 == Int) {
7156         Int = DppCtrl::WAVE_ROR1;
7157       } else if (Prefix == "row_bcast") {
7158         if (Int == 15) {
7159           Int = DppCtrl::BCAST15;
7160         } else if (Int == 31) {
7161           Int = DppCtrl::BCAST31;
7162         } else {
7163           return MatchOperand_ParseFail;
7164         }
7165       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7166         Int |= DppCtrl::ROW_SHARE_FIRST;
7167       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7168         Int |= DppCtrl::ROW_XMASK_FIRST;
7169       } else {
7170         return MatchOperand_ParseFail;
7171       }
7172     }
7173   }
7174 
7175   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7176   return MatchOperand_Success;
7177 }
7178 
7179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7180   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7181 }
7182 
7183 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7184   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7185 }
7186 
7187 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7188   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7189 }
7190 
7191 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7192   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7193 }
7194 
7195 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7196   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7197 }
7198 
7199 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7200   OptionalImmIndexMap OptionalIdx;
7201 
7202   unsigned I = 1;
7203   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7204   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7205     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7206   }
7207 
7208   int Fi = 0;
7209   for (unsigned E = Operands.size(); I != E; ++I) {
7210     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7211                                             MCOI::TIED_TO);
7212     if (TiedTo != -1) {
7213       assert((unsigned)TiedTo < Inst.getNumOperands());
7214       // handle tied old or src2 for MAC instructions
7215       Inst.addOperand(Inst.getOperand(TiedTo));
7216     }
7217     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7218     // Add the register arguments
7219     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7220       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7221       // Skip it.
7222       continue;
7223     }
7224 
7225     if (IsDPP8) {
7226       if (Op.isDPP8()) {
7227         Op.addImmOperands(Inst, 1);
7228       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7229         Op.addRegWithFPInputModsOperands(Inst, 2);
7230       } else if (Op.isFI()) {
7231         Fi = Op.getImm();
7232       } else if (Op.isReg()) {
7233         Op.addRegOperands(Inst, 1);
7234       } else {
7235         llvm_unreachable("Invalid operand type");
7236       }
7237     } else {
7238       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7239         Op.addRegWithFPInputModsOperands(Inst, 2);
7240       } else if (Op.isDPPCtrl()) {
7241         Op.addImmOperands(Inst, 1);
7242       } else if (Op.isImm()) {
7243         // Handle optional arguments
7244         OptionalIdx[Op.getImmTy()] = I;
7245       } else {
7246         llvm_unreachable("Invalid operand type");
7247       }
7248     }
7249   }
7250 
7251   if (IsDPP8) {
7252     using namespace llvm::AMDGPU::DPP;
7253     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7254   } else {
7255     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7256     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7257     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7258     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7259       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7260     }
7261   }
7262 }
7263 
7264 //===----------------------------------------------------------------------===//
7265 // sdwa
7266 //===----------------------------------------------------------------------===//
7267 
7268 OperandMatchResultTy
7269 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7270                               AMDGPUOperand::ImmTy Type) {
7271   using namespace llvm::AMDGPU::SDWA;
7272 
7273   SMLoc S = Parser.getTok().getLoc();
7274   StringRef Value;
7275   OperandMatchResultTy res;
7276 
7277   res = parseStringWithPrefix(Prefix, Value);
7278   if (res != MatchOperand_Success) {
7279     return res;
7280   }
7281 
7282   int64_t Int;
7283   Int = StringSwitch<int64_t>(Value)
7284         .Case("BYTE_0", SdwaSel::BYTE_0)
7285         .Case("BYTE_1", SdwaSel::BYTE_1)
7286         .Case("BYTE_2", SdwaSel::BYTE_2)
7287         .Case("BYTE_3", SdwaSel::BYTE_3)
7288         .Case("WORD_0", SdwaSel::WORD_0)
7289         .Case("WORD_1", SdwaSel::WORD_1)
7290         .Case("DWORD", SdwaSel::DWORD)
7291         .Default(0xffffffff);
7292   Parser.Lex(); // eat last token
7293 
7294   if (Int == 0xffffffff) {
7295     return MatchOperand_ParseFail;
7296   }
7297 
7298   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7299   return MatchOperand_Success;
7300 }
7301 
7302 OperandMatchResultTy
7303 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7304   using namespace llvm::AMDGPU::SDWA;
7305 
7306   SMLoc S = Parser.getTok().getLoc();
7307   StringRef Value;
7308   OperandMatchResultTy res;
7309 
7310   res = parseStringWithPrefix("dst_unused", Value);
7311   if (res != MatchOperand_Success) {
7312     return res;
7313   }
7314 
7315   int64_t Int;
7316   Int = StringSwitch<int64_t>(Value)
7317         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7318         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7319         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7320         .Default(0xffffffff);
7321   Parser.Lex(); // eat last token
7322 
7323   if (Int == 0xffffffff) {
7324     return MatchOperand_ParseFail;
7325   }
7326 
7327   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7328   return MatchOperand_Success;
7329 }
7330 
7331 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7332   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7333 }
7334 
7335 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7336   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7337 }
7338 
7339 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7340   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7341 }
7342 
7343 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7344   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7345 }
7346 
7347 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7348   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7349 }
7350 
7351 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7352                               uint64_t BasicInstType,
7353                               bool SkipDstVcc,
7354                               bool SkipSrcVcc) {
7355   using namespace llvm::AMDGPU::SDWA;
7356 
7357   OptionalImmIndexMap OptionalIdx;
7358   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7359   bool SkippedVcc = false;
7360 
7361   unsigned I = 1;
7362   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7363   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7364     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7365   }
7366 
7367   for (unsigned E = Operands.size(); I != E; ++I) {
7368     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7369     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7370         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7371       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
7372       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7373       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7374       // Skip VCC only if we didn't skip it on previous iteration.
7375       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7376       if (BasicInstType == SIInstrFlags::VOP2 &&
7377           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7378            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7379         SkippedVcc = true;
7380         continue;
7381       } else if (BasicInstType == SIInstrFlags::VOPC &&
7382                  Inst.getNumOperands() == 0) {
7383         SkippedVcc = true;
7384         continue;
7385       }
7386     }
7387     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7388       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7389     } else if (Op.isImm()) {
7390       // Handle optional arguments
7391       OptionalIdx[Op.getImmTy()] = I;
7392     } else {
7393       llvm_unreachable("Invalid operand type");
7394     }
7395     SkippedVcc = false;
7396   }
7397 
7398   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7399       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7400       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7401     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
7402     switch (BasicInstType) {
7403     case SIInstrFlags::VOP1:
7404       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7405       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7406         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7407       }
7408       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7409       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7410       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7411       break;
7412 
7413     case SIInstrFlags::VOP2:
7414       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7415       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7416         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7417       }
7418       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7419       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7420       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7421       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7422       break;
7423 
7424     case SIInstrFlags::VOPC:
7425       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7426         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7427       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7428       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7429       break;
7430 
7431     default:
7432       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7433     }
7434   }
7435 
7436   // special case v_mac_{f16, f32}:
7437   // it has src2 register operand that is tied to dst operand
7438   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7439       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7440     auto it = Inst.begin();
7441     std::advance(
7442       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7443     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7444   }
7445 }
7446 
7447 //===----------------------------------------------------------------------===//
7448 // mAI
7449 //===----------------------------------------------------------------------===//
7450 
7451 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7452   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7453 }
7454 
7455 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7456   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7457 }
7458 
7459 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7460   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7461 }
7462 
7463 /// Force static initialization.
7464 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7465   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7466   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7467 }
7468 
7469 #define GET_REGISTER_MATCHER
7470 #define GET_MATCHER_IMPLEMENTATION
7471 #define GET_MNEMONIC_SPELL_CHECKER
7472 #include "AMDGPUGenAsmMatcher.inc"
7473 
7474 // This fuction should be defined after auto-generated include so that we have
7475 // MatchClassKind enum defined
7476 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7477                                                      unsigned Kind) {
7478   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7479   // But MatchInstructionImpl() expects to meet token and fails to validate
7480   // operand. This method checks if we are given immediate operand but expect to
7481   // get corresponding token.
7482   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7483   switch (Kind) {
7484   case MCK_addr64:
7485     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7486   case MCK_gds:
7487     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7488   case MCK_lds:
7489     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7490   case MCK_glc:
7491     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7492   case MCK_idxen:
7493     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7494   case MCK_offen:
7495     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7496   case MCK_SSrcB32:
7497     // When operands have expression values, they will return true for isToken,
7498     // because it is not possible to distinguish between a token and an
7499     // expression at parse time. MatchInstructionImpl() will always try to
7500     // match an operand as a token, when isToken returns true, and when the
7501     // name of the expression is not a valid token, the match will fail,
7502     // so we need to handle it here.
7503     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7504   case MCK_SSrcF32:
7505     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7506   case MCK_SoppBrTarget:
7507     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7508   case MCK_VReg32OrOff:
7509     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7510   case MCK_InterpSlot:
7511     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7512   case MCK_Attr:
7513     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7514   case MCK_AttrChan:
7515     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7516   case MCK_ImmSMEMOffset:
7517     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7518   case MCK_SReg_64:
7519   case MCK_SReg_64_XEXEC:
7520     // Null is defined as a 32-bit register but
7521     // it should also be enabled with 64-bit operands.
7522     // The following code enables it for SReg_64 operands
7523     // used as source and destination. Remaining source
7524     // operands are handled in isInlinableImm.
7525     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7526   default:
7527     return Match_InvalidOperand;
7528   }
7529 }
7530 
7531 //===----------------------------------------------------------------------===//
7532 // endpgm
7533 //===----------------------------------------------------------------------===//
7534 
7535 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7536   SMLoc S = Parser.getTok().getLoc();
7537   int64_t Imm = 0;
7538 
7539   if (!parseExpr(Imm)) {
7540     // The operand is optional, if not present default to 0
7541     Imm = 0;
7542   }
7543 
7544   if (!isUInt<16>(Imm)) {
7545     Error(S, "expected a 16-bit value");
7546     return MatchOperand_ParseFail;
7547   }
7548 
7549   Operands.push_back(
7550       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7551   return MatchOperand_Success;
7552 }
7553 
7554 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7555