1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
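  // Source operand modifiers. Abs and Neg are floating-point modifiers and
  // Sext is an integer modifier; the two families are mutually exclusive on
  // a single operand (see getModifiersOperand()).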
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
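  // Records how an immediate operand has been classified for encoding:
  // not yet classified, as a literal constant, or as an inline constant.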
191   enum ImmKindTy {
192     ImmKindTyNone,
193     ImmKindTyLiteral,
194     ImmKindTyConst,
195   };
196 
197 private:
198   struct TokOp {
199     const char *Data;
200     unsigned Length;
201   };
202 
203   struct ImmOp {
204     int64_t Val;
205     ImmTy Type;
206     bool IsFPImm;
207     mutable ImmKindTy Kind;
208     Modifiers Mods;
209   };
210 
211   struct RegOp {
212     unsigned RegNo;
213     Modifiers Mods;
214   };
215 
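  // Operand payload; only the member corresponding to Kind is valid.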
216   union {
217     TokOp Tok;
218     ImmOp Imm;
219     RegOp Reg;
220     const MCExpr *Expr;
221   };
222 
223 public:
224   bool isToken() const override {
225     if (Kind == Token)
226       return true;
227 
228     // When parsing operands, we can't always tell if something was meant to be
229     // a token, like 'gds', or an expression that references a global variable.
230     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
232     return isSymbolRefExpr();
233   }
234 
235   bool isSymbolRefExpr() const {
236     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
237   }
238 
239   bool isImm() const override {
240     return Kind == Immediate;
241   }
242 
243   void setImmKindNone() const {
244     assert(isImm());
245     Imm.Kind = ImmKindTyNone;
246   }
247 
248   void setImmKindLiteral() const {
249     assert(isImm());
250     Imm.Kind = ImmKindTyLiteral;
251   }
252 
253   void setImmKindConst() const {
254     assert(isImm());
255     Imm.Kind = ImmKindTyConst;
256   }
257 
258   bool IsImmKindLiteral() const {
259     return isImm() && Imm.Kind == ImmKindTyLiteral;
260   }
261 
262   bool isImmKindConst() const {
263     return isImm() && Imm.Kind == ImmKindTyConst;
264   }
265 
266   bool isInlinableImm(MVT type) const;
267   bool isLiteralImm(MVT type) const;
268 
269   bool isRegKind() const {
270     return Kind == Register;
271   }
272 
273   bool isReg() const override {
274     return isRegKind() && !hasModifiers();
275   }
276 
277   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
278     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
279   }
280 
281   bool isRegOrImmWithInt16InputMods() const {
282     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
283   }
284 
285   bool isRegOrImmWithInt32InputMods() const {
286     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
287   }
288 
289   bool isRegOrImmWithInt64InputMods() const {
290     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
291   }
292 
293   bool isRegOrImmWithFP16InputMods() const {
294     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
295   }
296 
297   bool isRegOrImmWithFP32InputMods() const {
298     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
299   }
300 
301   bool isRegOrImmWithFP64InputMods() const {
302     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
303   }
304 
305   bool isVReg() const {
306     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
307            isRegClass(AMDGPU::VReg_64RegClassID) ||
308            isRegClass(AMDGPU::VReg_96RegClassID) ||
309            isRegClass(AMDGPU::VReg_128RegClassID) ||
310            isRegClass(AMDGPU::VReg_160RegClassID) ||
311            isRegClass(AMDGPU::VReg_192RegClassID) ||
312            isRegClass(AMDGPU::VReg_256RegClassID) ||
313            isRegClass(AMDGPU::VReg_512RegClassID) ||
314            isRegClass(AMDGPU::VReg_1024RegClassID);
315   }
316 
317   bool isVReg32() const {
318     return isRegClass(AMDGPU::VGPR_32RegClassID);
319   }
320 
321   bool isVReg32OrOff() const {
322     return isOff() || isVReg32();
323   }
324 
325   bool isNull() const {
326     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
327   }
328 
329   bool isSDWAOperand(MVT type) const;
330   bool isSDWAFP16Operand() const;
331   bool isSDWAFP32Operand() const;
332   bool isSDWAInt16Operand() const;
333   bool isSDWAInt32Operand() const;
334 
335   bool isImmTy(ImmTy ImmT) const {
336     return isImm() && Imm.Type == ImmT;
337   }
338 
339   bool isImmModifier() const {
340     return isImm() && Imm.Type != ImmTyNone;
341   }
342 
343   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
344   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
345   bool isDMask() const { return isImmTy(ImmTyDMask); }
346   bool isDim() const { return isImmTy(ImmTyDim); }
347   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
348   bool isDA() const { return isImmTy(ImmTyDA); }
349   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
350   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
351   bool isLWE() const { return isImmTy(ImmTyLWE); }
352   bool isOff() const { return isImmTy(ImmTyOff); }
353   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
354   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
355   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
356   bool isOffen() const { return isImmTy(ImmTyOffen); }
357   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
358   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
359   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
360   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
361   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
362 
363   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
364   bool isGDS() const { return isImmTy(ImmTyGDS); }
365   bool isLDS() const { return isImmTy(ImmTyLDS); }
366   bool isDLC() const { return isImmTy(ImmTyDLC); }
367   bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is the match class for the GLC_1 operand, i.e. a GLC operand
  // whose value is both defaulted and forced to 1.
370   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
371   bool isSLC() const { return isImmTy(ImmTySLC); }
372   bool isSWZ() const { return isImmTy(ImmTySWZ); }
373   bool isTFE() const { return isImmTy(ImmTyTFE); }
374   bool isD16() const { return isImmTy(ImmTyD16); }
375   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
376   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
377   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
378   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
379   bool isFI() const { return isImmTy(ImmTyDppFi); }
380   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
381   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
382   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
383   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
384   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
385   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
386   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
387   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
388   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
389   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
390   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
391   bool isHigh() const { return isImmTy(ImmTyHigh); }
392 
393   bool isMod() const {
394     return isClampSI() || isOModSI();
395   }
396 
397   bool isRegOrImm() const {
398     return isReg() || isImm();
399   }
400 
401   bool isRegClass(unsigned RCID) const;
402 
403   bool isInlineValue() const;
404 
405   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
406     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
407   }
408 
409   bool isSCSrcB16() const {
410     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
411   }
412 
413   bool isSCSrcV2B16() const {
414     return isSCSrcB16();
415   }
416 
417   bool isSCSrcB32() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
419   }
420 
421   bool isSCSrcB64() const {
422     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
423   }
424 
425   bool isBoolReg() const;
426 
427   bool isSCSrcF16() const {
428     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
429   }
430 
431   bool isSCSrcV2F16() const {
432     return isSCSrcF16();
433   }
434 
435   bool isSCSrcF32() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
437   }
438 
439   bool isSCSrcF64() const {
440     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
441   }
442 
443   bool isSSrcB32() const {
444     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
445   }
446 
447   bool isSSrcB16() const {
448     return isSCSrcB16() || isLiteralImm(MVT::i16);
449   }
450 
451   bool isSSrcV2B16() const {
452     llvm_unreachable("cannot happen");
453     return isSSrcB16();
454   }
455 
456   bool isSSrcB64() const {
457     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
458     // See isVSrc64().
459     return isSCSrcB64() || isLiteralImm(MVT::i64);
460   }
461 
462   bool isSSrcF32() const {
463     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
464   }
465 
466   bool isSSrcF64() const {
467     return isSCSrcB64() || isLiteralImm(MVT::f64);
468   }
469 
470   bool isSSrcF16() const {
471     return isSCSrcB16() || isLiteralImm(MVT::f16);
472   }
473 
474   bool isSSrcV2F16() const {
475     llvm_unreachable("cannot happen");
476     return isSSrcF16();
477   }
478 
479   bool isSSrcOrLdsB32() const {
480     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
481            isLiteralImm(MVT::i32) || isExpr();
482   }
483 
484   bool isVCSrcB32() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
486   }
487 
488   bool isVCSrcB64() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
490   }
491 
492   bool isVCSrcB16() const {
493     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
494   }
495 
496   bool isVCSrcV2B16() const {
497     return isVCSrcB16();
498   }
499 
500   bool isVCSrcF32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
502   }
503 
504   bool isVCSrcF64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
506   }
507 
508   bool isVCSrcF16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
510   }
511 
512   bool isVCSrcV2F16() const {
513     return isVCSrcF16();
514   }
515 
516   bool isVSrcB32() const {
517     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
518   }
519 
520   bool isVSrcB64() const {
521     return isVCSrcF64() || isLiteralImm(MVT::i64);
522   }
523 
524   bool isVSrcB16() const {
525     return isVCSrcB16() || isLiteralImm(MVT::i16);
526   }
527 
528   bool isVSrcV2B16() const {
529     return isVSrcB16() || isLiteralImm(MVT::v2i16);
530   }
531 
532   bool isVSrcF32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
534   }
535 
536   bool isVSrcF64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::f64);
538   }
539 
540   bool isVSrcF16() const {
541     return isVCSrcF16() || isLiteralImm(MVT::f16);
542   }
543 
544   bool isVSrcV2F16() const {
545     return isVSrcF16() || isLiteralImm(MVT::v2f16);
546   }
547 
548   bool isVISrcB32() const {
549     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
550   }
551 
552   bool isVISrcB16() const {
553     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
554   }
555 
556   bool isVISrcV2B16() const {
557     return isVISrcB16();
558   }
559 
560   bool isVISrcF32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
562   }
563 
564   bool isVISrcF16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
566   }
567 
568   bool isVISrcV2F16() const {
569     return isVISrcF16() || isVISrcB32();
570   }
571 
572   bool isAISrcB32() const {
573     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
574   }
575 
576   bool isAISrcB16() const {
577     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
578   }
579 
580   bool isAISrcV2B16() const {
581     return isAISrcB16();
582   }
583 
584   bool isAISrcF32() const {
585     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
586   }
587 
588   bool isAISrcF16() const {
589     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
590   }
591 
592   bool isAISrcV2F16() const {
593     return isAISrcF16() || isAISrcB32();
594   }
595 
596   bool isAISrc_128B32() const {
597     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
598   }
599 
600   bool isAISrc_128B16() const {
601     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
602   }
603 
604   bool isAISrc_128V2B16() const {
605     return isAISrc_128B16();
606   }
607 
608   bool isAISrc_128F32() const {
609     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
610   }
611 
612   bool isAISrc_128F16() const {
613     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
614   }
615 
616   bool isAISrc_128V2F16() const {
617     return isAISrc_128F16() || isAISrc_128B32();
618   }
619 
620   bool isAISrc_512B32() const {
621     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
622   }
623 
624   bool isAISrc_512B16() const {
625     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
626   }
627 
628   bool isAISrc_512V2B16() const {
629     return isAISrc_512B16();
630   }
631 
632   bool isAISrc_512F32() const {
633     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
634   }
635 
636   bool isAISrc_512F16() const {
637     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
638   }
639 
640   bool isAISrc_512V2F16() const {
641     return isAISrc_512F16() || isAISrc_512B32();
642   }
643 
644   bool isAISrc_1024B32() const {
645     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
646   }
647 
648   bool isAISrc_1024B16() const {
649     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
650   }
651 
652   bool isAISrc_1024V2B16() const {
653     return isAISrc_1024B16();
654   }
655 
656   bool isAISrc_1024F32() const {
657     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
658   }
659 
660   bool isAISrc_1024F16() const {
661     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
662   }
663 
664   bool isAISrc_1024V2F16() const {
665     return isAISrc_1024F16() || isAISrc_1024B32();
666   }
667 
668   bool isKImmFP32() const {
669     return isLiteralImm(MVT::f32);
670   }
671 
672   bool isKImmFP16() const {
673     return isLiteralImm(MVT::f16);
674   }
675 
676   bool isMem() const override {
677     return false;
678   }
679 
680   bool isExpr() const {
681     return Kind == Expression;
682   }
683 
684   bool isSoppBrTarget() const {
685     return isExpr() || isImm();
686   }
687 
688   bool isSWaitCnt() const;
689   bool isHwreg() const;
690   bool isSendMsg() const;
691   bool isSwizzle() const;
692   bool isSMRDOffset8() const;
693   bool isSMEMOffset() const;
694   bool isSMRDLiteralOffset() const;
695   bool isDPP8() const;
696   bool isDPPCtrl() const;
697   bool isBLGP() const;
698   bool isCBSZ() const;
699   bool isABID() const;
700   bool isGPRIdxMode() const;
701   bool isS16Imm() const;
702   bool isU16Imm() const;
703   bool isEndpgm() const;
704 
705   StringRef getExpressionAsToken() const {
706     assert(isExpr());
707     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
708     return S->getSymbol().getName();
709   }
710 
711   StringRef getToken() const {
712     assert(isToken());
713 
714     if (Kind == Expression)
715       return getExpressionAsToken();
716 
717     return StringRef(Tok.Data, Tok.Length);
718   }
719 
720   int64_t getImm() const {
721     assert(isImm());
722     return Imm.Val;
723   }
724 
725   void setImm(int64_t Val) {
726     assert(isImm());
727     Imm.Val = Val;
728   }
729 
730   ImmTy getImmTy() const {
731     assert(isImm());
732     return Imm.Type;
733   }
734 
735   unsigned getReg() const override {
736     assert(isRegKind());
737     return Reg.RegNo;
738   }
739 
740   SMLoc getStartLoc() const override {
741     return StartLoc;
742   }
743 
744   SMLoc getEndLoc() const override {
745     return EndLoc;
746   }
747 
748   SMRange getLocRange() const {
749     return SMRange(StartLoc, EndLoc);
750   }
751 
752   Modifiers getModifiers() const {
753     assert(isRegKind() || isImmTy(ImmTyNone));
754     return isRegKind() ? Reg.Mods : Imm.Mods;
755   }
756 
757   void setModifiers(Modifiers Mods) {
758     assert(isRegKind() || isImmTy(ImmTyNone));
759     if (isRegKind())
760       Reg.Mods = Mods;
761     else
762       Imm.Mods = Mods;
763   }
764 
765   bool hasModifiers() const {
766     return getModifiers().hasModifiers();
767   }
768 
769   bool hasFPModifiers() const {
770     return getModifiers().hasFPModifiers();
771   }
772 
773   bool hasIntModifiers() const {
774     return getModifiers().hasIntModifiers();
775   }
776 
777   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
778 
779   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
780 
781   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
782 
783   template <unsigned Bitwidth>
784   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
785 
786   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
787     addKImmFPOperands<16>(Inst, N);
788   }
789 
790   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
791     addKImmFPOperands<32>(Inst, N);
792   }
793 
794   void addRegOperands(MCInst &Inst, unsigned N) const;
795 
796   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
797     addRegOperands(Inst, N);
798   }
799 
800   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
801     if (isRegKind())
802       addRegOperands(Inst, N);
803     else if (isExpr())
804       Inst.addOperand(MCOperand::createExpr(Expr));
805     else
806       addImmOperands(Inst, N);
807   }
808 
809   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
810     Modifiers Mods = getModifiers();
811     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
812     if (isRegKind()) {
813       addRegOperands(Inst, N);
814     } else {
815       addImmOperands(Inst, N, false);
816     }
817   }
818 
819   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
820     assert(!hasIntModifiers());
821     addRegOrImmWithInputModsOperands(Inst, N);
822   }
823 
824   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
825     assert(!hasFPModifiers());
826     addRegOrImmWithInputModsOperands(Inst, N);
827   }
828 
829   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
830     Modifiers Mods = getModifiers();
831     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
832     assert(isRegKind());
833     addRegOperands(Inst, N);
834   }
835 
836   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
837     assert(!hasIntModifiers());
838     addRegWithInputModsOperands(Inst, N);
839   }
840 
841   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
842     assert(!hasFPModifiers());
843     addRegWithInputModsOperands(Inst, N);
844   }
845 
846   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
847     if (isImm())
848       addImmOperands(Inst, N);
849     else {
850       assert(isExpr());
851       Inst.addOperand(MCOperand::createExpr(Expr));
852     }
853   }
854 
855   static void printImmTy(raw_ostream& OS, ImmTy Type) {
856     switch (Type) {
857     case ImmTyNone: OS << "None"; break;
858     case ImmTyGDS: OS << "GDS"; break;
859     case ImmTyLDS: OS << "LDS"; break;
860     case ImmTyOffen: OS << "Offen"; break;
861     case ImmTyIdxen: OS << "Idxen"; break;
862     case ImmTyAddr64: OS << "Addr64"; break;
863     case ImmTyOffset: OS << "Offset"; break;
864     case ImmTyInstOffset: OS << "InstOffset"; break;
865     case ImmTyOffset0: OS << "Offset0"; break;
866     case ImmTyOffset1: OS << "Offset1"; break;
867     case ImmTyDLC: OS << "DLC"; break;
868     case ImmTyGLC: OS << "GLC"; break;
869     case ImmTySLC: OS << "SLC"; break;
870     case ImmTySWZ: OS << "SWZ"; break;
871     case ImmTyTFE: OS << "TFE"; break;
872     case ImmTyD16: OS << "D16"; break;
873     case ImmTyFORMAT: OS << "FORMAT"; break;
874     case ImmTyClampSI: OS << "ClampSI"; break;
875     case ImmTyOModSI: OS << "OModSI"; break;
876     case ImmTyDPP8: OS << "DPP8"; break;
877     case ImmTyDppCtrl: OS << "DppCtrl"; break;
878     case ImmTyDppRowMask: OS << "DppRowMask"; break;
879     case ImmTyDppBankMask: OS << "DppBankMask"; break;
880     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
881     case ImmTyDppFi: OS << "FI"; break;
882     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
883     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
884     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
885     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
886     case ImmTyDMask: OS << "DMask"; break;
887     case ImmTyDim: OS << "Dim"; break;
888     case ImmTyUNorm: OS << "UNorm"; break;
889     case ImmTyDA: OS << "DA"; break;
890     case ImmTyR128A16: OS << "R128A16"; break;
891     case ImmTyA16: OS << "A16"; break;
892     case ImmTyLWE: OS << "LWE"; break;
893     case ImmTyOff: OS << "Off"; break;
894     case ImmTyExpTgt: OS << "ExpTgt"; break;
895     case ImmTyExpCompr: OS << "ExpCompr"; break;
896     case ImmTyExpVM: OS << "ExpVM"; break;
897     case ImmTyHwreg: OS << "Hwreg"; break;
898     case ImmTySendMsg: OS << "SendMsg"; break;
899     case ImmTyInterpSlot: OS << "InterpSlot"; break;
900     case ImmTyInterpAttr: OS << "InterpAttr"; break;
901     case ImmTyAttrChan: OS << "AttrChan"; break;
902     case ImmTyOpSel: OS << "OpSel"; break;
903     case ImmTyOpSelHi: OS << "OpSelHi"; break;
904     case ImmTyNegLo: OS << "NegLo"; break;
905     case ImmTyNegHi: OS << "NegHi"; break;
906     case ImmTySwizzle: OS << "Swizzle"; break;
907     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
908     case ImmTyHigh: OS << "High"; break;
909     case ImmTyBLGP: OS << "BLGP"; break;
910     case ImmTyCBSZ: OS << "CBSZ"; break;
911     case ImmTyABID: OS << "ABID"; break;
912     case ImmTyEndpgm: OS << "Endpgm"; break;
913     }
914   }
915 
916   void print(raw_ostream &OS) const override {
917     switch (Kind) {
918     case Register:
919       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
920       break;
921     case Immediate:
922       OS << '<' << getImm();
923       if (getImmTy() != ImmTyNone) {
924         OS << " type: "; printImmTy(OS, getImmTy());
925       }
926       OS << " mods: " << Imm.Mods << '>';
927       break;
928     case Token:
929       OS << '\'' << getToken() << '\'';
930       break;
931     case Expression:
932       OS << "<expr " << *Expr << '>';
933       break;
934     }
935   }
936 
937   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
938                                       int64_t Val, SMLoc Loc,
939                                       ImmTy Type = ImmTyNone,
940                                       bool IsFPImm = false) {
941     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
942     Op->Imm.Val = Val;
943     Op->Imm.IsFPImm = IsFPImm;
944     Op->Imm.Kind = ImmKindTyNone;
945     Op->Imm.Type = Type;
946     Op->Imm.Mods = Modifiers();
947     Op->StartLoc = Loc;
948     Op->EndLoc = Loc;
949     return Op;
950   }
951 
952   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
953                                         StringRef Str, SMLoc Loc,
954                                         bool HasExplicitEncodingSize = true) {
955     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
956     Res->Tok.Data = Str.data();
957     Res->Tok.Length = Str.size();
958     Res->StartLoc = Loc;
959     Res->EndLoc = Loc;
960     return Res;
961   }
962 
963   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
964                                       unsigned RegNo, SMLoc S,
965                                       SMLoc E) {
966     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
967     Op->Reg.RegNo = RegNo;
968     Op->Reg.Mods = Modifiers();
969     Op->StartLoc = S;
970     Op->EndLoc = E;
971     return Op;
972   }
973 
974   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
975                                        const class MCExpr *Expr, SMLoc S) {
976     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
977     Op->Expr = Expr;
978     Op->StartLoc = S;
979     Op->EndLoc = S;
980     return Op;
981   }
982 };
983 
984 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
986   return OS;
987 }
988 
989 //===----------------------------------------------------------------------===//
990 // AsmParser
991 //===----------------------------------------------------------------------===//
992 
993 // Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
996 class KernelScopeInfo {
997   int SgprIndexUnusedMin = -1;
998   int VgprIndexUnusedMin = -1;
999   MCContext *Ctx = nullptr;
1000 
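  // Record that the SGPR with index \p i is used and update the
  // .kernel.sgpr_count symbol with the new number of SGPRs in use.
  // usesVgprAt() below does the same for VGPRs.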
1001   void usesSgprAt(int i) {
1002     if (i >= SgprIndexUnusedMin) {
1003       SgprIndexUnusedMin = ++i;
1004       if (Ctx) {
1005         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1006         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1007       }
1008     }
1009   }
1010 
1011   void usesVgprAt(int i) {
1012     if (i >= VgprIndexUnusedMin) {
1013       VgprIndexUnusedMin = ++i;
1014       if (Ctx) {
1015         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1016         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1017       }
1018     }
1019   }
1020 
1021 public:
1022   KernelScopeInfo() = default;
1023 
1024   void initialize(MCContext &Context) {
1025     Ctx = &Context;
1026     usesSgprAt(SgprIndexUnusedMin = -1);
1027     usesVgprAt(VgprIndexUnusedMin = -1);
1028   }
1029 
1030   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1031     switch (RegKind) {
1032       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1033       case IS_AGPR: // fall through
1034       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1035       default: break;
1036     }
1037   }
1038 };
1039 
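// Target assembly parser for AMDGPU. Parses instructions, operands and
// target-specific directives, and emits MCInsts and target streamer calls.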
1040 class AMDGPUAsmParser : public MCTargetAsmParser {
1041   MCAsmParser &Parser;
1042 
1043   // Number of extra operands parsed after the first optional operand.
1044   // This may be necessary to skip hardcoded mandatory operands.
1045   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1046 
1047   unsigned ForcedEncodingSize = 0;
1048   bool ForcedDPP = false;
1049   bool ForcedSDWA = false;
1050   KernelScopeInfo KernelScope;
1051 
1052   /// @name Auto-generated Match Functions
1053   /// {
1054 
1055 #define GET_ASSEMBLER_HEADER
1056 #include "AMDGPUGenAsmMatcher.inc"
1057 
1058   /// }
1059 
1060 private:
1061   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1062   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1064   /// registers, and user-specified NextFreeXGPR values.
1065   ///
1066   /// \param Features [in] Target features, used for bug corrections.
1067   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1068   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1069   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1070   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1071   /// descriptor field, if valid.
1072   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1073   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1074   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1075   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1076   /// \param VGPRBlocks [out] Result VGPR block count.
1077   /// \param SGPRBlocks [out] Result SGPR block count.
1078   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1079                           bool FlatScrUsed, bool XNACKUsed,
1080                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1081                           SMRange VGPRRange, unsigned NextFreeSGPR,
1082                           SMRange SGPRRange, unsigned &VGPRBlocks,
1083                           unsigned &SGPRBlocks);
1084   bool ParseDirectiveAMDGCNTarget();
1085   bool ParseDirectiveAMDHSAKernel();
1086   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1087   bool ParseDirectiveHSACodeObjectVersion();
1088   bool ParseDirectiveHSACodeObjectISA();
1089   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1090   bool ParseDirectiveAMDKernelCodeT();
1091   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1092   bool ParseDirectiveAMDGPUHsaKernel();
1093 
1094   bool ParseDirectiveISAVersion();
1095   bool ParseDirectiveHSAMetadata();
1096   bool ParseDirectivePALMetadataBegin();
1097   bool ParseDirectivePALMetadata();
1098   bool ParseDirectiveAMDGPULDS();
1099 
1100   /// Common code to parse out a block of text (typically YAML) between start and
1101   /// end directives.
1102   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1103                            const char *AssemblerDirectiveEnd,
1104                            std::string &CollectString);
1105 
1106   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1107                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1108   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1109                            unsigned &RegNum, unsigned &RegWidth,
1110                            bool RestoreOnFailure = false);
1111   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1112                            unsigned &RegNum, unsigned &RegWidth,
1113                            SmallVectorImpl<AsmToken> &Tokens);
1114   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1115                            unsigned &RegWidth,
1116                            SmallVectorImpl<AsmToken> &Tokens);
1117   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1118                            unsigned &RegWidth,
1119                            SmallVectorImpl<AsmToken> &Tokens);
1120   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1121                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1122   bool ParseRegRange(unsigned& Num, unsigned& Width);
1123   unsigned getRegularReg(RegisterKind RegKind,
1124                          unsigned RegNum,
1125                          unsigned RegWidth,
1126                          SMLoc Loc);
1127 
1128   bool isRegister();
1129   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1130   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1131   void initializeGprCountSymbol(RegisterKind RegKind);
1132   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1133                              unsigned RegWidth);
1134   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1135                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1136   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1137                  bool IsGdsHardcoded);
1138 
1139 public:
1140   enum AMDGPUMatchResultTy {
1141     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1142   };
1143   enum OperandMode {
1144     OperandMode_Default,
1145     OperandMode_NSA,
1146   };
1147 
1148   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1149 
1150   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1151                const MCInstrInfo &MII,
1152                const MCTargetOptions &Options)
1153       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1154     MCAsmParserExtension::Initialize(Parser);
1155 
1156     if (getFeatureBits().none()) {
1157       // Set default features.
1158       copySTI().ToggleFeature("southern-islands");
1159     }
1160 
1161     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1162 
1163     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1168       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1169       MCContext &Ctx = getContext();
1170       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1171         MCSymbol *Sym =
1172             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1173         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1174         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1175         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1176         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1177         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1178       } else {
1179         MCSymbol *Sym =
1180             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1181         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1182         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1183         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1184         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1185         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1186       }
1187       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1188         initializeGprCountSymbol(IS_VGPR);
1189         initializeGprCountSymbol(IS_SGPR);
1190       } else
1191         KernelScope.initialize(getContext());
1192     }
1193   }
1194 
1195   bool hasXNACK() const {
1196     return AMDGPU::hasXNACK(getSTI());
1197   }
1198 
1199   bool hasMIMG_R128() const {
1200     return AMDGPU::hasMIMG_R128(getSTI());
1201   }
1202 
1203   bool hasPackedD16() const {
1204     return AMDGPU::hasPackedD16(getSTI());
1205   }
1206 
1207   bool hasGFX10A16() const {
1208     return AMDGPU::hasGFX10A16(getSTI());
1209   }
1210 
1211   bool isSI() const {
1212     return AMDGPU::isSI(getSTI());
1213   }
1214 
1215   bool isCI() const {
1216     return AMDGPU::isCI(getSTI());
1217   }
1218 
1219   bool isVI() const {
1220     return AMDGPU::isVI(getSTI());
1221   }
1222 
1223   bool isGFX9() const {
1224     return AMDGPU::isGFX9(getSTI());
1225   }
1226 
1227   bool isGFX9Plus() const {
1228     return AMDGPU::isGFX9Plus(getSTI());
1229   }
1230 
1231   bool isGFX10() const {
1232     return AMDGPU::isGFX10(getSTI());
1233   }
1234 
1235   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1236 
1237   bool isGFX10_BEncoding() const {
1238     return AMDGPU::isGFX10_BEncoding(getSTI());
1239   }
1240 
1241   bool hasInv2PiInlineImm() const {
1242     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1243   }
1244 
1245   bool hasFlatOffsets() const {
1246     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1247   }
1248 
1249   bool hasSGPR102_SGPR103() const {
1250     return !isVI() && !isGFX9();
1251   }
1252 
1253   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1254 
1255   bool hasIntClamp() const {
1256     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1257   }
1258 
1259   AMDGPUTargetStreamer &getTargetStreamer() {
1260     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1261     return static_cast<AMDGPUTargetStreamer &>(TS);
1262   }
1263 
1264   const MCRegisterInfo *getMRI() const {
1265     // We need this const_cast because for some reason getContext() is not const
1266     // in MCAsmParser.
1267     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1268   }
1269 
1270   const MCInstrInfo *getMII() const {
1271     return &MII;
1272   }
1273 
1274   const FeatureBitset &getFeatureBits() const {
1275     return getSTI().getFeatureBits();
1276   }
1277 
1278   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1279   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1280   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1281 
1282   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1283   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1284   bool isForcedDPP() const { return ForcedDPP; }
1285   bool isForcedSDWA() const { return ForcedSDWA; }
1286   ArrayRef<unsigned> getMatchedVariants() const;
1287   StringRef getMatchedVariantName() const;
1288 
1289   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1290   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1291                      bool RestoreOnFailure);
1292   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1293   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1294                                         SMLoc &EndLoc) override;
1295   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1296   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1297                                       unsigned Kind) override;
1298   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1299                                OperandVector &Operands, MCStreamer &Out,
1300                                uint64_t &ErrorInfo,
1301                                bool MatchingInlineAsm) override;
1302   bool ParseDirective(AsmToken DirectiveID) override;
1303   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1304                                     OperandMode Mode = OperandMode_Default);
1305   StringRef parseMnemonicSuffix(StringRef Name);
1306   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1307                         SMLoc NameLoc, OperandVector &Operands) override;
1308   //bool ProcessInstruction(MCInst &Inst);
1309 
1310   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1311 
1312   OperandMatchResultTy
1313   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1314                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1315                      bool (*ConvertResult)(int64_t &) = nullptr);
1316 
1317   OperandMatchResultTy
1318   parseOperandArrayWithPrefix(const char *Prefix,
1319                               OperandVector &Operands,
1320                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1321                               bool (*ConvertResult)(int64_t&) = nullptr);
1322 
1323   OperandMatchResultTy
1324   parseNamedBit(const char *Name, OperandVector &Operands,
1325                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1326   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1327                                              StringRef &Value);
1328 
1329   bool isModifier();
1330   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1331   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1332   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1333   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1334   bool parseSP3NegModifier();
1335   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1336   OperandMatchResultTy parseReg(OperandVector &Operands);
1337   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1338   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1339   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1340   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1341   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1342   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1343   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1344   OperandMatchResultTy parseUfmt(int64_t &Format);
1345   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1346   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1347   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1348   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1349   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1350   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1351   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1352 
1353   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1354   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1355   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1356   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1357 
1358   bool parseCnt(int64_t &IntVal);
1359   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1360   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1361 
1362 private:
1363   struct OperandInfoTy {
1364     SMLoc Loc;
1365     int64_t Id;
1366     bool IsSymbolic = false;
1367     bool IsDefined = false;
1368 
1369     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1370   };
1371 
1372   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1373   bool validateSendMsg(const OperandInfoTy &Msg,
1374                        const OperandInfoTy &Op,
1375                        const OperandInfoTy &Stream);
1376 
1377   bool parseHwregBody(OperandInfoTy &HwReg,
1378                       OperandInfoTy &Offset,
1379                       OperandInfoTy &Width);
1380   bool validateHwreg(const OperandInfoTy &HwReg,
1381                      const OperandInfoTy &Offset,
1382                      const OperandInfoTy &Width);
1383 
1384   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1385   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1386   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1387 
1388   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1389                       const OperandVector &Operands) const;
1390   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1391   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1392   SMLoc getLitLoc(const OperandVector &Operands) const;
1393   SMLoc getConstLoc(const OperandVector &Operands) const;
1394 
1395   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1396   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1397   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1398   bool validateSOPLiteral(const MCInst &Inst) const;
1399   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1400   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1401   bool validateIntClampSupported(const MCInst &Inst);
1402   bool validateMIMGAtomicDMask(const MCInst &Inst);
1403   bool validateMIMGGatherDMask(const MCInst &Inst);
1404   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1405   bool validateMIMGDataSize(const MCInst &Inst);
1406   bool validateMIMGAddrSize(const MCInst &Inst);
1407   bool validateMIMGD16(const MCInst &Inst);
1408   bool validateMIMGDim(const MCInst &Inst);
1409   bool validateLdsDirect(const MCInst &Inst);
1410   bool validateOpSel(const MCInst &Inst);
1411   bool validateVccOperand(unsigned Reg) const;
1412   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1413   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1414   bool validateDivScale(const MCInst &Inst);
1415   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1416                              const SMLoc &IDLoc);
1417   unsigned getConstantBusLimit(unsigned Opcode) const;
1418   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1419   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1420   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1421 
1422   bool isSupportedMnemo(StringRef Mnemo,
1423                         const FeatureBitset &FBS);
1424   bool isSupportedMnemo(StringRef Mnemo,
1425                         const FeatureBitset &FBS,
1426                         ArrayRef<unsigned> Variants);
1427   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1428 
1429   bool isId(const StringRef Id) const;
1430   bool isId(const AsmToken &Token, const StringRef Id) const;
1431   bool isToken(const AsmToken::TokenKind Kind) const;
1432   bool trySkipId(const StringRef Id);
1433   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1434   bool trySkipToken(const AsmToken::TokenKind Kind);
1435   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1436   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1437   bool parseId(StringRef &Val, const StringRef ErrMsg);
1438 
1439   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1440   AsmToken::TokenKind getTokenKind() const;
1441   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1442   bool parseExpr(OperandVector &Operands);
1443   StringRef getTokenStr() const;
1444   AsmToken peekToken();
1445   AsmToken getToken() const;
1446   SMLoc getLoc() const;
1447   void lex();
1448 
1449 public:
1450   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1451   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1452 
1453   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1454   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1455   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1456   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1457   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1458   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1459 
1460   bool parseSwizzleOperand(int64_t &Op,
1461                            const unsigned MinVal,
1462                            const unsigned MaxVal,
1463                            const StringRef ErrMsg,
1464                            SMLoc &Loc);
1465   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1466                             const unsigned MinVal,
1467                             const unsigned MaxVal,
1468                             const StringRef ErrMsg);
1469   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1470   bool parseSwizzleOffset(int64_t &Imm);
1471   bool parseSwizzleMacro(int64_t &Imm);
1472   bool parseSwizzleQuadPerm(int64_t &Imm);
1473   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1474   bool parseSwizzleBroadcast(int64_t &Imm);
1475   bool parseSwizzleSwap(int64_t &Imm);
1476   bool parseSwizzleReverse(int64_t &Imm);
1477 
1478   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1479   int64_t parseGPRIdxMacro();
1480 
1481   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1482   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1483   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1484   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1485   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1486 
1487   AMDGPUOperand::Ptr defaultDLC() const;
1488   AMDGPUOperand::Ptr defaultGLC() const;
1489   AMDGPUOperand::Ptr defaultGLC_1() const;
1490   AMDGPUOperand::Ptr defaultSLC() const;
1491 
1492   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1493   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1494   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1495   AMDGPUOperand::Ptr defaultFlatOffset() const;
1496 
1497   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1498 
1499   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1500                OptionalImmIndexMap &OptionalIdx);
1501   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1502   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1503   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1504 
1505   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1506 
1507   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1508                bool IsAtomic = false);
1509   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1510   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1511 
1512   OperandMatchResultTy parseDim(OperandVector &Operands);
1513   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1514   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1515   AMDGPUOperand::Ptr defaultRowMask() const;
1516   AMDGPUOperand::Ptr defaultBankMask() const;
1517   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1518   AMDGPUOperand::Ptr defaultFI() const;
1519   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1520   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1521 
1522   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1523                                     AMDGPUOperand::ImmTy Type);
1524   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1525   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1526   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1527   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1528   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1529   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1530   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1531                uint64_t BasicInstType,
1532                bool SkipDstVcc = false,
1533                bool SkipSrcVcc = false);
1534 
1535   AMDGPUOperand::Ptr defaultBLGP() const;
1536   AMDGPUOperand::Ptr defaultCBSZ() const;
1537   AMDGPUOperand::Ptr defaultABID() const;
1538 
1539   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1540   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1541 };
1542 
1543 struct OptionalOperand {
1544   const char *Name;
1545   AMDGPUOperand::ImmTy Type;
1546   bool IsBit;
1547   bool (*ConvertResult)(int64_t&);
1548 };
1549 
1550 } // end anonymous namespace
1551 
1552 // May be called with an integer type of equivalent bitwidth.
1553 static const fltSemantics *getFltSemantics(unsigned Size) {
1554   switch (Size) {
1555   case 4:
1556     return &APFloat::IEEEsingle();
1557   case 8:
1558     return &APFloat::IEEEdouble();
1559   case 2:
1560     return &APFloat::IEEEhalf();
1561   default:
1562     llvm_unreachable("unsupported fp type");
1563   }
1564 }
1565 
1566 static const fltSemantics *getFltSemantics(MVT VT) {
1567   return getFltSemantics(VT.getSizeInBits() / 8);
1568 }
1569 
1570 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1571   switch (OperandType) {
1572   case AMDGPU::OPERAND_REG_IMM_INT32:
1573   case AMDGPU::OPERAND_REG_IMM_FP32:
1574   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1575   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1576   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1577   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1578     return &APFloat::IEEEsingle();
1579   case AMDGPU::OPERAND_REG_IMM_INT64:
1580   case AMDGPU::OPERAND_REG_IMM_FP64:
1581   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1582   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1583     return &APFloat::IEEEdouble();
1584   case AMDGPU::OPERAND_REG_IMM_INT16:
1585   case AMDGPU::OPERAND_REG_IMM_FP16:
1586   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1587   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1588   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1589   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1590   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1591   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1592   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1593   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1594   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1595   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1596     return &APFloat::IEEEhalf();
1597   default:
1598     llvm_unreachable("unsupported fp type");
1599   }
1600 }
1601 
1602 //===----------------------------------------------------------------------===//
1603 // Operand
1604 //===----------------------------------------------------------------------===//
1605 
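// Illustrative behavior of the conversion check below (examples only):
// 1.0 converts to f16 exactly and 0.1 converts to f16 with mere precision
// loss, so both are accepted; 1e40 overflows f32 and is rejected.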
1606 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1607   bool Lost;
1608 
1609   // Convert the literal to the operand's floating-point semantics.
1610   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1611                                                APFloat::rmNearestTiesToEven,
1612                                                &Lost);
1613   // We allow precision loss but not overflow or underflow
1614   if (Status != APFloat::opOK &&
1615       Lost &&
1616       ((Status & APFloat::opOverflow)  != 0 ||
1617        (Status & APFloat::opUnderflow) != 0)) {
1618     return false;
1619   }
1620 
1621   return true;
1622 }
1623 
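// A value is considered safely truncatable if it fits in Size bits either as
// an unsigned or as a signed quantity. Illustrative cases for Size == 16:
// 0xFFFF and -1 are both safe, while 0x1FFFF is not.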
1624 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1625   return isUIntN(Size, Val) || isIntN(Size, Val);
1626 }
1627 
1628 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1629   if (VT.getScalarType() == MVT::i16) {
1630     // FP immediate values are broken for i16; only integer literals are inlinable.
1631     return isInlinableIntLiteral(Val);
1632   }
1633 
1634   // f16/v2f16 operands work correctly for all values.
1635   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1636 }
1637 
1638 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1639 
1640   // This is a hack to enable named inline values like
1641   // shared_base with both 32-bit and 64-bit operands.
1642   // Note that these values are defined as
1643   // 32-bit operands only.
1644   if (isInlineValue()) {
1645     return true;
1646   }
1647 
1648   if (!isImmTy(ImmTyNone)) {
1649     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1650     return false;
1651   }
1652   // TODO: We should avoid using host float here. It would be better to
1653   // check the float bit values which is what a few other places do.
1654   // We've had bot failures before due to weird NaN support on mips hosts.
1655 
1656   APInt Literal(64, Imm.Val);
1657 
1658   if (Imm.IsFPImm) { // We got fp literal token
1659     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1660       return AMDGPU::isInlinableLiteral64(Imm.Val,
1661                                           AsmParser->hasInv2PiInlineImm());
1662     }
1663 
1664     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1665     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1666       return false;
1667 
1668     if (type.getScalarSizeInBits() == 16) {
1669       return isInlineableLiteralOp16(
1670         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1671         type, AsmParser->hasInv2PiInlineImm());
1672     }
1673 
1674     // Check if single precision literal is inlinable
1675     return AMDGPU::isInlinableLiteral32(
1676       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1677       AsmParser->hasInv2PiInlineImm());
1678   }
1679 
1680   // We got int literal token.
1681   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1682     return AMDGPU::isInlinableLiteral64(Imm.Val,
1683                                         AsmParser->hasInv2PiInlineImm());
1684   }
1685 
1686   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1687     return false;
1688   }
1689 
1690   if (type.getScalarSizeInBits() == 16) {
1691     return isInlineableLiteralOp16(
1692       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1693       type, AsmParser->hasInv2PiInlineImm());
1694   }
1695 
1696   return AMDGPU::isInlinableLiteral32(
1697     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1698     AsmParser->hasInv2PiInlineImm());
1699 }
1700 
1701 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1702   // Check that this immediate can be added as a literal
1703   if (!isImmTy(ImmTyNone)) {
1704     return false;
1705   }
1706 
1707   if (!Imm.IsFPImm) {
1708     // We got int literal token.
1709 
1710     if (type == MVT::f64 && hasFPModifiers()) {
1711       // Cannot apply fp modifiers to int literals preserving the same semantics
1712       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1713       // disable these cases.
1714       return false;
1715     }
1716 
1717     unsigned Size = type.getSizeInBits();
1718     if (Size == 64)
1719       Size = 32;
1720 
1721     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1722     // types.
1723     return isSafeTruncation(Imm.Val, Size);
1724   }
1725 
1726   // We got fp literal token
1727   if (type == MVT::f64) { // Expected 64-bit fp operand
1728     // The low 32 bits of the literal would be set to zero, but we accept such literals
1729     return true;
1730   }
1731 
1732   if (type == MVT::i64) { // Expected 64-bit int operand
1733     // We don't allow fp literals in 64-bit integer instructions. It is
1734     // unclear how we should encode them.
1735     return false;
1736   }
1737 
1738   // We allow fp literals with f16x2 operands assuming that the specified
1739   // literal goes into the lower half and the upper half is zero. We also
1740   // require that the literal may be losslessly converted to f16.
1741   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1742                      (type == MVT::v2i16)? MVT::i16 : type;
1743 
1744   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1745   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1746 }
1747 
1748 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1749   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1750 }
1751 
1752 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1753   if (AsmParser->isVI())
1754     return isVReg32();
1755   else if (AsmParser->isGFX9Plus())
1756     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1757   else
1758     return false;
1759 }
1760 
1761 bool AMDGPUOperand::isSDWAFP16Operand() const {
1762   return isSDWAOperand(MVT::f16);
1763 }
1764 
1765 bool AMDGPUOperand::isSDWAFP32Operand() const {
1766   return isSDWAOperand(MVT::f32);
1767 }
1768 
1769 bool AMDGPUOperand::isSDWAInt16Operand() const {
1770   return isSDWAOperand(MVT::i16);
1771 }
1772 
1773 bool AMDGPUOperand::isSDWAInt32Operand() const {
1774   return isSDWAOperand(MVT::i32);
1775 }
1776 
1777 bool AMDGPUOperand::isBoolReg() const {
1778   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1779          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1780 }
1781 
1782 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1783 {
1784   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1785   assert(Size == 2 || Size == 4 || Size == 8);
1786 
1787   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
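  // E.g. for a 4-byte operand FpSignMask is 0x80000000 (illustrative value):
  // 'abs' clears the sign bit below and 'neg' toggles it.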
1788 
1789   if (Imm.Mods.Abs) {
1790     Val &= ~FpSignMask;
1791   }
1792   if (Imm.Mods.Neg) {
1793     Val ^= FpSignMask;
1794   }
1795 
1796   return Val;
1797 }
1798 
1799 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1800   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1801                              Inst.getNumOperands())) {
1802     addLiteralImmOperand(Inst, Imm.Val,
1803                          ApplyModifiers &&
1804                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1805   } else {
1806     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1807     Inst.addOperand(MCOperand::createImm(Imm.Val));
1808     setImmKindNone();
1809   }
1810 }
1811 
1812 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1813   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1814   auto OpNum = Inst.getNumOperands();
1815   // Check that this operand accepts literals
1816   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1817 
1818   if (ApplyModifiers) {
1819     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1820     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1821     Val = applyInputFPModifiers(Val, Size);
1822   }
1823 
1824   APInt Literal(64, Val);
1825   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1826 
1827   if (Imm.IsFPImm) { // We got fp literal token
1828     switch (OpTy) {
1829     case AMDGPU::OPERAND_REG_IMM_INT64:
1830     case AMDGPU::OPERAND_REG_IMM_FP64:
1831     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1832     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1833       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1834                                        AsmParser->hasInv2PiInlineImm())) {
1835         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1836         setImmKindConst();
1837         return;
1838       }
1839 
1840       // Non-inlineable
1841       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1842         // For fp operands we check if low 32 bits are zeros
1843         if (Literal.getLoBits(32) != 0) {
1844           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1845           "Can't encode literal as exact 64-bit floating-point operand. "
1846           "Low 32-bits will be set to zero");
1847         }
1848 
1849         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1850         setImmKindLiteral();
1851         return;
1852       }
1853 
1854       // We don't allow fp literals in 64-bit integer instructions. It is
1855       // unclear how we should encode them. This case should be checked earlier
1856       // in predicate methods (isLiteralImm())
1857       llvm_unreachable("fp literal in 64-bit integer instruction.");
1858 
1859     case AMDGPU::OPERAND_REG_IMM_INT32:
1860     case AMDGPU::OPERAND_REG_IMM_FP32:
1861     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1862     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1863     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1864     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1865     case AMDGPU::OPERAND_REG_IMM_INT16:
1866     case AMDGPU::OPERAND_REG_IMM_FP16:
1867     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1868     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1869     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1870     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1871     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1872     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1873     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1874     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1875     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1876     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1877       bool lost;
1878       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1879       // Convert the literal to the operand's floating-point semantics.
1880       FPLiteral.convert(*getOpFltSemantics(OpTy),
1881                         APFloat::rmNearestTiesToEven, &lost);
1882       // We allow precision loss but not overflow or underflow. This should be
1883       // checked earlier in isLiteralImm()
1884 
1885       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1886       Inst.addOperand(MCOperand::createImm(ImmVal));
1887       setImmKindLiteral();
1888       return;
1889     }
1890     default:
1891       llvm_unreachable("invalid operand size");
1892     }
1893 
1894     return;
1895   }
1896 
1897   // We got int literal token.
1898   // Only sign extend inline immediates.
1899   switch (OpTy) {
1900   case AMDGPU::OPERAND_REG_IMM_INT32:
1901   case AMDGPU::OPERAND_REG_IMM_FP32:
1902   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1903   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1904   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1905   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1906   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1907   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1908     if (isSafeTruncation(Val, 32) &&
1909         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1910                                      AsmParser->hasInv2PiInlineImm())) {
1911       Inst.addOperand(MCOperand::createImm(Val));
1912       setImmKindConst();
1913       return;
1914     }
1915 
1916     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1917     setImmKindLiteral();
1918     return;
1919 
1920   case AMDGPU::OPERAND_REG_IMM_INT64:
1921   case AMDGPU::OPERAND_REG_IMM_FP64:
1922   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1923   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1924     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1925       Inst.addOperand(MCOperand::createImm(Val));
1926       setImmKindConst();
1927       return;
1928     }
1929 
1930     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1931     setImmKindLiteral();
1932     return;
1933 
1934   case AMDGPU::OPERAND_REG_IMM_INT16:
1935   case AMDGPU::OPERAND_REG_IMM_FP16:
1936   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1937   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1938   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1939   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1940     if (isSafeTruncation(Val, 16) &&
1941         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1942                                      AsmParser->hasInv2PiInlineImm())) {
1943       Inst.addOperand(MCOperand::createImm(Val));
1944       setImmKindConst();
1945       return;
1946     }
1947 
1948     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1949     setImmKindLiteral();
1950     return;
1951 
1952   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1953   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1954   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1955   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1956     assert(isSafeTruncation(Val, 16));
1957     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1958                                         AsmParser->hasInv2PiInlineImm()));
1959 
1960     Inst.addOperand(MCOperand::createImm(Val));
1961     return;
1962   }
1963   default:
1964     llvm_unreachable("invalid operand size");
1965   }
1966 }
1967 
1968 template <unsigned Bitwidth>
1969 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1970   APInt Literal(64, Imm.Val);
1971   setImmKindNone();
1972 
1973   if (!Imm.IsFPImm) {
1974     // We got int literal token.
1975     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1976     return;
1977   }
1978 
1979   bool Lost;
1980   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1981   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1982                     APFloat::rmNearestTiesToEven, &Lost);
1983   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1984 }
1985 
1986 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1987   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1988 }
1989 
1990 static bool isInlineValue(unsigned Reg) {
1991   switch (Reg) {
1992   case AMDGPU::SRC_SHARED_BASE:
1993   case AMDGPU::SRC_SHARED_LIMIT:
1994   case AMDGPU::SRC_PRIVATE_BASE:
1995   case AMDGPU::SRC_PRIVATE_LIMIT:
1996   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1997     return true;
1998   case AMDGPU::SRC_VCCZ:
1999   case AMDGPU::SRC_EXECZ:
2000   case AMDGPU::SRC_SCC:
2001     return true;
2002   case AMDGPU::SGPR_NULL:
2003     return true;
2004   default:
2005     return false;
2006   }
2007 }
2008 
2009 bool AMDGPUOperand::isInlineValue() const {
2010   return isRegKind() && ::isInlineValue(getReg());
2011 }
2012 
2013 //===----------------------------------------------------------------------===//
2014 // AsmParser
2015 //===----------------------------------------------------------------------===//
2016 
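// Map a register kind and a width in 32-bit registers to a register class ID.
// For example (illustrative): IS_VGPR with RegWidth == 2 maps to
// VReg_64RegClassID, while an unsupported width such as 7 yields -1.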
2017 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2018   if (Is == IS_VGPR) {
2019     switch (RegWidth) {
2020       default: return -1;
2021       case 1: return AMDGPU::VGPR_32RegClassID;
2022       case 2: return AMDGPU::VReg_64RegClassID;
2023       case 3: return AMDGPU::VReg_96RegClassID;
2024       case 4: return AMDGPU::VReg_128RegClassID;
2025       case 5: return AMDGPU::VReg_160RegClassID;
2026       case 6: return AMDGPU::VReg_192RegClassID;
2027       case 8: return AMDGPU::VReg_256RegClassID;
2028       case 16: return AMDGPU::VReg_512RegClassID;
2029       case 32: return AMDGPU::VReg_1024RegClassID;
2030     }
2031   } else if (Is == IS_TTMP) {
2032     switch (RegWidth) {
2033       default: return -1;
2034       case 1: return AMDGPU::TTMP_32RegClassID;
2035       case 2: return AMDGPU::TTMP_64RegClassID;
2036       case 4: return AMDGPU::TTMP_128RegClassID;
2037       case 8: return AMDGPU::TTMP_256RegClassID;
2038       case 16: return AMDGPU::TTMP_512RegClassID;
2039     }
2040   } else if (Is == IS_SGPR) {
2041     switch (RegWidth) {
2042       default: return -1;
2043       case 1: return AMDGPU::SGPR_32RegClassID;
2044       case 2: return AMDGPU::SGPR_64RegClassID;
2045       case 3: return AMDGPU::SGPR_96RegClassID;
2046       case 4: return AMDGPU::SGPR_128RegClassID;
2047       case 5: return AMDGPU::SGPR_160RegClassID;
2048       case 6: return AMDGPU::SGPR_192RegClassID;
2049       case 8: return AMDGPU::SGPR_256RegClassID;
2050       case 16: return AMDGPU::SGPR_512RegClassID;
2051     }
2052   } else if (Is == IS_AGPR) {
2053     switch (RegWidth) {
2054       default: return -1;
2055       case 1: return AMDGPU::AGPR_32RegClassID;
2056       case 2: return AMDGPU::AReg_64RegClassID;
2057       case 3: return AMDGPU::AReg_96RegClassID;
2058       case 4: return AMDGPU::AReg_128RegClassID;
2059       case 5: return AMDGPU::AReg_160RegClassID;
2060       case 6: return AMDGPU::AReg_192RegClassID;
2061       case 8: return AMDGPU::AReg_256RegClassID;
2062       case 16: return AMDGPU::AReg_512RegClassID;
2063       case 32: return AMDGPU::AReg_1024RegClassID;
2064     }
2065   }
2066   return -1;
2067 }
2068 
2069 static unsigned getSpecialRegForName(StringRef RegName) {
2070   return StringSwitch<unsigned>(RegName)
2071     .Case("exec", AMDGPU::EXEC)
2072     .Case("vcc", AMDGPU::VCC)
2073     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2074     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2075     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2076     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2077     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2078     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2079     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2080     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2081     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2082     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2083     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2084     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2085     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2086     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2087     .Case("m0", AMDGPU::M0)
2088     .Case("vccz", AMDGPU::SRC_VCCZ)
2089     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2090     .Case("execz", AMDGPU::SRC_EXECZ)
2091     .Case("src_execz", AMDGPU::SRC_EXECZ)
2092     .Case("scc", AMDGPU::SRC_SCC)
2093     .Case("src_scc", AMDGPU::SRC_SCC)
2094     .Case("tba", AMDGPU::TBA)
2095     .Case("tma", AMDGPU::TMA)
2096     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2097     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2098     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2099     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2100     .Case("vcc_lo", AMDGPU::VCC_LO)
2101     .Case("vcc_hi", AMDGPU::VCC_HI)
2102     .Case("exec_lo", AMDGPU::EXEC_LO)
2103     .Case("exec_hi", AMDGPU::EXEC_HI)
2104     .Case("tma_lo", AMDGPU::TMA_LO)
2105     .Case("tma_hi", AMDGPU::TMA_HI)
2106     .Case("tba_lo", AMDGPU::TBA_LO)
2107     .Case("tba_hi", AMDGPU::TBA_HI)
2108     .Case("pc", AMDGPU::PC_REG)
2109     .Case("null", AMDGPU::SGPR_NULL)
2110     .Default(AMDGPU::NoRegister);
2111 }
2112 
2113 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2114                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2115   auto R = parseRegister();
2116   if (!R) return true;
2117   assert(R->isReg());
2118   RegNo = R->getReg();
2119   StartLoc = R->getStartLoc();
2120   EndLoc = R->getEndLoc();
2121   return false;
2122 }
2123 
2124 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2125                                     SMLoc &EndLoc) {
2126   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2127 }
2128 
2129 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2130                                                        SMLoc &StartLoc,
2131                                                        SMLoc &EndLoc) {
2132   bool Result =
2133       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2134   bool PendingErrors = getParser().hasPendingError();
2135   getParser().clearPendingErrors();
2136   if (PendingErrors)
2137     return MatchOperand_ParseFail;
2138   if (Result)
2139     return MatchOperand_NoMatch;
2140   return MatchOperand_Success;
2141 }
2142 
2143 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2144                                             RegisterKind RegKind, unsigned Reg1,
2145                                             SMLoc Loc) {
2146   switch (RegKind) {
2147   case IS_SPECIAL:
2148     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2149       Reg = AMDGPU::EXEC;
2150       RegWidth = 2;
2151       return true;
2152     }
2153     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2154       Reg = AMDGPU::FLAT_SCR;
2155       RegWidth = 2;
2156       return true;
2157     }
2158     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2159       Reg = AMDGPU::XNACK_MASK;
2160       RegWidth = 2;
2161       return true;
2162     }
2163     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2164       Reg = AMDGPU::VCC;
2165       RegWidth = 2;
2166       return true;
2167     }
2168     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2169       Reg = AMDGPU::TBA;
2170       RegWidth = 2;
2171       return true;
2172     }
2173     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2174       Reg = AMDGPU::TMA;
2175       RegWidth = 2;
2176       return true;
2177     }
2178     Error(Loc, "register does not fit in the list");
2179     return false;
2180   case IS_VGPR:
2181   case IS_SGPR:
2182   case IS_AGPR:
2183   case IS_TTMP:
2184     if (Reg1 != Reg + RegWidth) {
2185       Error(Loc, "registers in a list must have consecutive indices");
2186       return false;
2187     }
2188     RegWidth++;
2189     return true;
2190   default:
2191     llvm_unreachable("unexpected register kind");
2192   }
2193 }
2194 
2195 struct RegInfo {
2196   StringLiteral Name;
2197   RegisterKind Kind;
2198 };
2199 
2200 static constexpr RegInfo RegularRegisters[] = {
2201   {{"v"},    IS_VGPR},
2202   {{"s"},    IS_SGPR},
2203   {{"ttmp"}, IS_TTMP},
2204   {{"acc"},  IS_AGPR},
2205   {{"a"},    IS_AGPR},
2206 };
2207 
2208 static bool isRegularReg(RegisterKind Kind) {
2209   return Kind == IS_VGPR ||
2210          Kind == IS_SGPR ||
2211          Kind == IS_TTMP ||
2212          Kind == IS_AGPR;
2213 }
2214 
2215 static const RegInfo* getRegularRegInfo(StringRef Str) {
2216   for (const RegInfo &Reg : RegularRegisters)
2217     if (Str.startswith(Reg.Name))
2218       return &Reg;
2219   return nullptr;
2220 }
2221 
2222 static bool getRegNum(StringRef Str, unsigned& Num) {
2223   return !Str.getAsInteger(10, Num);
2224 }
2225 
2226 bool
2227 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2228                             const AsmToken &NextToken) const {
2229 
2230   // A list of consecutive registers: [s0,s1,s2,s3]
2231   if (Token.is(AsmToken::LBrac))
2232     return true;
2233 
2234   if (!Token.is(AsmToken::Identifier))
2235     return false;
2236 
2237   // A single register like s0 or a range of registers like s[0:1]
2238 
2239   StringRef Str = Token.getString();
2240   const RegInfo *Reg = getRegularRegInfo(Str);
2241   if (Reg) {
2242     StringRef RegName = Reg->Name;
2243     StringRef RegSuffix = Str.substr(RegName.size());
2244     if (!RegSuffix.empty()) {
2245       unsigned Num;
2246       // A single register with an index: rXX
2247       if (getRegNum(RegSuffix, Num))
2248         return true;
2249     } else {
2250       // A range of registers: r[XX:YY].
2251       if (NextToken.is(AsmToken::LBrac))
2252         return true;
2253     }
2254   }
2255 
2256   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2257 }
2258 
2259 bool
2260 AMDGPUAsmParser::isRegister()
2261 {
2262   return isRegister(getToken(), peekToken());
2263 }
2264 
2265 unsigned
2266 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2267                                unsigned RegNum,
2268                                unsigned RegWidth,
2269                                SMLoc Loc) {
2270 
2271   assert(isRegularReg(RegKind));
2272 
2273   unsigned AlignSize = 1;
2274   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2275     // SGPR and TTMP registers must be aligned.
2276     // Max required alignment is 4 dwords.
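    // Illustrative consequence of the check below: s[2:3] is an acceptable
    // 64-bit pair because its first index is even, whereas s[1:2] would be
    // rejected as an invalid register alignment.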
2277     AlignSize = std::min(RegWidth, 4u);
2278   }
2279 
2280   if (RegNum % AlignSize != 0) {
2281     Error(Loc, "invalid register alignment");
2282     return AMDGPU::NoRegister;
2283   }
2284 
2285   unsigned RegIdx = RegNum / AlignSize;
2286   int RCID = getRegClass(RegKind, RegWidth);
2287   if (RCID == -1) {
2288     Error(Loc, "invalid or unsupported register size");
2289     return AMDGPU::NoRegister;
2290   }
2291 
2292   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2293   const MCRegisterClass RC = TRI->getRegClass(RCID);
2294   if (RegIdx >= RC.getNumRegs()) {
2295     Error(Loc, "register index is out of range");
2296     return AMDGPU::NoRegister;
2297   }
2298 
2299   return RC.getRegister(RegIdx);
2300 }
2301 
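// Parse a bracketed register index or range such as "[0]" or "[0:3]".
// Illustrative results (examples only): "[0:3]" yields Num = 0 and Width = 4,
// while "[5]" yields Num = 5 and Width = 1.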
2302 bool
2303 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2304   int64_t RegLo, RegHi;
2305   if (!skipToken(AsmToken::LBrac, "missing register index"))
2306     return false;
2307 
2308   SMLoc FirstIdxLoc = getLoc();
2309   SMLoc SecondIdxLoc;
2310 
2311   if (!parseExpr(RegLo))
2312     return false;
2313 
2314   if (trySkipToken(AsmToken::Colon)) {
2315     SecondIdxLoc = getLoc();
2316     if (!parseExpr(RegHi))
2317       return false;
2318   } else {
2319     RegHi = RegLo;
2320   }
2321 
2322   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2323     return false;
2324 
2325   if (!isUInt<32>(RegLo)) {
2326     Error(FirstIdxLoc, "invalid register index");
2327     return false;
2328   }
2329 
2330   if (!isUInt<32>(RegHi)) {
2331     Error(SecondIdxLoc, "invalid register index");
2332     return false;
2333   }
2334 
2335   if (RegLo > RegHi) {
2336     Error(FirstIdxLoc, "first register index should not exceed second index");
2337     return false;
2338   }
2339 
2340   Num = static_cast<unsigned>(RegLo);
2341   Width = (RegHi - RegLo) + 1;
2342   return true;
2343 }
2344 
2345 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2346                                           unsigned &RegNum, unsigned &RegWidth,
2347                                           SmallVectorImpl<AsmToken> &Tokens) {
2348   assert(isToken(AsmToken::Identifier));
2349   unsigned Reg = getSpecialRegForName(getTokenStr());
2350   if (Reg) {
2351     RegNum = 0;
2352     RegWidth = 1;
2353     RegKind = IS_SPECIAL;
2354     Tokens.push_back(getToken());
2355     lex(); // skip register name
2356   }
2357   return Reg;
2358 }
2359 
2360 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2361                                           unsigned &RegNum, unsigned &RegWidth,
2362                                           SmallVectorImpl<AsmToken> &Tokens) {
2363   assert(isToken(AsmToken::Identifier));
2364   StringRef RegName = getTokenStr();
2365   auto Loc = getLoc();
2366 
2367   const RegInfo *RI = getRegularRegInfo(RegName);
2368   if (!RI) {
2369     Error(Loc, "invalid register name");
2370     return AMDGPU::NoRegister;
2371   }
2372 
2373   Tokens.push_back(getToken());
2374   lex(); // skip register name
2375 
2376   RegKind = RI->Kind;
2377   StringRef RegSuffix = RegName.substr(RI->Name.size());
2378   if (!RegSuffix.empty()) {
2379     // Single 32-bit register: vXX.
2380     if (!getRegNum(RegSuffix, RegNum)) {
2381       Error(Loc, "invalid register index");
2382       return AMDGPU::NoRegister;
2383     }
2384     RegWidth = 1;
2385   } else {
2386     // Range of registers: v[XX:YY]. ":YY" is optional.
2387     if (!ParseRegRange(RegNum, RegWidth))
2388       return AMDGPU::NoRegister;
2389   }
2390 
2391   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2392 }
2393 
2394 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2395                                        unsigned &RegWidth,
2396                                        SmallVectorImpl<AsmToken> &Tokens) {
2397   unsigned Reg = AMDGPU::NoRegister;
2398   auto ListLoc = getLoc();
2399 
2400   if (!skipToken(AsmToken::LBrac,
2401                  "expected a register or a list of registers")) {
2402     return AMDGPU::NoRegister;
2403   }
2404 
2405   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2406 
2407   auto Loc = getLoc();
2408   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2409     return AMDGPU::NoRegister;
2410   if (RegWidth != 1) {
2411     Error(Loc, "expected a single 32-bit register");
2412     return AMDGPU::NoRegister;
2413   }
2414 
2415   for (; trySkipToken(AsmToken::Comma); ) {
2416     RegisterKind NextRegKind;
2417     unsigned NextReg, NextRegNum, NextRegWidth;
2418     Loc = getLoc();
2419 
2420     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2421                              NextRegNum, NextRegWidth,
2422                              Tokens)) {
2423       return AMDGPU::NoRegister;
2424     }
2425     if (NextRegWidth != 1) {
2426       Error(Loc, "expected a single 32-bit register");
2427       return AMDGPU::NoRegister;
2428     }
2429     if (NextRegKind != RegKind) {
2430       Error(Loc, "registers in a list must be of the same kind");
2431       return AMDGPU::NoRegister;
2432     }
2433     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2434       return AMDGPU::NoRegister;
2435   }
2436 
2437   if (!skipToken(AsmToken::RBrac,
2438                  "expected a comma or a closing square bracket")) {
2439     return AMDGPU::NoRegister;
2440   }
2441 
2442   if (isRegularReg(RegKind))
2443     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2444 
2445   return Reg;
2446 }
2447 
2448 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2449                                           unsigned &RegNum, unsigned &RegWidth,
2450                                           SmallVectorImpl<AsmToken> &Tokens) {
2451   auto Loc = getLoc();
2452   Reg = AMDGPU::NoRegister;
2453 
2454   if (isToken(AsmToken::Identifier)) {
2455     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2456     if (Reg == AMDGPU::NoRegister)
2457       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2458   } else {
2459     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2460   }
2461 
2462   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2463   if (Reg == AMDGPU::NoRegister) {
2464     assert(Parser.hasPendingError());
2465     return false;
2466   }
2467 
2468   if (!subtargetHasRegister(*TRI, Reg)) {
2469     if (Reg == AMDGPU::SGPR_NULL) {
2470       Error(Loc, "'null' operand is not supported on this GPU");
2471     } else {
2472       Error(Loc, "register not available on this GPU");
2473     }
2474     return false;
2475   }
2476 
2477   return true;
2478 }
2479 
2480 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2481                                           unsigned &RegNum, unsigned &RegWidth,
2482                                           bool RestoreOnFailure /*=false*/) {
2483   Reg = AMDGPU::NoRegister;
2484 
2485   SmallVector<AsmToken, 1> Tokens;
2486   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2487     if (RestoreOnFailure) {
2488       while (!Tokens.empty()) {
2489         getLexer().UnLex(Tokens.pop_back_val());
2490       }
2491     }
2492     return true;
2493   }
2494   return false;
2495 }
2496 
2497 Optional<StringRef>
2498 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2499   switch (RegKind) {
2500   case IS_VGPR:
2501     return StringRef(".amdgcn.next_free_vgpr");
2502   case IS_SGPR:
2503     return StringRef(".amdgcn.next_free_sgpr");
2504   default:
2505     return None;
2506   }
2507 }
2508 
2509 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2510   auto SymbolName = getGprCountSymbolName(RegKind);
2511   assert(SymbolName && "initializing invalid register kind");
2512   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2513   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2514 }
2515 
2516 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2517                                             unsigned DwordRegIndex,
2518                                             unsigned RegWidth) {
2519   // Symbols are only defined for GCN targets
2520   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2521     return true;
2522 
2523   auto SymbolName = getGprCountSymbolName(RegKind);
2524   if (!SymbolName)
2525     return true;
2526   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2527 
2528   int64_t NewMax = DwordRegIndex + RegWidth - 1;
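  // Illustrative example: a use of v[8:9] gives DwordRegIndex = 8 and
  // RegWidth = 2, so NewMax = 9 and .amdgcn.next_free_vgpr is set to 10
  // below unless it is already larger.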
2529   int64_t OldCount;
2530 
2531   if (!Sym->isVariable())
2532     return !Error(getParser().getTok().getLoc(),
2533                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2534   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2535     return !Error(
2536         getParser().getTok().getLoc(),
2537         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2538 
2539   if (OldCount <= NewMax)
2540     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2541 
2542   return true;
2543 }
2544 
2545 std::unique_ptr<AMDGPUOperand>
2546 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2547   const auto &Tok = Parser.getTok();
2548   SMLoc StartLoc = Tok.getLoc();
2549   SMLoc EndLoc = Tok.getEndLoc();
2550   RegisterKind RegKind;
2551   unsigned Reg, RegNum, RegWidth;
2552 
2553   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2554     return nullptr;
2555   }
2556   if (isHsaAbiVersion3(&getSTI())) {
2557     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2558       return nullptr;
2559   } else
2560     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2561   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2562 }
2563 
2564 OperandMatchResultTy
2565 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2566   // TODO: add syntactic sugar for 1/(2*PI)
2567 
2568   assert(!isRegister());
2569   assert(!isModifier());
2570 
2571   const auto& Tok = getToken();
2572   const auto& NextTok = peekToken();
2573   bool IsReal = Tok.is(AsmToken::Real);
2574   SMLoc S = getLoc();
2575   bool Negate = false;
2576 
2577   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2578     lex();
2579     IsReal = true;
2580     Negate = true;
2581   }
2582 
2583   if (IsReal) {
2584     // Floating-point expressions are not supported.
2585     // Only floating-point literals with an optional
2586     // sign are allowed.
2587 
2588     StringRef Num = getTokenStr();
2589     lex();
2590 
2591     APFloat RealVal(APFloat::IEEEdouble());
2592     auto roundMode = APFloat::rmNearestTiesToEven;
2593     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2594       return MatchOperand_ParseFail;
2595     }
2596     if (Negate)
2597       RealVal.changeSign();
2598 
2599     Operands.push_back(
2600       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2601                                AMDGPUOperand::ImmTyNone, true));
2602 
2603     return MatchOperand_Success;
2604 
2605   } else {
2606     int64_t IntVal;
2607     const MCExpr *Expr;
2608     SMLoc S = getLoc();
2609 
2610     if (HasSP3AbsModifier) {
2611       // This is a workaround for handling expressions
2612       // as arguments of SP3 'abs' modifier, for example:
2613       //     |1.0|
2614       //     |-1|
2615       //     |1+x|
2616       // This syntax is not compatible with syntax of standard
2617       // MC expressions (due to the trailing '|').
2618       SMLoc EndLoc;
2619       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2620         return MatchOperand_ParseFail;
2621     } else {
2622       if (Parser.parseExpression(Expr))
2623         return MatchOperand_ParseFail;
2624     }
2625 
2626     if (Expr->evaluateAsAbsolute(IntVal)) {
2627       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2628     } else {
2629       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2630     }
2631 
2632     return MatchOperand_Success;
2633   }
2634 
2635   return MatchOperand_NoMatch;
2636 }
2637 
2638 OperandMatchResultTy
2639 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2640   if (!isRegister())
2641     return MatchOperand_NoMatch;
2642 
2643   if (auto R = parseRegister()) {
2644     assert(R->isReg());
2645     Operands.push_back(std::move(R));
2646     return MatchOperand_Success;
2647   }
2648   return MatchOperand_ParseFail;
2649 }
2650 
2651 OperandMatchResultTy
2652 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2653   auto res = parseReg(Operands);
2654   if (res != MatchOperand_NoMatch) {
2655     return res;
2656   } else if (isModifier()) {
2657     return MatchOperand_NoMatch;
2658   } else {
2659     return parseImm(Operands, HasSP3AbsMod);
2660   }
2661 }
2662 
2663 bool
2664 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2665   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2666     const auto &str = Token.getString();
2667     return str == "abs" || str == "neg" || str == "sext";
2668   }
2669   return false;
2670 }
2671 
2672 bool
2673 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2674   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2675 }
2676 
2677 bool
2678 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2679   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2680 }
2681 
2682 bool
2683 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2684   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2685 }
2686 
2687 // Check if this is an operand modifier or an opcode modifier
2688 // which may look like an expression but is not. We should
2689 // avoid parsing these modifiers as expressions. Currently
2690 // recognized sequences are:
2691 //   |...|
2692 //   abs(...)
2693 //   neg(...)
2694 //   sext(...)
2695 //   -reg
2696 //   -|...|
2697 //   -abs(...)
2698 //   name:...
2699 // Note that simple opcode modifiers like 'gds' may be parsed as
2700 // expressions; this is a special case. See getExpressionAsToken.
2701 //
2702 bool
2703 AMDGPUAsmParser::isModifier() {
2704 
2705   AsmToken Tok = getToken();
2706   AsmToken NextToken[2];
2707   peekTokens(NextToken);
2708 
2709   return isOperandModifier(Tok, NextToken[0]) ||
2710          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2711          isOpcodeModifierWithVal(Tok, NextToken[0]);
2712 }
2713 
2714 // Check if the current token is an SP3 'neg' modifier.
2715 // Currently this modifier is allowed in the following context:
2716 //
2717 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2718 // 2. Before an 'abs' modifier: -abs(...)
2719 // 3. Before an SP3 'abs' modifier: -|...|
2720 //
2721 // In all other cases "-" is handled as a part
2722 // of an expression that follows the sign.
2723 //
2724 // Note: When "-" is followed by an integer literal,
2725 // it is interpreted as integer negation rather than
2726 // a floating-point NEG modifier applied to the literal.
2727 // Besides being counter-intuitive, such use of the
2728 // floating-point NEG modifier would have resulted in a
2729 // different meaning of integer literals used with
2730 // VOP1/2/C and VOP3, for example:
2731 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2732 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2733 // Negative fp literals with a preceding "-" are
2734 // handled likewise for uniformity.
2735 //
2736 bool
2737 AMDGPUAsmParser::parseSP3NegModifier() {
2738 
2739   AsmToken NextToken[2];
2740   peekTokens(NextToken);
2741 
2742   if (isToken(AsmToken::Minus) &&
2743       (isRegister(NextToken[0], NextToken[1]) ||
2744        NextToken[0].is(AsmToken::Pipe) ||
2745        isId(NextToken[0], "abs"))) {
2746     lex();
2747     return true;
2748   }
2749 
2750   return false;
2751 }
2752 
2753 OperandMatchResultTy
2754 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2755                                               bool AllowImm) {
2756   bool Neg, SP3Neg;
2757   bool Abs, SP3Abs;
2758   SMLoc Loc;
2759 
2760   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2761   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2762     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2763     return MatchOperand_ParseFail;
2764   }
2765 
2766   SP3Neg = parseSP3NegModifier();
2767 
2768   Loc = getLoc();
2769   Neg = trySkipId("neg");
2770   if (Neg && SP3Neg) {
2771     Error(Loc, "expected register or immediate");
2772     return MatchOperand_ParseFail;
2773   }
2774   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2775     return MatchOperand_ParseFail;
2776 
2777   Abs = trySkipId("abs");
2778   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2779     return MatchOperand_ParseFail;
2780 
2781   Loc = getLoc();
2782   SP3Abs = trySkipToken(AsmToken::Pipe);
2783   if (Abs && SP3Abs) {
2784     Error(Loc, "expected register or immediate");
2785     return MatchOperand_ParseFail;
2786   }
2787 
2788   OperandMatchResultTy Res;
2789   if (AllowImm) {
2790     Res = parseRegOrImm(Operands, SP3Abs);
2791   } else {
2792     Res = parseReg(Operands);
2793   }
2794   if (Res != MatchOperand_Success) {
2795     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2796   }
2797 
2798   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2799     return MatchOperand_ParseFail;
2800   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2801     return MatchOperand_ParseFail;
2802   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2803     return MatchOperand_ParseFail;
2804 
2805   AMDGPUOperand::Modifiers Mods;
2806   Mods.Abs = Abs || SP3Abs;
2807   Mods.Neg = Neg || SP3Neg;
2808 
2809   if (Mods.hasFPModifiers()) {
2810     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2811     if (Op.isExpr()) {
2812       Error(Op.getStartLoc(), "expected an absolute expression");
2813       return MatchOperand_ParseFail;
2814     }
2815     Op.setModifiers(Mods);
2816   }
2817   return MatchOperand_Success;
2818 }
2819 
2820 OperandMatchResultTy
2821 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2822                                                bool AllowImm) {
2823   bool Sext = trySkipId("sext");
2824   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2825     return MatchOperand_ParseFail;
2826 
2827   OperandMatchResultTy Res;
2828   if (AllowImm) {
2829     Res = parseRegOrImm(Operands);
2830   } else {
2831     Res = parseReg(Operands);
2832   }
2833   if (Res != MatchOperand_Success) {
2834     return Sext? MatchOperand_ParseFail : Res;
2835   }
2836 
2837   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2838     return MatchOperand_ParseFail;
2839 
2840   AMDGPUOperand::Modifiers Mods;
2841   Mods.Sext = Sext;
2842 
2843   if (Mods.hasIntModifiers()) {
2844     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2845     if (Op.isExpr()) {
2846       Error(Op.getStartLoc(), "expected an absolute expression");
2847       return MatchOperand_ParseFail;
2848     }
2849     Op.setModifiers(Mods);
2850   }
2851 
2852   return MatchOperand_Success;
2853 }
2854 
2855 OperandMatchResultTy
2856 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2857   return parseRegOrImmWithFPInputMods(Operands, false);
2858 }
2859 
2860 OperandMatchResultTy
2861 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2862   return parseRegOrImmWithIntInputMods(Operands, false);
2863 }
2864 
2865 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2866   auto Loc = getLoc();
2867   if (trySkipId("off")) {
2868     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2869                                                 AMDGPUOperand::ImmTyOff, false));
2870     return MatchOperand_Success;
2871   }
2872 
2873   if (!isRegister())
2874     return MatchOperand_NoMatch;
2875 
2876   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2877   if (Reg) {
2878     Operands.push_back(std::move(Reg));
2879     return MatchOperand_Success;
2880   }
2881 
2882   return MatchOperand_ParseFail;
2883 
2884 }
2885 
2886 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2887   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2888 
2889   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2890       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2891       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2892       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2893     return Match_InvalidOperand;
2894 
2895   if ((TSFlags & SIInstrFlags::VOP3) &&
2896       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2897       getForcedEncodingSize() != 64)
2898     return Match_PreferE32;
2899 
2900   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2901       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2902     // v_mac_f32/16 allow only dst_sel == DWORD;
2903     auto OpNum =
2904         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2905     const auto &Op = Inst.getOperand(OpNum);
2906     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2907       return Match_InvalidOperand;
2908     }
2909   }
2910 
2911   return Match_Success;
2912 }
2913 
2914 static ArrayRef<unsigned> getAllVariants() {
2915   static const unsigned Variants[] = {
2916     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2917     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2918   };
2919 
2920   return makeArrayRef(Variants);
2921 }
2922 
2923 // What asm variants we should check
2924 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2925   if (getForcedEncodingSize() == 32) {
2926     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2927     return makeArrayRef(Variants);
2928   }
2929 
2930   if (isForcedVOP3()) {
2931     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2932     return makeArrayRef(Variants);
2933   }
2934 
2935   if (isForcedSDWA()) {
2936     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2937                                         AMDGPUAsmVariants::SDWA9};
2938     return makeArrayRef(Variants);
2939   }
2940 
2941   if (isForcedDPP()) {
2942     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2943     return makeArrayRef(Variants);
2944   }
2945 
2946   return getAllVariants();
2947 }
2948 
2949 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2950   if (getForcedEncodingSize() == 32)
2951     return "e32";
2952 
2953   if (isForcedVOP3())
2954     return "e64";
2955 
2956   if (isForcedSDWA())
2957     return "sdwa";
2958 
2959   if (isForcedDPP())
2960     return "dpp";
2961 
2962   return "";
2963 }
2964 
2965 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2966   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2967   const unsigned Num = Desc.getNumImplicitUses();
2968   for (unsigned i = 0; i < Num; ++i) {
2969     unsigned Reg = Desc.ImplicitUses[i];
2970     switch (Reg) {
2971     case AMDGPU::FLAT_SCR:
2972     case AMDGPU::VCC:
2973     case AMDGPU::VCC_LO:
2974     case AMDGPU::VCC_HI:
2975     case AMDGPU::M0:
2976       return Reg;
2977     default:
2978       break;
2979     }
2980   }
2981   return AMDGPU::NoRegister;
2982 }
2983 
2984 // NB: This code is correct only when used to check constant
2985 // bus limitations because GFX7 supports no f16 inline constants.
2986 // Note that there are no cases when a GFX7 opcode violates
2987 // constant bus limitations due to the use of an f16 constant.
2988 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2989                                        unsigned OpIdx) const {
2990   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2991 
2992   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2993     return false;
2994   }
2995 
2996   const MCOperand &MO = Inst.getOperand(OpIdx);
2997 
2998   int64_t Val = MO.getImm();
2999   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3000 
3001   switch (OpSize) { // expected operand size
3002   case 8:
3003     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3004   case 4:
3005     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3006   case 2: {
3007     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3008     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3009         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3010         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3011       return AMDGPU::isInlinableIntLiteral(Val);
3012 
3013     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3014         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3015         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3016       return AMDGPU::isInlinableIntLiteralV216(Val);
3017 
3018     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3019         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3020         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3021       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3022 
3023     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3024   }
3025   default:
3026     llvm_unreachable("invalid operand size");
3027   }
3028 }
3029 
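// Illustrative summary of the limits computed below: targets before GFX10
// allow a single scalar value (SGPR or literal) per instruction, GFX10+
// allows two for most opcodes, and the 64-bit shifts listed in the switch
// remain limited to one.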
3030 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3031   if (!isGFX10Plus())
3032     return 1;
3033 
3034   switch (Opcode) {
3035   // 64-bit shift instructions can use only one scalar value input
3036   case AMDGPU::V_LSHLREV_B64:
3037   case AMDGPU::V_LSHLREV_B64_gfx10:
3038   case AMDGPU::V_LSHL_B64:
3039   case AMDGPU::V_LSHRREV_B64:
3040   case AMDGPU::V_LSHRREV_B64_gfx10:
3041   case AMDGPU::V_LSHR_B64:
3042   case AMDGPU::V_ASHRREV_I64:
3043   case AMDGPU::V_ASHRREV_I64_gfx10:
3044   case AMDGPU::V_ASHR_I64:
3045     return 1;
3046   default:
3047     return 2;
3048   }
3049 }
3050 
3051 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3052   const MCOperand &MO = Inst.getOperand(OpIdx);
3053   if (MO.isImm()) {
3054     return !isInlineConstant(Inst, OpIdx);
3055   } else if (MO.isReg()) {
3056     auto Reg = MO.getReg();
3057     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3058     auto PReg = mc2PseudoReg(Reg);
3059     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3060   } else {
3061     return true;
3062   }
3063 }
3064 
3065 bool
3066 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3067                                                 const OperandVector &Operands) {
3068   const unsigned Opcode = Inst.getOpcode();
3069   const MCInstrDesc &Desc = MII.get(Opcode);
3070   unsigned LastSGPR = AMDGPU::NoRegister;
3071   unsigned ConstantBusUseCount = 0;
3072   unsigned NumLiterals = 0;
3073   unsigned LiteralSize;
3074 
3075   if (Desc.TSFlags &
3076       (SIInstrFlags::VOPC |
3077        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3078        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3079        SIInstrFlags::SDWA)) {
3080     // Check special imm operands (used by madmk, etc)
3081     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3082       ++ConstantBusUseCount;
3083     }
3084 
3085     SmallDenseSet<unsigned> SGPRsUsed;
3086     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3087     if (SGPRUsed != AMDGPU::NoRegister) {
3088       SGPRsUsed.insert(SGPRUsed);
3089       ++ConstantBusUseCount;
3090     }
3091 
3092     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3093     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3094     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3095 
3096     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3097 
3098     for (int OpIdx : OpIndices) {
3099       if (OpIdx == -1) break;
3100 
3101       const MCOperand &MO = Inst.getOperand(OpIdx);
3102       if (usesConstantBus(Inst, OpIdx)) {
3103         if (MO.isReg()) {
3104           LastSGPR = mc2PseudoReg(MO.getReg());
3105           // Pairs of registers with a partial intersection, such as
3106           //   s0, s[0:1]
3107           //   flat_scratch_lo, flat_scratch
3108           //   flat_scratch_lo, flat_scratch_hi
3109           // are theoretically valid but are disabled anyway.
3110           // Note that this code mimics SIInstrInfo::verifyInstruction.
3111           if (!SGPRsUsed.count(LastSGPR)) {
3112             SGPRsUsed.insert(LastSGPR);
3113             ++ConstantBusUseCount;
3114           }
3115         } else { // Expression or a literal
3116 
3117           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3118             continue; // special operand like VINTERP attr_chan
3119 
3120           // An instruction may use only one literal.
3121           // This has been validated in a previous step.
3122           // See validateVOP3Literal.
3123           // This literal may be used as more than one operand.
3124           // If all these operands are of the same size,
3125           // this literal counts as one scalar value.
3126           // Otherwise it counts as 2 scalar values.
3127           // See "GFX10 Shader Programming", section 3.6.2.3.
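               // For example, if the same 32-bit literal value feeds two 32-bit
               // operands it counts as one scalar value, but if it feeds a 32-bit
               // and a 64-bit operand it counts as two.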
3128 
3129           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3130           if (Size < 4) Size = 4;
3131 
3132           if (NumLiterals == 0) {
3133             NumLiterals = 1;
3134             LiteralSize = Size;
3135           } else if (LiteralSize != Size) {
3136             NumLiterals = 2;
3137           }
3138         }
3139       }
3140     }
3141   }
3142   ConstantBusUseCount += NumLiterals;
3143 
3144   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3145     return true;
3146 
3147   SMLoc LitLoc = getLitLoc(Operands);
3148   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3149   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3150   Error(Loc, "invalid operand (violates constant bus restrictions)");
3151   return false;
3152 }
3153 
3154 bool
3155 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3156                                                  const OperandVector &Operands) {
3157   const unsigned Opcode = Inst.getOpcode();
3158   const MCInstrDesc &Desc = MII.get(Opcode);
3159 
3160   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3161   if (DstIdx == -1 ||
3162       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3163     return true;
3164   }
3165 
3166   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3167 
3168   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3169   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3170   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3171 
3172   assert(DstIdx != -1);
3173   const MCOperand &Dst = Inst.getOperand(DstIdx);
3174   assert(Dst.isReg());
3175   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3176 
3177   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3178 
3179   for (int SrcIdx : SrcIndices) {
3180     if (SrcIdx == -1) break;
3181     const MCOperand &Src = Inst.getOperand(SrcIdx);
3182     if (Src.isReg()) {
3183       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3184       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3185         Error(getRegLoc(SrcReg, Operands),
3186           "destination must be different than all sources");
3187         return false;
3188       }
3189     }
3190   }
3191 
3192   return true;
3193 }
3194 
3195 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3196 
3197   const unsigned Opc = Inst.getOpcode();
3198   const MCInstrDesc &Desc = MII.get(Opc);
3199 
3200   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3201     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3202     assert(ClampIdx != -1);
3203     return Inst.getOperand(ClampIdx).getImm() == 0;
3204   }
3205 
3206   return true;
3207 }
3208 
3209 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3210 
3211   const unsigned Opc = Inst.getOpcode();
3212   const MCInstrDesc &Desc = MII.get(Opc);
3213 
3214   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3215     return true;
3216 
3217   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3218   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3219   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3220 
3221   assert(VDataIdx != -1);
3222 
3223   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3224     return true;
3225 
3226   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3227   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3228   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3229   if (DMask == 0)
3230     DMask = 1;
3231 
3232   unsigned DataSize =
3233     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3234   if (hasPackedD16()) {
3235     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3236     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3237       DataSize = (DataSize + 1) / 2;
3238   }
3239 
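       // For example, a load with dmask=0x7 (three components) and tfe set must
       // use a 4-dword vdata: three data dwords plus one dword for the tfe status.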
3240   return (VDataSize / 4) == DataSize + TFESize;
3241 }
3242 
3243 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3244   const unsigned Opc = Inst.getOpcode();
3245   const MCInstrDesc &Desc = MII.get(Opc);
3246 
3247   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3248     return true;
3249 
3250   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3251 
3252   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3253       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3254   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3255   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3256   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3257 
3258   assert(VAddr0Idx != -1);
3259   assert(SrsrcIdx != -1);
3260   assert(SrsrcIdx > VAddr0Idx);
3261 
3262   if (DimIdx == -1)
3263     return true; // intersect_ray
3264 
3265   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3266   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3267   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3268   unsigned VAddrSize =
3269       IsNSA ? SrsrcIdx - VAddr0Idx
3270             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3271 
3272   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3273                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3274                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3275                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3276   if (!IsNSA) {
3277     if (AddrSize > 8)
3278       AddrSize = 16;
3279     else if (AddrSize > 4)
3280       AddrSize = 8;
3281   }
3282 
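       // In non-NSA encodings the address is a single register tuple whose size
       // is rounded up to the next legal tuple, so, e.g., 5 address dwords
       // require an 8-dword vaddr.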
3283   return VAddrSize == AddrSize;
3284 }
3285 
3286 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3287 
3288   const unsigned Opc = Inst.getOpcode();
3289   const MCInstrDesc &Desc = MII.get(Opc);
3290 
3291   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3292     return true;
3293   if (!Desc.mayLoad() || !Desc.mayStore())
3294     return true; // Not atomic
3295 
3296   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3297   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3298 
3299   // This is an incomplete check because image_atomic_cmpswap
3300   // may only use 0x3 and 0xf while other atomic operations
3301   // may use 0x1 and 0x3. However these limitations are
3302   // verified when we check that dmask matches dst size.
3303   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3304 }
3305 
3306 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3307 
3308   const unsigned Opc = Inst.getOpcode();
3309   const MCInstrDesc &Desc = MII.get(Opc);
3310 
3311   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3312     return true;
3313 
3314   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3315   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3316 
3317   // GATHER4 instructions use dmask in a different fashion compared to
3318   // other MIMG instructions. The only useful DMASK values are
3319   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3320   // (red,red,red,red) etc.) The ISA document doesn't mention
3321   // this.
3322   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3323 }
3324 
3325 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3327   switch (Opcode) {
3328   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3329   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3330   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3331     return true;
3332   default:
3333     return false;
3334   }
3335 }
3336 
3337 // movrels* opcodes should only allow VGPRs as src0.
3338 // This is specified in the .td description for vop1/vop3,
3339 // but sdwa is handled differently. See isSDWAOperand.
3340 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3341                                       const OperandVector &Operands) {
3342 
3343   const unsigned Opc = Inst.getOpcode();
3344   const MCInstrDesc &Desc = MII.get(Opc);
3345 
3346   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3347     return true;
3348 
3349   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3350   assert(Src0Idx != -1);
3351 
3352   SMLoc ErrLoc;
3353   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3354   if (Src0.isReg()) {
3355     auto Reg = mc2PseudoReg(Src0.getReg());
3356     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3357     if (!isSGPR(Reg, TRI))
3358       return true;
3359     ErrLoc = getRegLoc(Reg, Operands);
3360   } else {
3361     ErrLoc = getConstLoc(Operands);
3362   }
3363 
3364   Error(ErrLoc, "source operand must be a VGPR");
3365   return false;
3366 }
3367 
3368 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3369                                           const OperandVector &Operands) {
3370 
3371   const unsigned Opc = Inst.getOpcode();
3372 
3373   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3374     return true;
3375 
3376   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3377   assert(Src0Idx != -1);
3378 
3379   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3380   if (!Src0.isReg())
3381     return true;
3382 
3383   auto Reg = mc2PseudoReg(Src0.getReg());
3384   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3385   if (isSGPR(Reg, TRI)) {
3386     Error(getRegLoc(Reg, Operands),
3387           "source operand must be either a VGPR or an inline constant");
3388     return false;
3389   }
3390 
3391   return true;
3392 }
3393 
3394 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3395   switch (Inst.getOpcode()) {
3396   default:
3397     return true;
3398   case V_DIV_SCALE_F32_gfx6_gfx7:
3399   case V_DIV_SCALE_F32_vi:
3400   case V_DIV_SCALE_F32_gfx10:
3401   case V_DIV_SCALE_F64_gfx6_gfx7:
3402   case V_DIV_SCALE_F64_vi:
3403   case V_DIV_SCALE_F64_gfx10:
3404     break;
3405   }
3406 
3407   // TODO: Check that src0 = src1 or src2.
3408 
3409   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3410                     AMDGPU::OpName::src1_modifiers,
3411                     AMDGPU::OpName::src2_modifiers}) {
3412     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3413             .getImm() &
3414         SISrcMods::ABS) {
3415       return false;
3416     }
3417   }
3418 
3419   return true;
3420 }
3421 
3422 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3423 
3424   const unsigned Opc = Inst.getOpcode();
3425   const MCInstrDesc &Desc = MII.get(Opc);
3426 
3427   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3428     return true;
3429 
3430   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3431   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3432     if (isCI() || isSI())
3433       return false;
3434   }
3435 
3436   return true;
3437 }
3438 
3439 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3440   const unsigned Opc = Inst.getOpcode();
3441   const MCInstrDesc &Desc = MII.get(Opc);
3442 
3443   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3444     return true;
3445 
3446   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3447   if (DimIdx < 0)
3448     return true;
3449 
3450   long Imm = Inst.getOperand(DimIdx).getImm();
3451   if (Imm < 0 || Imm >= 8)
3452     return false;
3453 
3454   return true;
3455 }
3456 
3457 static bool IsRevOpcode(const unsigned Opcode) {
3459   switch (Opcode) {
3460   case AMDGPU::V_SUBREV_F32_e32:
3461   case AMDGPU::V_SUBREV_F32_e64:
3462   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3463   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3464   case AMDGPU::V_SUBREV_F32_e32_vi:
3465   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3466   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3467   case AMDGPU::V_SUBREV_F32_e64_vi:
3468 
3469   case AMDGPU::V_SUBREV_CO_U32_e32:
3470   case AMDGPU::V_SUBREV_CO_U32_e64:
3471   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3472   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3473 
3474   case AMDGPU::V_SUBBREV_U32_e32:
3475   case AMDGPU::V_SUBBREV_U32_e64:
3476   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3477   case AMDGPU::V_SUBBREV_U32_e32_vi:
3478   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3479   case AMDGPU::V_SUBBREV_U32_e64_vi:
3480 
3481   case AMDGPU::V_SUBREV_U32_e32:
3482   case AMDGPU::V_SUBREV_U32_e64:
3483   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3484   case AMDGPU::V_SUBREV_U32_e32_vi:
3485   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3486   case AMDGPU::V_SUBREV_U32_e64_vi:
3487 
3488   case AMDGPU::V_SUBREV_F16_e32:
3489   case AMDGPU::V_SUBREV_F16_e64:
3490   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3491   case AMDGPU::V_SUBREV_F16_e32_vi:
3492   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3493   case AMDGPU::V_SUBREV_F16_e64_vi:
3494 
3495   case AMDGPU::V_SUBREV_U16_e32:
3496   case AMDGPU::V_SUBREV_U16_e64:
3497   case AMDGPU::V_SUBREV_U16_e32_vi:
3498   case AMDGPU::V_SUBREV_U16_e64_vi:
3499 
3500   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3501   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3502   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3503 
3504   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3505   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3506 
3507   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3508   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3509 
3510   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3511   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3512 
3513   case AMDGPU::V_LSHRREV_B32_e32:
3514   case AMDGPU::V_LSHRREV_B32_e64:
3515   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3516   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3517   case AMDGPU::V_LSHRREV_B32_e32_vi:
3518   case AMDGPU::V_LSHRREV_B32_e64_vi:
3519   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3520   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3521 
3522   case AMDGPU::V_ASHRREV_I32_e32:
3523   case AMDGPU::V_ASHRREV_I32_e64:
3524   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3525   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3526   case AMDGPU::V_ASHRREV_I32_e32_vi:
3527   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3528   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3529   case AMDGPU::V_ASHRREV_I32_e64_vi:
3530 
3531   case AMDGPU::V_LSHLREV_B32_e32:
3532   case AMDGPU::V_LSHLREV_B32_e64:
3533   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3534   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3535   case AMDGPU::V_LSHLREV_B32_e32_vi:
3536   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3537   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3538   case AMDGPU::V_LSHLREV_B32_e64_vi:
3539 
3540   case AMDGPU::V_LSHLREV_B16_e32:
3541   case AMDGPU::V_LSHLREV_B16_e64:
3542   case AMDGPU::V_LSHLREV_B16_e32_vi:
3543   case AMDGPU::V_LSHLREV_B16_e64_vi:
3544   case AMDGPU::V_LSHLREV_B16_gfx10:
3545 
3546   case AMDGPU::V_LSHRREV_B16_e32:
3547   case AMDGPU::V_LSHRREV_B16_e64:
3548   case AMDGPU::V_LSHRREV_B16_e32_vi:
3549   case AMDGPU::V_LSHRREV_B16_e64_vi:
3550   case AMDGPU::V_LSHRREV_B16_gfx10:
3551 
3552   case AMDGPU::V_ASHRREV_I16_e32:
3553   case AMDGPU::V_ASHRREV_I16_e64:
3554   case AMDGPU::V_ASHRREV_I16_e32_vi:
3555   case AMDGPU::V_ASHRREV_I16_e64_vi:
3556   case AMDGPU::V_ASHRREV_I16_gfx10:
3557 
3558   case AMDGPU::V_LSHLREV_B64:
3559   case AMDGPU::V_LSHLREV_B64_gfx10:
3560   case AMDGPU::V_LSHLREV_B64_vi:
3561 
3562   case AMDGPU::V_LSHRREV_B64:
3563   case AMDGPU::V_LSHRREV_B64_gfx10:
3564   case AMDGPU::V_LSHRREV_B64_vi:
3565 
3566   case AMDGPU::V_ASHRREV_I64:
3567   case AMDGPU::V_ASHRREV_I64_gfx10:
3568   case AMDGPU::V_ASHRREV_I64_vi:
3569 
3570   case AMDGPU::V_PK_LSHLREV_B16:
3571   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3572   case AMDGPU::V_PK_LSHLREV_B16_vi:
3573 
3574   case AMDGPU::V_PK_LSHRREV_B16:
3575   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3576   case AMDGPU::V_PK_LSHRREV_B16_vi:
3577   case AMDGPU::V_PK_ASHRREV_I16:
3578   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3579   case AMDGPU::V_PK_ASHRREV_I16_vi:
3580     return true;
3581   default:
3582     return false;
3583   }
3584 }
3585 
3586 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3587 
3588   using namespace SIInstrFlags;
3589   const unsigned Opcode = Inst.getOpcode();
3590   const MCInstrDesc &Desc = MII.get(Opcode);
3591 
3592   // The lds_direct register is defined so that it can be used
3593   // with 9-bit source operands only. Ignore encodings that do not accept them.
3594   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3595     return true;
3596 
3597   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3598   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3599   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3600 
3601   const int SrcIndices[] = { Src1Idx, Src2Idx };
3602 
3603   // lds_direct cannot be specified as either src1 or src2.
3604   for (int SrcIdx : SrcIndices) {
3605     if (SrcIdx == -1) break;
3606     const MCOperand &Src = Inst.getOperand(SrcIdx);
3607     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3608       return false;
3609     }
3610   }
3611 
3612   if (Src0Idx == -1)
3613     return true;
3614 
3615   const MCOperand &Src = Inst.getOperand(Src0Idx);
3616   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3617     return true;
3618 
3619   // lds_direct is specified as src0. Check additional limitations.
3620   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3621 }
3622 
3623 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3624   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3625     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3626     if (Op.isFlatOffset())
3627       return Op.getStartLoc();
3628   }
3629   return getLoc();
3630 }
3631 
3632 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3633                                          const OperandVector &Operands) {
3634   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3635   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3636     return true;
3637 
3638   auto Opcode = Inst.getOpcode();
3639   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3640   assert(OpNum != -1);
3641 
3642   const auto &Op = Inst.getOperand(OpNum);
3643   if (!hasFlatOffsets() && Op.getImm() != 0) {
3644     Error(getFlatOffsetLoc(Operands),
3645           "flat offset modifier is not supported on this GPU");
3646     return false;
3647   }
3648 
3649   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3650   // For the FLAT segment the offset must be positive;
3651   // the MSB is ignored and forced to zero.
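       // E.g. on GFX9 a global/scratch offset must be in [-4096, 4095] and a
       // plain FLAT offset in [0, 4095]; on GFX10 the ranges are [-2048, 2047]
       // and [0, 2047] respectively.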
3652   unsigned OffsetSize = isGFX9() ? 13 : 12;
3653   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3654     if (!isIntN(OffsetSize, Op.getImm())) {
3655       Error(getFlatOffsetLoc(Operands),
3656             isGFX9() ? "expected a 13-bit signed offset" :
3657                        "expected a 12-bit signed offset");
3658       return false;
3659     }
3660   } else {
3661     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3662       Error(getFlatOffsetLoc(Operands),
3663             isGFX9() ? "expected a 12-bit unsigned offset" :
3664                        "expected an 11-bit unsigned offset");
3665       return false;
3666     }
3667   }
3668 
3669   return true;
3670 }
3671 
3672 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3673   // Start with second operand because SMEM Offset cannot be dst or src0.
3674   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3675     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3676     if (Op.isSMEMOffset())
3677       return Op.getStartLoc();
3678   }
3679   return getLoc();
3680 }
3681 
3682 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3683                                          const OperandVector &Operands) {
3684   if (isCI() || isSI())
3685     return true;
3686 
3687   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3688   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3689     return true;
3690 
3691   auto Opcode = Inst.getOpcode();
3692   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3693   if (OpNum == -1)
3694     return true;
3695 
3696   const auto &Op = Inst.getOperand(OpNum);
3697   if (!Op.isImm())
3698     return true;
3699 
3700   uint64_t Offset = Op.getImm();
3701   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3702   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3703       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3704     return true;
3705 
3706   Error(getSMEMOffsetLoc(Operands),
3707         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3708                                "expected a 21-bit signed offset");
3709 
3710   return false;
3711 }
3712 
3713 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3714   unsigned Opcode = Inst.getOpcode();
3715   const MCInstrDesc &Desc = MII.get(Opcode);
3716   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3717     return true;
3718 
3719   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3720   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3721 
3722   const int OpIndices[] = { Src0Idx, Src1Idx };
3723 
3724   unsigned NumExprs = 0;
3725   unsigned NumLiterals = 0;
3726   uint32_t LiteralValue;
3727 
3728   for (int OpIdx : OpIndices) {
3729     if (OpIdx == -1) break;
3730 
3731     const MCOperand &MO = Inst.getOperand(OpIdx);
3732     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3733     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3734       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3735         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3736         if (NumLiterals == 0 || LiteralValue != Value) {
3737           LiteralValue = Value;
3738           ++NumLiterals;
3739         }
3740       } else if (MO.isExpr()) {
3741         ++NumExprs;
3742       }
3743     }
3744   }
3745 
3746   return NumLiterals + NumExprs <= 1;
3747 }
3748 
3749 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3750   const unsigned Opc = Inst.getOpcode();
3751   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3752       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3753     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3754     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3755 
3756     if (OpSel & ~3)
3757       return false;
3758   }
3759   return true;
3760 }
3761 
3762 // Check if VCC register matches wavefront size
3763 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3764   auto FB = getFeatureBits();
3765   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3766     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3767 }
3768 
3769 // A VOP3 literal is only allowed on GFX10+, and at most one can be used.
3770 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3771                                           const OperandVector &Operands) {
3772   unsigned Opcode = Inst.getOpcode();
3773   const MCInstrDesc &Desc = MII.get(Opcode);
3774   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3775     return true;
3776 
3777   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3778   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3779   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3780 
3781   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3782 
3783   unsigned NumExprs = 0;
3784   unsigned NumLiterals = 0;
3785   uint32_t LiteralValue;
3786 
3787   for (int OpIdx : OpIndices) {
3788     if (OpIdx == -1) break;
3789 
3790     const MCOperand &MO = Inst.getOperand(OpIdx);
3791     if (!MO.isImm() && !MO.isExpr())
3792       continue;
3793     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3794       continue;
3795 
3796     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3797         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3798       Error(getConstLoc(Operands),
3799             "inline constants are not allowed for this operand");
3800       return false;
3801     }
3802 
3803     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3804       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3805       if (NumLiterals == 0 || LiteralValue != Value) {
3806         LiteralValue = Value;
3807         ++NumLiterals;
3808       }
3809     } else if (MO.isExpr()) {
3810       ++NumExprs;
3811     }
3812   }
3813   NumLiterals += NumExprs;
3814 
3815   if (!NumLiterals)
3816     return true;
3817 
3818   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3819     Error(getLitLoc(Operands), "literal operands are not supported");
3820     return false;
3821   }
3822 
3823   if (NumLiterals > 1) {
3824     Error(getLitLoc(Operands), "only one literal operand is allowed");
3825     return false;
3826   }
3827 
3828   return true;
3829 }
3830 
3831 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3832                                             const OperandVector &Operands,
3833                                             const SMLoc &IDLoc) {
3834   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3835                                           AMDGPU::OpName::glc1);
3836   if (GLCPos != -1) {
3837     // -1 is set by the GLC_1 default operand. In all cases "glc" must be present
3838     // in the asm string, and the default value means it is not present.
3839     if (Inst.getOperand(GLCPos).getImm() == -1) {
3840       Error(IDLoc, "instruction must use glc");
3841       return false;
3842     }
3843   }
3844 
3845   return true;
3846 }
3847 
3848 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3849                                           const SMLoc &IDLoc,
3850                                           const OperandVector &Operands) {
3851   if (!validateLdsDirect(Inst)) {
3852     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
3853       "invalid use of lds_direct");
3854     return false;
3855   }
3856   if (!validateSOPLiteral(Inst)) {
3857     Error(getLitLoc(Operands),
3858       "only one literal operand is allowed");
3859     return false;
3860   }
3861   if (!validateVOP3Literal(Inst, Operands)) {
3862     return false;
3863   }
3864   if (!validateConstantBusLimitations(Inst, Operands)) {
3865     return false;
3866   }
3867   if (!validateEarlyClobberLimitations(Inst, Operands)) {
3868     return false;
3869   }
3870   if (!validateIntClampSupported(Inst)) {
3871     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
3872       "integer clamping is not supported on this GPU");
3873     return false;
3874   }
3875   if (!validateOpSel(Inst)) {
3876     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
3877       "invalid op_sel operand");
3878     return false;
3879   }
3880   // For MUBUF/MTBUF, d16 is part of the opcode, so there's nothing to validate.
3881   if (!validateMIMGD16(Inst)) {
3882     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
3883       "d16 modifier is not supported on this GPU");
3884     return false;
3885   }
3886   if (!validateMIMGDim(Inst)) {
3887     Error(IDLoc, "dim modifier is required on this GPU");
3888     return false;
3889   }
3890   if (!validateMIMGDataSize(Inst)) {
3891     Error(IDLoc,
3892       "image data size does not match dmask and tfe");
3893     return false;
3894   }
3895   if (!validateMIMGAddrSize(Inst)) {
3896     Error(IDLoc,
3897       "image address size does not match dim and a16");
3898     return false;
3899   }
3900   if (!validateMIMGAtomicDMask(Inst)) {
3901     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3902       "invalid atomic image dmask");
3903     return false;
3904   }
3905   if (!validateMIMGGatherDMask(Inst)) {
3906     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3907       "invalid image_gather dmask: only one bit must be set");
3908     return false;
3909   }
3910   if (!validateMovrels(Inst, Operands)) {
3911     return false;
3912   }
3913   if (!validateFlatOffset(Inst, Operands)) {
3914     return false;
3915   }
3916   if (!validateSMEMOffset(Inst, Operands)) {
3917     return false;
3918   }
3919   if (!validateMAIAccWrite(Inst, Operands)) {
3920     return false;
3921   }
3922   if (!validateDivScale(Inst)) {
3923     Error(IDLoc, "ABS not allowed in VOP3B instructions");
3924     return false;
3925   }
3926   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3927     return false;
3928   }
3929 
3930   return true;
3931 }
3932 
3933 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3934                                             const FeatureBitset &FBS,
3935                                             unsigned VariantID = 0);
3936 
3937 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3938                                 const FeatureBitset &AvailableFeatures,
3939                                 unsigned VariantID);
3940 
3941 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3942                                        const FeatureBitset &FBS) {
3943   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3944 }
3945 
3946 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3947                                        const FeatureBitset &FBS,
3948                                        ArrayRef<unsigned> Variants) {
3949   for (auto Variant : Variants) {
3950     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3951       return true;
3952   }
3953 
3954   return false;
3955 }
3956 
3957 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3958                                                   const SMLoc &IDLoc) {
3959   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3960 
3961   // Check if requested instruction variant is supported.
3962   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3963     return false;
3964 
3965   // This instruction is not supported.
3966   // Clear any other pending errors because they are no longer relevant.
3967   getParser().clearPendingErrors();
3968 
3969   // Requested instruction variant is not supported.
3970   // Check if any other variants are supported.
3971   StringRef VariantName = getMatchedVariantName();
3972   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3973     return Error(IDLoc,
3974                  Twine(VariantName,
3975                        " variant of this instruction is not supported"));
3976   }
3977 
3978   // Finally check if this instruction is supported on any other GPU.
3979   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3980     return Error(IDLoc, "instruction not supported on this GPU");
3981   }
3982 
3983   // Instruction not supported on any GPU. Probably a typo.
3984   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3985   return Error(IDLoc, "invalid instruction" + Suggestion);
3986 }
3987 
3988 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3989                                               OperandVector &Operands,
3990                                               MCStreamer &Out,
3991                                               uint64_t &ErrorInfo,
3992                                               bool MatchingInlineAsm) {
3993   MCInst Inst;
3994   unsigned Result = Match_Success;
3995   for (auto Variant : getMatchedVariants()) {
3996     uint64_t EI;
3997     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3998                                   Variant);
3999     // Match statuses are ordered from least to most specific, and the most
4000     // specific status is used as the result:
4001     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4002     if ((R == Match_Success) ||
4003         (R == Match_PreferE32) ||
4004         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4005         (R == Match_InvalidOperand && Result != Match_MissingFeature
4006                                    && Result != Match_PreferE32) ||
4007         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4008                                    && Result != Match_MissingFeature
4009                                    && Result != Match_PreferE32)) {
4010       Result = R;
4011       ErrorInfo = EI;
4012     }
4013     if (R == Match_Success)
4014       break;
4015   }
4016 
4017   if (Result == Match_Success) {
4018     if (!validateInstruction(Inst, IDLoc, Operands)) {
4019       return true;
4020     }
4021     Inst.setLoc(IDLoc);
4022     Out.emitInstruction(Inst, getSTI());
4023     return false;
4024   }
4025 
4026   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4027   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4028     return true;
4029   }
4030 
4031   switch (Result) {
4032   default: break;
4033   case Match_MissingFeature:
4034     // It has been verified that the specified instruction
4035     // mnemonic is valid. A match was found but it requires
4036     // features which are not supported on this GPU.
4037     return Error(IDLoc, "operands are not valid for this GPU or mode");
4038 
4039   case Match_InvalidOperand: {
4040     SMLoc ErrorLoc = IDLoc;
4041     if (ErrorInfo != ~0ULL) {
4042       if (ErrorInfo >= Operands.size()) {
4043         return Error(IDLoc, "too few operands for instruction");
4044       }
4045       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4046       if (ErrorLoc == SMLoc())
4047         ErrorLoc = IDLoc;
4048     }
4049     return Error(ErrorLoc, "invalid operand for instruction");
4050   }
4051 
4052   case Match_PreferE32:
4053     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4054                         "should be encoded as e32");
4055   case Match_MnemonicFail:
4056     llvm_unreachable("Invalid instructions should have been handled already");
4057   }
4058   llvm_unreachable("Implement any new match types added!");
4059 }
4060 
4061 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4062   int64_t Tmp = -1;
4063   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
4064     return true;
4065   }
4066   if (getParser().parseAbsoluteExpression(Tmp)) {
4067     return true;
4068   }
4069   Ret = static_cast<uint32_t>(Tmp);
4070   return false;
4071 }
4072 
4073 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4074                                                uint32_t &Minor) {
4075   if (ParseAsAbsoluteExpression(Major))
4076     return TokError("invalid major version");
4077 
4078   if (getLexer().isNot(AsmToken::Comma))
4079     return TokError("minor version number required, comma expected");
4080   Lex();
4081 
4082   if (ParseAsAbsoluteExpression(Minor))
4083     return TokError("invalid minor version");
4084 
4085   return false;
4086 }
4087 
4088 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4089   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4090     return TokError("directive only supported for amdgcn architecture");
4091 
4092   std::string Target;
4093 
4094   SMLoc TargetStart = getTok().getLoc();
4095   if (getParser().parseEscapedString(Target))
4096     return true;
4097   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4098 
4099   std::string ExpectedTarget;
4100   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4101   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4102 
4103   if (Target != ExpectedTargetOS.str())
4104     return getParser().Error(TargetRange.Start, "target must match options",
4105                              TargetRange);
4106 
4107   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4108   return false;
4109 }
4110 
4111 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4112   return getParser().Error(Range.Start, "value out of range", Range);
4113 }
4114 
4115 bool AMDGPUAsmParser::calculateGPRBlocks(
4116     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4117     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4118     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4119     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4120   // TODO(scott.linder): These calculations are duplicated from
4121   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4122   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4123 
4124   unsigned NumVGPRs = NextFreeVGPR;
4125   unsigned NumSGPRs = NextFreeSGPR;
4126 
4127   if (Version.Major >= 10)
4128     NumSGPRs = 0;
4129   else {
4130     unsigned MaxAddressableNumSGPRs =
4131         IsaInfo::getAddressableNumSGPRs(&getSTI());
4132 
4133     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4134         NumSGPRs > MaxAddressableNumSGPRs)
4135       return OutOfRangeError(SGPRRange);
4136 
4137     NumSGPRs +=
4138         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4139 
4140     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4141         NumSGPRs > MaxAddressableNumSGPRs)
4142       return OutOfRangeError(SGPRRange);
4143 
4144     if (Features.test(FeatureSGPRInitBug))
4145       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4146   }
4147 
4148   VGPRBlocks =
4149       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4150   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4151 
4152   return false;
4153 }
4154 
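     // Parses the body of an .amdhsa_kernel directive, e.g. (illustrative values):
     //
     //   .amdhsa_kernel my_kernel
     //     .amdhsa_next_free_vgpr 8
     //     .amdhsa_next_free_sgpr 16
     //   .end_amdhsa_kernel
     //
     // The .amdhsa_next_free_vgpr/sgpr entries are mandatory; each .amdhsa_
     // entry may appear at most once.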
4155 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4156   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4157     return TokError("directive only supported for amdgcn architecture");
4158 
4159   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4160     return TokError("directive only supported for amdhsa OS");
4161 
4162   StringRef KernelName;
4163   if (getParser().parseIdentifier(KernelName))
4164     return true;
4165 
4166   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4167 
4168   StringSet<> Seen;
4169 
4170   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4171 
4172   SMRange VGPRRange;
4173   uint64_t NextFreeVGPR = 0;
4174   SMRange SGPRRange;
4175   uint64_t NextFreeSGPR = 0;
4176   unsigned UserSGPRCount = 0;
4177   bool ReserveVCC = true;
4178   bool ReserveFlatScr = true;
4179   bool ReserveXNACK = hasXNACK();
4180   Optional<bool> EnableWavefrontSize32;
4181 
4182   while (true) {
4183     while (getLexer().is(AsmToken::EndOfStatement))
4184       Lex();
4185 
4186     if (getLexer().isNot(AsmToken::Identifier))
4187       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4188 
4189     StringRef ID = getTok().getIdentifier();
4190     SMRange IDRange = getTok().getLocRange();
4191     Lex();
4192 
4193     if (ID == ".end_amdhsa_kernel")
4194       break;
4195 
4196     if (Seen.find(ID) != Seen.end())
4197       return TokError(".amdhsa_ directives cannot be repeated");
4198     Seen.insert(ID);
4199 
4200     SMLoc ValStart = getTok().getLoc();
4201     int64_t IVal;
4202     if (getParser().parseAbsoluteExpression(IVal))
4203       return true;
4204     SMLoc ValEnd = getTok().getLoc();
4205     SMRange ValRange = SMRange(ValStart, ValEnd);
4206 
4207     if (IVal < 0)
4208       return OutOfRangeError(ValRange);
4209 
4210     uint64_t Val = IVal;
4211 
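     // Check that a value fits in the width of its descriptor field before
     // setting it.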
4212 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4213   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4214     return OutOfRangeError(RANGE);                                             \
4215   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4216 
4217     if (ID == ".amdhsa_group_segment_fixed_size") {
4218       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4219         return OutOfRangeError(ValRange);
4220       KD.group_segment_fixed_size = Val;
4221     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4222       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4223         return OutOfRangeError(ValRange);
4224       KD.private_segment_fixed_size = Val;
4225     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4226       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4227                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4228                        Val, ValRange);
4229       if (Val)
4230         UserSGPRCount += 4;
4231     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4232       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4233                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4234                        ValRange);
4235       if (Val)
4236         UserSGPRCount += 2;
4237     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4238       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4239                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4240                        ValRange);
4241       if (Val)
4242         UserSGPRCount += 2;
4243     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4244       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4245                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4246                        Val, ValRange);
4247       if (Val)
4248         UserSGPRCount += 2;
4249     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4250       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4251                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4252                        ValRange);
4253       if (Val)
4254         UserSGPRCount += 2;
4255     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4256       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4257                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4258                        ValRange);
4259       if (Val)
4260         UserSGPRCount += 2;
4261     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4262       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4263                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4264                        Val, ValRange);
4265       if (Val)
4266         UserSGPRCount += 1;
4267     } else if (ID == ".amdhsa_wavefront_size32") {
4268       if (IVersion.Major < 10)
4269         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4270                                  IDRange);
4271       EnableWavefrontSize32 = Val;
4272       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4273                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4274                        Val, ValRange);
4275     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4276       PARSE_BITS_ENTRY(
4277           KD.compute_pgm_rsrc2,
4278           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4279           ValRange);
4280     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4281       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4282                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4283                        ValRange);
4284     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4285       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4286                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4287                        ValRange);
4288     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4289       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4290                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4291                        ValRange);
4292     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4293       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4294                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4295                        ValRange);
4296     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4297       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4298                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4299                        ValRange);
4300     } else if (ID == ".amdhsa_next_free_vgpr") {
4301       VGPRRange = ValRange;
4302       NextFreeVGPR = Val;
4303     } else if (ID == ".amdhsa_next_free_sgpr") {
4304       SGPRRange = ValRange;
4305       NextFreeSGPR = Val;
4306     } else if (ID == ".amdhsa_reserve_vcc") {
4307       if (!isUInt<1>(Val))
4308         return OutOfRangeError(ValRange);
4309       ReserveVCC = Val;
4310     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4311       if (IVersion.Major < 7)
4312         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4313                                  IDRange);
4314       if (!isUInt<1>(Val))
4315         return OutOfRangeError(ValRange);
4316       ReserveFlatScr = Val;
4317     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4318       if (IVersion.Major < 8)
4319         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4320                                  IDRange);
4321       if (!isUInt<1>(Val))
4322         return OutOfRangeError(ValRange);
4323       ReserveXNACK = Val;
4324     } else if (ID == ".amdhsa_float_round_mode_32") {
4325       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4326                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4327     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4328       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4329                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4330     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4331       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4332                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4333     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4334       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4335                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4336                        ValRange);
4337     } else if (ID == ".amdhsa_dx10_clamp") {
4338       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4339                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4340     } else if (ID == ".amdhsa_ieee_mode") {
4341       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4342                        Val, ValRange);
4343     } else if (ID == ".amdhsa_fp16_overflow") {
4344       if (IVersion.Major < 9)
4345         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4346                                  IDRange);
4347       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4348                        ValRange);
4349     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4350       if (IVersion.Major < 10)
4351         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4352                                  IDRange);
4353       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4354                        ValRange);
4355     } else if (ID == ".amdhsa_memory_ordered") {
4356       if (IVersion.Major < 10)
4357         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4358                                  IDRange);
4359       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4360                        ValRange);
4361     } else if (ID == ".amdhsa_forward_progress") {
4362       if (IVersion.Major < 10)
4363         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4364                                  IDRange);
4365       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4366                        ValRange);
4367     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4368       PARSE_BITS_ENTRY(
4369           KD.compute_pgm_rsrc2,
4370           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4371           ValRange);
4372     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4373       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4374                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4375                        Val, ValRange);
4376     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4377       PARSE_BITS_ENTRY(
4378           KD.compute_pgm_rsrc2,
4379           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4380           ValRange);
4381     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4382       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4383                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4384                        Val, ValRange);
4385     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4386       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4387                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4388                        Val, ValRange);
4389     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4390       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4391                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4392                        Val, ValRange);
4393     } else if (ID == ".amdhsa_exception_int_div_zero") {
4394       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4395                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4396                        Val, ValRange);
4397     } else {
4398       return getParser().Error(IDRange.Start,
4399                                "unknown .amdhsa_kernel directive", IDRange);
4400     }
4401 
4402 #undef PARSE_BITS_ENTRY
4403   }
4404 
4405   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4406     return TokError(".amdhsa_next_free_vgpr directive is required");
4407 
4408   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4409     return TokError(".amdhsa_next_free_sgpr directive is required");
4410 
4411   unsigned VGPRBlocks;
4412   unsigned SGPRBlocks;
4413   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4414                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4415                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4416                          SGPRBlocks))
4417     return true;
4418 
4419   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4420           VGPRBlocks))
4421     return OutOfRangeError(VGPRRange);
4422   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4423                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4424 
4425   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4426           SGPRBlocks))
4427     return OutOfRangeError(SGPRRange);
4428   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4429                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4430                   SGPRBlocks);
4431 
4432   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4433     return TokError("too many user SGPRs enabled");
4434   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4435                   UserSGPRCount);
4436 
4437   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4438       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4439       ReserveFlatScr, ReserveXNACK);
4440   return false;
4441 }
4442 
4443 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4444   uint32_t Major;
4445   uint32_t Minor;
4446 
4447   if (ParseDirectiveMajorMinor(Major, Minor))
4448     return true;
4449 
4450   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4451   return false;
4452 }
4453 
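     // With no arguments, the ISA version of the targeted GPU is emitted.
     // Otherwise the expected form is: major, minor, stepping, "vendor", "arch".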
4454 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4455   uint32_t Major;
4456   uint32_t Minor;
4457   uint32_t Stepping;
4458   StringRef VendorName;
4459   StringRef ArchName;
4460 
4461   // If this directive has no arguments, then use the ISA version for the
4462   // targeted GPU.
4463   if (getLexer().is(AsmToken::EndOfStatement)) {
4464     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4465     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4466                                                       ISA.Stepping,
4467                                                       "AMD", "AMDGPU");
4468     return false;
4469   }
4470 
4471   if (ParseDirectiveMajorMinor(Major, Minor))
4472     return true;
4473 
4474   if (getLexer().isNot(AsmToken::Comma))
4475     return TokError("stepping version number required, comma expected");
4476   Lex();
4477 
4478   if (ParseAsAbsoluteExpression(Stepping))
4479     return TokError("invalid stepping version");
4480 
4481   if (getLexer().isNot(AsmToken::Comma))
4482     return TokError("vendor name required, comma expected");
4483   Lex();
4484 
4485   if (getLexer().isNot(AsmToken::String))
4486     return TokError("invalid vendor name");
4487 
4488   VendorName = getLexer().getTok().getStringContents();
4489   Lex();
4490 
4491   if (getLexer().isNot(AsmToken::Comma))
4492     return TokError("arch name required, comma expected");
4493   Lex();
4494 
4495   if (getLexer().isNot(AsmToken::String))
4496     return TokError("invalid arch name");
4497 
4498   ArchName = getLexer().getTok().getStringContents();
4499   Lex();
4500 
4501   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4502                                                     VendorName, ArchName);
4503   return false;
4504 }
4505 
4506 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4507                                                amd_kernel_code_t &Header) {
4508   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4509   // assembly for backwards compatibility.
4510   if (ID == "max_scratch_backing_memory_byte_size") {
4511     Parser.eatToEndOfStatement();
4512     return false;
4513   }
4514 
4515   SmallString<40> ErrStr;
4516   raw_svector_ostream Err(ErrStr);
4517   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4518     return TokError(Err.str());
4519   }
4520   Lex();
4521 
4522   if (ID == "enable_wavefront_size32") {
4523     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4524       if (!isGFX10Plus())
4525         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4526       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4527         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4528     } else {
4529       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4530         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4531     }
4532   }
4533 
4534   if (ID == "wavefront_size") {
4535     if (Header.wavefront_size == 5) {
4536       if (!isGFX10Plus())
4537         return TokError("wavefront_size=5 is only allowed on GFX10+");
4538       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4539         return TokError("wavefront_size=5 requires +WavefrontSize32");
4540     } else if (Header.wavefront_size == 6) {
4541       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4542         return TokError("wavefront_size=6 requires +WavefrontSize64");
4543     }
4544   }
4545 
4546   if (ID == "enable_wgp_mode") {
4547     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4548         !isGFX10Plus())
4549       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4550   }
4551 
4552   if (ID == "enable_mem_ordered") {
4553     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4554         !isGFX10Plus())
4555       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4556   }
4557 
4558   if (ID == "enable_fwd_progress") {
4559     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4560         !isGFX10Plus())
4561       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4562   }
4563 
4564   return false;
4565 }
4566 
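/// ParseDirectiveAMDKernelCodeT
///
/// A minimal sketch of the block this parses (the field shown is one of many
/// defined by amd_kernel_code_t, and the value is illustrative):
///   .amd_kernel_code_t
///     wavefront_size = 6
///   .end_amd_kernel_code_t
/// Each field line is handled by ParseAMDKernelCodeTValue above.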
4567 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4568   amd_kernel_code_t Header;
4569   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4570 
4571   while (true) {
4572     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4573     // will set the current token to EndOfStatement.
4574     while(getLexer().is(AsmToken::EndOfStatement))
4575       Lex();
4576 
4577     if (getLexer().isNot(AsmToken::Identifier))
4578       return TokError("expected value identifier or .end_amd_kernel_code_t");
4579 
4580     StringRef ID = getLexer().getTok().getIdentifier();
4581     Lex();
4582 
4583     if (ID == ".end_amd_kernel_code_t")
4584       break;
4585 
4586     if (ParseAMDKernelCodeTValue(ID, Header))
4587       return true;
4588   }
4589 
4590   getTargetStreamer().EmitAMDKernelCodeT(Header);
4591 
4592   return false;
4593 }
4594 
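/// ParseDirectiveAMDGPUHsaKernel
///  ::= .amdgpu_hsa_kernel symbol_name
/// e.g. ".amdgpu_hsa_kernel my_kernel" (the kernel name is illustrative); the
/// named symbol is given the STT_AMDGPU_HSA_KERNEL type.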
4595 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4596   if (getLexer().isNot(AsmToken::Identifier))
4597     return TokError("expected symbol name");
4598 
4599   StringRef KernelName = Parser.getTok().getString();
4600 
4601   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4602                                            ELF::STT_AMDGPU_HSA_KERNEL);
4603   Lex();
4604 
4605   KernelScope.initialize(getContext());
4606   return false;
4607 }
4608 
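/// ParseDirectiveISAVersion
///  ::= .amd_amdgpu_isa "isa-version-string"
/// The quoted string must match the ISA version string computed for the
/// current subtarget, so the exact spelling depends on the triple and -mcpu
/// in use; "amdgcn-amd-amdhsa--gfx900" is an illustrative value only.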
4609 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4610   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4611     return Error(getParser().getTok().getLoc(),
4612                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4613                  "architectures");
4614   }
4615 
4616   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4617 
4618   std::string ISAVersionStringFromSTI;
4619   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4620   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4621 
4622   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4623     return Error(getParser().getTok().getLoc(),
4624                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4625                  "arguments specified through the command line");
4626   }
4627 
4628   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4629   Lex();
4630 
4631   return false;
4632 }
4633 
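/// ParseDirectiveHSAMetadata
/// Parses the HSA metadata block delimited by the begin/end directive pair
/// selected below (HSAMD::V3::AssemblerDirectiveBegin/End for code object V3,
/// HSAMD::AssemblerDirectiveBegin/End otherwise). The enclosed text, typically
/// YAML, is collected by ParseToEndDirective and passed to the target streamer.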
4634 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4635   const char *AssemblerDirectiveBegin;
4636   const char *AssemblerDirectiveEnd;
4637   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4638       isHsaAbiVersion3(&getSTI())
4639           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4640                             HSAMD::V3::AssemblerDirectiveEnd)
4641           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4642                             HSAMD::AssemblerDirectiveEnd);
4643 
4644   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4645     return Error(getParser().getTok().getLoc(),
4646                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4647                  "not available on non-amdhsa OSes")).str());
4648   }
4649 
4650   std::string HSAMetadataString;
4651   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4652                           HSAMetadataString))
4653     return true;
4654 
4655   if (isHsaAbiVersion3(&getSTI())) {
4656     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4657       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4658   } else {
4659     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4660       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4661   }
4662 
4663   return false;
4664 }
4665 
4666 /// Common code to parse out a block of text (typically YAML) between start and
4667 /// end directives.
4668 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4669                                           const char *AssemblerDirectiveEnd,
4670                                           std::string &CollectString) {
4671 
4672   raw_string_ostream CollectStream(CollectString);
4673 
4674   getLexer().setSkipSpace(false);
4675 
4676   bool FoundEnd = false;
4677   while (!getLexer().is(AsmToken::Eof)) {
4678     while (getLexer().is(AsmToken::Space)) {
4679       CollectStream << getLexer().getTok().getString();
4680       Lex();
4681     }
4682 
4683     if (getLexer().is(AsmToken::Identifier)) {
4684       StringRef ID = getLexer().getTok().getIdentifier();
4685       if (ID == AssemblerDirectiveEnd) {
4686         Lex();
4687         FoundEnd = true;
4688         break;
4689       }
4690     }
4691 
4692     CollectStream << Parser.parseStringToEndOfStatement()
4693                   << getContext().getAsmInfo()->getSeparatorString();
4694 
4695     Parser.eatToEndOfStatement();
4696   }
4697 
4698   getLexer().setSkipSpace(true);
4699 
4700   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4701     return TokError(Twine("expected directive ") +
4702                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4703   }
4704 
4705   CollectStream.flush();
4706   return false;
4707 }
4708 
4709 /// Parse the assembler directive for new MsgPack-format PAL metadata.
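/// A rough sketch of the expected input, where the directive spellings come
/// from AMDGPU::PALMD::AssemblerDirectiveBegin/End:
///   <begin directive>
///     <msgpack-format metadata text>
///   <end directive>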
4710 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4711   std::string String;
4712   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4713                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4714     return true;
4715 
4716   auto PALMetadata = getTargetStreamer().getPALMetadata();
4717   if (!PALMetadata->setFromString(String))
4718     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4719   return false;
4720 }
4721 
4722 /// Parse the assembler directive for old linear-format PAL metadata.
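/// The body is a comma-separated list of register/value pairs, i.e. an even
/// number of integers: key0, value0, key1, value1, ... (a sketch; the actual
/// directive name is PALMD::AssemblerDirective).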
4723 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4724   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4725     return Error(getParser().getTok().getLoc(),
4726                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4727                  "not available on non-amdpal OSes")).str());
4728   }
4729 
4730   auto PALMetadata = getTargetStreamer().getPALMetadata();
4731   PALMetadata->setLegacy();
4732   for (;;) {
4733     uint32_t Key, Value;
4734     if (ParseAsAbsoluteExpression(Key)) {
4735       return TokError(Twine("invalid value in ") +
4736                       Twine(PALMD::AssemblerDirective));
4737     }
4738     if (getLexer().isNot(AsmToken::Comma)) {
4739       return TokError(Twine("expected an even number of values in ") +
4740                       Twine(PALMD::AssemblerDirective));
4741     }
4742     Lex();
4743     if (ParseAsAbsoluteExpression(Value)) {
4744       return TokError(Twine("invalid value in ") +
4745                       Twine(PALMD::AssemblerDirective));
4746     }
4747     PALMetadata->setRegister(Key, Value);
4748     if (getLexer().isNot(AsmToken::Comma))
4749       break;
4750     Lex();
4751   }
4752   return false;
4753 }
4754 
4755 /// ParseDirectiveAMDGPULDS
4756 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
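///  e.g. ".amdgpu_lds my_lds, 4096, 16" (symbol name and values illustrative);
///  the alignment defaults to 4 and must be a power of two.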
4757 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4758   if (getParser().checkForValidSection())
4759     return true;
4760 
4761   StringRef Name;
4762   SMLoc NameLoc = getLexer().getLoc();
4763   if (getParser().parseIdentifier(Name))
4764     return TokError("expected identifier in directive");
4765 
4766   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4767   if (parseToken(AsmToken::Comma, "expected ','"))
4768     return true;
4769 
4770   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4771 
4772   int64_t Size;
4773   SMLoc SizeLoc = getLexer().getLoc();
4774   if (getParser().parseAbsoluteExpression(Size))
4775     return true;
4776   if (Size < 0)
4777     return Error(SizeLoc, "size must be non-negative");
4778   if (Size > LocalMemorySize)
4779     return Error(SizeLoc, "size is too large");
4780 
4781   int64_t Alignment = 4;
4782   if (getLexer().is(AsmToken::Comma)) {
4783     Lex();
4784     SMLoc AlignLoc = getLexer().getLoc();
4785     if (getParser().parseAbsoluteExpression(Alignment))
4786       return true;
4787     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4788       return Error(AlignLoc, "alignment must be a power of two");
4789 
4790     // Alignment larger than the size of LDS is possible in theory, as long
4791     // as the linker manages to place the symbol at address 0, but we do want
4792     // to make sure the alignment fits nicely into a 32-bit integer.
4793     if (Alignment >= 1u << 31)
4794       return Error(AlignLoc, "alignment is too large");
4795   }
4796 
4797   if (parseToken(AsmToken::EndOfStatement,
4798                  "unexpected token in '.amdgpu_lds' directive"))
4799     return true;
4800 
4801   Symbol->redefineIfPossible();
4802   if (!Symbol->isUndefined())
4803     return Error(NameLoc, "invalid symbol redefinition");
4804 
4805   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4806   return false;
4807 }
4808 
4809 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4810   StringRef IDVal = DirectiveID.getString();
4811 
4812   if (isHsaAbiVersion3(&getSTI())) {
4813     if (IDVal == ".amdgcn_target")
4814       return ParseDirectiveAMDGCNTarget();
4815 
4816     if (IDVal == ".amdhsa_kernel")
4817       return ParseDirectiveAMDHSAKernel();
4818 
4819     // TODO: Restructure/combine with PAL metadata directive.
4820     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4821       return ParseDirectiveHSAMetadata();
4822   } else {
4823     if (IDVal == ".hsa_code_object_version")
4824       return ParseDirectiveHSACodeObjectVersion();
4825 
4826     if (IDVal == ".hsa_code_object_isa")
4827       return ParseDirectiveHSACodeObjectISA();
4828 
4829     if (IDVal == ".amd_kernel_code_t")
4830       return ParseDirectiveAMDKernelCodeT();
4831 
4832     if (IDVal == ".amdgpu_hsa_kernel")
4833       return ParseDirectiveAMDGPUHsaKernel();
4834 
4835     if (IDVal == ".amd_amdgpu_isa")
4836       return ParseDirectiveISAVersion();
4837 
4838     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4839       return ParseDirectiveHSAMetadata();
4840   }
4841 
4842   if (IDVal == ".amdgpu_lds")
4843     return ParseDirectiveAMDGPULDS();
4844 
4845   if (IDVal == PALMD::AssemblerDirectiveBegin)
4846     return ParseDirectivePALMetadataBegin();
4847 
4848   if (IDVal == PALMD::AssemblerDirective)
4849     return ParseDirectivePALMetadata();
4850 
4851   return true;
4852 }
4853 
4854 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4855                                            unsigned RegNo) const {
4856 
4857   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4858        R.isValid(); ++R) {
4859     if (*R == RegNo)
4860       return isGFX9Plus();
4861   }
4862 
4863   // GFX10 has 2 more SGPRs: 104 and 105.
4864   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4865        R.isValid(); ++R) {
4866     if (*R == RegNo)
4867       return hasSGPR104_SGPR105();
4868   }
4869 
4870   switch (RegNo) {
4871   case AMDGPU::SRC_SHARED_BASE:
4872   case AMDGPU::SRC_SHARED_LIMIT:
4873   case AMDGPU::SRC_PRIVATE_BASE:
4874   case AMDGPU::SRC_PRIVATE_LIMIT:
4875   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4876     return isGFX9Plus();
4877   case AMDGPU::TBA:
4878   case AMDGPU::TBA_LO:
4879   case AMDGPU::TBA_HI:
4880   case AMDGPU::TMA:
4881   case AMDGPU::TMA_LO:
4882   case AMDGPU::TMA_HI:
4883     return !isGFX9Plus();
4884   case AMDGPU::XNACK_MASK:
4885   case AMDGPU::XNACK_MASK_LO:
4886   case AMDGPU::XNACK_MASK_HI:
4887     return (isVI() || isGFX9()) && hasXNACK();
4888   case AMDGPU::SGPR_NULL:
4889     return isGFX10Plus();
4890   default:
4891     break;
4892   }
4893 
4894   if (isCI())
4895     return true;
4896 
4897   if (isSI() || isGFX10Plus()) {
4898     // No flat_scr on SI.
4899     // On GFX10 flat scratch is not a valid register operand and can only be
4900     // accessed with s_setreg/s_getreg.
4901     switch (RegNo) {
4902     case AMDGPU::FLAT_SCR:
4903     case AMDGPU::FLAT_SCR_LO:
4904     case AMDGPU::FLAT_SCR_HI:
4905       return false;
4906     default:
4907       return true;
4908     }
4909   }
4910 
4911   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4912   // SI/CI have.
4913   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4914        R.isValid(); ++R) {
4915     if (*R == RegNo)
4916       return hasSGPR102_SGPR103();
4917   }
4918 
4919   return true;
4920 }
4921 
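// Parse a single instruction operand. In NSA (non-sequential address) mode,
// used for GFX10+ image instructions, the address may be written as a
// bracketed register list, e.g. "[v4, v5, v6]" (registers illustrative),
// which is handled by the loop below.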
4922 OperandMatchResultTy
4923 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4924                               OperandMode Mode) {
4925   // Try to parse with a custom parser
4926   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4927 
4928   // If we successfully parsed the operand or if there was an error parsing,
4929   // we are done.
4930   //
4931   // If we are parsing after we reach EndOfStatement then this means we
4932   // are appending default values to the Operands list.  This is only done
4933   // by the custom parser, so we shouldn't continue on to the generic parsing.
4934   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4935       getLexer().is(AsmToken::EndOfStatement))
4936     return ResTy;
4937 
4938   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4939     unsigned Prefix = Operands.size();
4940     SMLoc LBraceLoc = getTok().getLoc();
4941     Parser.Lex(); // eat the '['
4942 
4943     for (;;) {
4944       ResTy = parseReg(Operands);
4945       if (ResTy != MatchOperand_Success)
4946         return ResTy;
4947 
4948       if (getLexer().is(AsmToken::RBrac))
4949         break;
4950 
4951       if (getLexer().isNot(AsmToken::Comma))
4952         return MatchOperand_ParseFail;
4953       Parser.Lex();
4954     }
4955 
4956     if (Operands.size() - Prefix > 1) {
4957       Operands.insert(Operands.begin() + Prefix,
4958                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4959       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4960                                                     getTok().getLoc()));
4961     }
4962 
4963     Parser.Lex(); // eat the ']'
4964     return MatchOperand_Success;
4965   }
4966 
4967   return parseRegOrImm(Operands);
4968 }
4969 
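// Strip a trailing encoding suffix from the mnemonic and remember the forced
// encoding, e.g. "v_add_f32_e64" is reduced to "v_add_f32" with a forced
// 64-bit encoding (the mnemonic is illustrative; any _e32/_e64/_dpp/_sdwa
// suffix is treated the same way).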
4970 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4971   // Clear any forced encodings from the previous instruction.
4972   setForcedEncodingSize(0);
4973   setForcedDPP(false);
4974   setForcedSDWA(false);
4975 
4976   if (Name.endswith("_e64")) {
4977     setForcedEncodingSize(64);
4978     return Name.substr(0, Name.size() - 4);
4979   } else if (Name.endswith("_e32")) {
4980     setForcedEncodingSize(32);
4981     return Name.substr(0, Name.size() - 4);
4982   } else if (Name.endswith("_dpp")) {
4983     setForcedDPP(true);
4984     return Name.substr(0, Name.size() - 4);
4985   } else if (Name.endswith("_sdwa")) {
4986     setForcedSDWA(true);
4987     return Name.substr(0, Name.size() - 5);
4988   }
4989   return Name;
4990 }
4991 
4992 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4993                                        StringRef Name,
4994                                        SMLoc NameLoc, OperandVector &Operands) {
4995   // Add the instruction mnemonic
4996   Name = parseMnemonicSuffix(Name);
4997   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4998 
4999   bool IsMIMG = Name.startswith("image_");
5000 
5001   while (!getLexer().is(AsmToken::EndOfStatement)) {
5002     OperandMode Mode = OperandMode_Default;
5003     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5004       Mode = OperandMode_NSA;
5005     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5006 
5007     // Eat the comma or space if there is one.
5008     if (getLexer().is(AsmToken::Comma))
5009       Parser.Lex();
5010 
5011     if (Res != MatchOperand_Success) {
5012       checkUnsupportedInstruction(Name, NameLoc);
5013       if (!Parser.hasPendingError()) {
5014         // FIXME: use real operand location rather than the current location.
5015         StringRef Msg =
5016           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5017                                             "not a valid operand.";
5018         Error(getLexer().getLoc(), Msg);
5019       }
5020       while (!getLexer().is(AsmToken::EndOfStatement)) {
5021         Parser.Lex();
5022       }
5023       Parser.Lex();
5024       return true;
5025     }
5026   }
5027   Parser.Lex();
5028 
5029   return false;
5030 }
5031 
5032 //===----------------------------------------------------------------------===//
5033 // Utility functions
5034 //===----------------------------------------------------------------------===//
5035 
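// Parse an integer operand written with a named prefix and a colon, i.e.
// "<prefix>:<expr>", e.g. "offset:16" (prefix name and value illustrative).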
5036 OperandMatchResultTy
5037 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5038 
5039   if (!trySkipId(Prefix, AsmToken::Colon))
5040     return MatchOperand_NoMatch;
5041 
5042   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5043 }
5044 
5045 OperandMatchResultTy
5046 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5047                                     AMDGPUOperand::ImmTy ImmTy,
5048                                     bool (*ConvertResult)(int64_t&)) {
5049   SMLoc S = getLoc();
5050   int64_t Value = 0;
5051 
5052   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5053   if (Res != MatchOperand_Success)
5054     return Res;
5055 
5056   if (ConvertResult && !ConvertResult(Value)) {
5057     Error(S, "invalid " + StringRef(Prefix) + " value.");
5058   }
5059 
5060   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5061   return MatchOperand_Success;
5062 }
5063 
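// Parse a prefixed array of 0/1 flags packed into a bitmask, i.e.
// "<prefix>:[b0, b1, ...]" with at most 4 elements, e.g. "op_sel:[0, 1]"
// (the prefix name is illustrative of how this helper is typically used).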
5064 OperandMatchResultTy
5065 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5066                                              OperandVector &Operands,
5067                                              AMDGPUOperand::ImmTy ImmTy,
5068                                              bool (*ConvertResult)(int64_t&)) {
5069   SMLoc S = getLoc();
5070   if (!trySkipId(Prefix, AsmToken::Colon))
5071     return MatchOperand_NoMatch;
5072 
5073   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5074     return MatchOperand_ParseFail;
5075 
5076   unsigned Val = 0;
5077   const unsigned MaxSize = 4;
5078 
5079   // FIXME: How to verify the number of elements matches the number of src
5080   // operands?
5081   for (int I = 0; ; ++I) {
5082     int64_t Op;
5083     SMLoc Loc = getLoc();
5084     if (!parseExpr(Op))
5085       return MatchOperand_ParseFail;
5086 
5087     if (Op != 0 && Op != 1) {
5088       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5089       return MatchOperand_ParseFail;
5090     }
5091 
5092     Val |= (Op << I);
5093 
5094     if (trySkipToken(AsmToken::RBrac))
5095       break;
5096 
5097     if (I + 1 == MaxSize) {
5098       Error(getLoc(), "expected a closing square bracket");
5099       return MatchOperand_ParseFail;
5100     }
5101 
5102     if (!skipToken(AsmToken::Comma, "expected a comma"))
5103       return MatchOperand_ParseFail;
5104   }
5105 
5106   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5107   return MatchOperand_Success;
5108 }
5109 
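// Parse a named single-bit modifier: the bare name sets the bit and a
// "no"-prefixed name clears it, e.g. "gds" vs. "nogds" (names illustrative);
// if the modifier is absent the default value of 0 is used.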
5110 OperandMatchResultTy
5111 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5112                                AMDGPUOperand::ImmTy ImmTy) {
5113   int64_t Bit = 0;
5114   SMLoc S = Parser.getTok().getLoc();
5115 
5116   // If we are at the end of the statement, this is a default argument, so
5117   // keep the default value.
5118   if (getLexer().isNot(AsmToken::EndOfStatement)) {
5119     switch(getLexer().getKind()) {
5120       case AsmToken::Identifier: {
5121         StringRef Tok = Parser.getTok().getString();
5122         if (Tok == Name) {
5123           if (Tok == "r128" && !hasMIMG_R128())
5124             Error(S, "r128 modifier is not supported on this GPU");
5125           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5126             Error(S, "a16 modifier is not supported on this GPU");
5127           Bit = 1;
5128           Parser.Lex();
5129         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5130           Bit = 0;
5131           Parser.Lex();
5132         } else {
5133           return MatchOperand_NoMatch;
5134         }
5135         break;
5136       }
5137       default:
5138         return MatchOperand_NoMatch;
5139     }
5140   }
5141 
5142   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
5143     return MatchOperand_ParseFail;
5144 
5145   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5146     ImmTy = AMDGPUOperand::ImmTyR128A16;
5147 
5148   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5149   return MatchOperand_Success;
5150 }
5151 
5152 static void addOptionalImmOperand(
5153   MCInst& Inst, const OperandVector& Operands,
5154   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5155   AMDGPUOperand::ImmTy ImmT,
5156   int64_t Default = 0) {
5157   auto i = OptionalIdx.find(ImmT);
5158   if (i != OptionalIdx.end()) {
5159     unsigned Idx = i->second;
5160     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5161   } else {
5162     Inst.addOperand(MCOperand::createImm(Default));
5163   }
5164 }
5165 
5166 OperandMatchResultTy
5167 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5168   if (getLexer().isNot(AsmToken::Identifier)) {
5169     return MatchOperand_NoMatch;
5170   }
5171   StringRef Tok = Parser.getTok().getString();
5172   if (Tok != Prefix) {
5173     return MatchOperand_NoMatch;
5174   }
5175 
5176   Parser.Lex();
5177   if (getLexer().isNot(AsmToken::Colon)) {
5178     return MatchOperand_ParseFail;
5179   }
5180 
5181   Parser.Lex();
5182   if (getLexer().isNot(AsmToken::Identifier)) {
5183     return MatchOperand_ParseFail;
5184   }
5185 
5186   Value = Parser.getTok().getString();
5187   return MatchOperand_Success;
5188 }
5189 
5190 //===----------------------------------------------------------------------===//
5191 // MTBUF format
5192 //===----------------------------------------------------------------------===//
5193 
5194 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5195                                   int64_t MaxVal,
5196                                   int64_t &Fmt) {
5197   int64_t Val;
5198   SMLoc Loc = getLoc();
5199 
5200   auto Res = parseIntWithPrefix(Pref, Val);
5201   if (Res == MatchOperand_ParseFail)
5202     return false;
5203   if (Res == MatchOperand_NoMatch)
5204     return true;
5205 
5206   if (Val < 0 || Val > MaxVal) {
5207     Error(Loc, Twine("out of range ", StringRef(Pref)));
5208     return false;
5209   }
5210 
5211   Fmt = Val;
5212   return true;
5213 }
5214 
5215 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5216 // values to live in a joint format operand in the MCInst encoding.
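// Accepted forms include "dfmt:<n>", "nfmt:<n>", or both in either order
// separated by a comma, e.g. "dfmt:1, nfmt:7" (numeric values illustrative).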
5217 OperandMatchResultTy
5218 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5219   using namespace llvm::AMDGPU::MTBUFFormat;
5220 
5221   int64_t Dfmt = DFMT_UNDEF;
5222   int64_t Nfmt = NFMT_UNDEF;
5223 
5224   // dfmt and nfmt can appear in either order, and each is optional.
5225   for (int I = 0; I < 2; ++I) {
5226     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5227       return MatchOperand_ParseFail;
5228 
5229     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5230       return MatchOperand_ParseFail;
5231     }
5232     // Skip optional comma between dfmt/nfmt
5233     // but guard against 2 commas following each other.
5234     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5235         !peekToken().is(AsmToken::Comma)) {
5236       trySkipToken(AsmToken::Comma);
5237     }
5238   }
5239 
5240   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5241     return MatchOperand_NoMatch;
5242 
5243   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5244   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5245 
5246   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5247   return MatchOperand_Success;
5248 }
5249 
5250 OperandMatchResultTy
5251 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5252   using namespace llvm::AMDGPU::MTBUFFormat;
5253 
5254   int64_t Fmt = UFMT_UNDEF;
5255 
5256   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5257     return MatchOperand_ParseFail;
5258 
5259   if (Fmt == UFMT_UNDEF)
5260     return MatchOperand_NoMatch;
5261 
5262   Format = Fmt;
5263   return MatchOperand_Success;
5264 }
5265 
5266 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5267                                     int64_t &Nfmt,
5268                                     StringRef FormatStr,
5269                                     SMLoc Loc) {
5270   using namespace llvm::AMDGPU::MTBUFFormat;
5271   int64_t Format;
5272 
5273   Format = getDfmt(FormatStr);
5274   if (Format != DFMT_UNDEF) {
5275     Dfmt = Format;
5276     return true;
5277   }
5278 
5279   Format = getNfmt(FormatStr, getSTI());
5280   if (Format != NFMT_UNDEF) {
5281     Nfmt = Format;
5282     return true;
5283   }
5284 
5285   Error(Loc, "unsupported format");
5286   return false;
5287 }
5288 
5289 OperandMatchResultTy
5290 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5291                                           SMLoc FormatLoc,
5292                                           int64_t &Format) {
5293   using namespace llvm::AMDGPU::MTBUFFormat;
5294 
5295   int64_t Dfmt = DFMT_UNDEF;
5296   int64_t Nfmt = NFMT_UNDEF;
5297   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5298     return MatchOperand_ParseFail;
5299 
5300   if (trySkipToken(AsmToken::Comma)) {
5301     StringRef Str;
5302     SMLoc Loc = getLoc();
5303     if (!parseId(Str, "expected a format string") ||
5304         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5305       return MatchOperand_ParseFail;
5306     }
5307     if (Dfmt == DFMT_UNDEF) {
5308       Error(Loc, "duplicate numeric format");
5309       return MatchOperand_ParseFail;
5310     } else if (Nfmt == NFMT_UNDEF) {
5311       Error(Loc, "duplicate data format");
5312       return MatchOperand_ParseFail;
5313     }
5314   }
5315 
5316   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5317   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5318 
5319   if (isGFX10Plus()) {
5320     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5321     if (Ufmt == UFMT_UNDEF) {
5322       Error(FormatLoc, "unsupported format");
5323       return MatchOperand_ParseFail;
5324     }
5325     Format = Ufmt;
5326   } else {
5327     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5328   }
5329 
5330   return MatchOperand_Success;
5331 }
5332 
5333 OperandMatchResultTy
5334 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5335                                             SMLoc Loc,
5336                                             int64_t &Format) {
5337   using namespace llvm::AMDGPU::MTBUFFormat;
5338 
5339   auto Id = getUnifiedFormat(FormatStr);
5340   if (Id == UFMT_UNDEF)
5341     return MatchOperand_NoMatch;
5342 
5343   if (!isGFX10Plus()) {
5344     Error(Loc, "unified format is not supported on this GPU");
5345     return MatchOperand_ParseFail;
5346   }
5347 
5348   Format = Id;
5349   return MatchOperand_Success;
5350 }
5351 
5352 OperandMatchResultTy
5353 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5354   using namespace llvm::AMDGPU::MTBUFFormat;
5355   SMLoc Loc = getLoc();
5356 
5357   if (!parseExpr(Format))
5358     return MatchOperand_ParseFail;
5359   if (!isValidFormatEncoding(Format, getSTI())) {
5360     Error(Loc, "out of range format");
5361     return MatchOperand_ParseFail;
5362   }
5363 
5364   return MatchOperand_Success;
5365 }
5366 
5367 OperandMatchResultTy
5368 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5369   using namespace llvm::AMDGPU::MTBUFFormat;
5370 
5371   if (!trySkipId("format", AsmToken::Colon))
5372     return MatchOperand_NoMatch;
5373 
5374   if (trySkipToken(AsmToken::LBrac)) {
5375     StringRef FormatStr;
5376     SMLoc Loc = getLoc();
5377     if (!parseId(FormatStr, "expected a format string"))
5378       return MatchOperand_ParseFail;
5379 
5380     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5381     if (Res == MatchOperand_NoMatch)
5382       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5383     if (Res != MatchOperand_Success)
5384       return Res;
5385 
5386     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5387       return MatchOperand_ParseFail;
5388 
5389     return MatchOperand_Success;
5390   }
5391 
5392   return parseNumericFormat(Format);
5393 }
5394 
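// Parse the MTBUF format operand. Depending on the subtarget this is either
// the legacy "dfmt:"/"nfmt:" pair or a unified "format:<n>" value, with a
// symbolic "format:[...]" spelling also accepted; the format may appear
// either before or after the soffset operand, so both orders are handled.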
5395 OperandMatchResultTy
5396 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5397   using namespace llvm::AMDGPU::MTBUFFormat;
5398 
5399   int64_t Format = getDefaultFormatEncoding(getSTI());
5400   OperandMatchResultTy Res;
5401   SMLoc Loc = getLoc();
5402 
5403   // Parse legacy format syntax.
5404   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5405   if (Res == MatchOperand_ParseFail)
5406     return Res;
5407 
5408   bool FormatFound = (Res == MatchOperand_Success);
5409 
5410   Operands.push_back(
5411     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5412 
5413   if (FormatFound)
5414     trySkipToken(AsmToken::Comma);
5415 
5416   if (isToken(AsmToken::EndOfStatement)) {
5417     // We are expecting an soffset operand,
5418     // but let the matcher handle the error.
5419     return MatchOperand_Success;
5420   }
5421 
5422   // Parse soffset.
5423   Res = parseRegOrImm(Operands);
5424   if (Res != MatchOperand_Success)
5425     return Res;
5426 
5427   trySkipToken(AsmToken::Comma);
5428 
5429   if (!FormatFound) {
5430     Res = parseSymbolicOrNumericFormat(Format);
5431     if (Res == MatchOperand_ParseFail)
5432       return Res;
5433     if (Res == MatchOperand_Success) {
5434       auto Size = Operands.size();
5435       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5436       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5437       Op.setImm(Format);
5438     }
5439     return MatchOperand_Success;
5440   }
5441 
5442   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5443     Error(getLoc(), "duplicate format");
5444     return MatchOperand_ParseFail;
5445   }
5446   return MatchOperand_Success;
5447 }
5448 
5449 //===----------------------------------------------------------------------===//
5450 // ds
5451 //===----------------------------------------------------------------------===//
5452 
5453 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5454                                     const OperandVector &Operands) {
5455   OptionalImmIndexMap OptionalIdx;
5456 
5457   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5458     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5459 
5460     // Add the register arguments
5461     if (Op.isReg()) {
5462       Op.addRegOperands(Inst, 1);
5463       continue;
5464     }
5465 
5466     // Handle optional arguments
5467     OptionalIdx[Op.getImmTy()] = i;
5468   }
5469 
5470   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5471   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5472   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5473 
5474   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5475 }
5476 
5477 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5478                                 bool IsGdsHardcoded) {
5479   OptionalImmIndexMap OptionalIdx;
5480 
5481   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5482     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5483 
5484     // Add the register arguments
5485     if (Op.isReg()) {
5486       Op.addRegOperands(Inst, 1);
5487       continue;
5488     }
5489 
5490     if (Op.isToken() && Op.getToken() == "gds") {
5491       IsGdsHardcoded = true;
5492       continue;
5493     }
5494 
5495     // Handle optional arguments
5496     OptionalIdx[Op.getImmTy()] = i;
5497   }
5498 
5499   AMDGPUOperand::ImmTy OffsetType =
5500     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5501      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5502      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5503                                                       AMDGPUOperand::ImmTyOffset;
5504 
5505   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5506 
5507   if (!IsGdsHardcoded) {
5508     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5509   }
5510   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5511 }
5512 
5513 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5514   OptionalImmIndexMap OptionalIdx;
5515 
5516   unsigned OperandIdx[4];
5517   unsigned EnMask = 0;
5518   int SrcIdx = 0;
5519 
5520   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5521     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5522 
5523     // Add the register arguments
5524     if (Op.isReg()) {
5525       assert(SrcIdx < 4);
5526       OperandIdx[SrcIdx] = Inst.size();
5527       Op.addRegOperands(Inst, 1);
5528       ++SrcIdx;
5529       continue;
5530     }
5531 
5532     if (Op.isOff()) {
5533       assert(SrcIdx < 4);
5534       OperandIdx[SrcIdx] = Inst.size();
5535       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5536       ++SrcIdx;
5537       continue;
5538     }
5539 
5540     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5541       Op.addImmOperands(Inst, 1);
5542       continue;
5543     }
5544 
5545     if (Op.isToken() && Op.getToken() == "done")
5546       continue;
5547 
5548     // Handle optional arguments
5549     OptionalIdx[Op.getImmTy()] = i;
5550   }
5551 
5552   assert(SrcIdx == 4);
5553 
5554   bool Compr = false;
5555   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5556     Compr = true;
5557     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5558     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5559     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5560   }
5561 
5562   for (auto i = 0; i < SrcIdx; ++i) {
5563     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5564       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5565     }
5566   }
5567 
5568   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5569   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5570 
5571   Inst.addOperand(MCOperand::createImm(EnMask));
5572 }
5573 
5574 //===----------------------------------------------------------------------===//
5575 // s_waitcnt
5576 //===----------------------------------------------------------------------===//
5577 
5578 static bool
5579 encodeCnt(
5580   const AMDGPU::IsaVersion ISA,
5581   int64_t &IntVal,
5582   int64_t CntVal,
5583   bool Saturate,
5584   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5585   unsigned (*decode)(const IsaVersion &Version, unsigned))
5586 {
5587   bool Failed = false;
5588 
5589   IntVal = encode(ISA, IntVal, CntVal);
5590   if (CntVal != decode(ISA, IntVal)) {
5591     if (Saturate) {
5592       IntVal = encode(ISA, IntVal, -1);
5593     } else {
5594       Failed = true;
5595     }
5596   }
5597   return Failed;
5598 }
5599 
5600 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5601 
5602   SMLoc CntLoc = getLoc();
5603   StringRef CntName = getTokenStr();
5604 
5605   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5606       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5607     return false;
5608 
5609   int64_t CntVal;
5610   SMLoc ValLoc = getLoc();
5611   if (!parseExpr(CntVal))
5612     return false;
5613 
5614   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5615 
5616   bool Failed = true;
5617   bool Sat = CntName.endswith("_sat");
5618 
5619   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5620     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5621   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5622     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5623   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5624     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5625   } else {
5626     Error(CntLoc, "invalid counter name " + CntName);
5627     return false;
5628   }
5629 
5630   if (Failed) {
5631     Error(ValLoc, "too large value for " + CntName);
5632     return false;
5633   }
5634 
5635   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5636     return false;
5637 
5638   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5639     if (isToken(AsmToken::EndOfStatement)) {
5640       Error(getLoc(), "expected a counter name");
5641       return false;
5642     }
5643   }
5644 
5645   return true;
5646 }
5647 
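// Parse the s_waitcnt operand: either a list of named counters such as
// "vmcnt(0) lgkmcnt(0)", optionally joined by '&' or ',' (a sketch of the
// syntax handled by parseCnt above), or a plain integer expression.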
5648 OperandMatchResultTy
5649 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5650   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5651   int64_t Waitcnt = getWaitcntBitMask(ISA);
5652   SMLoc S = getLoc();
5653 
5654   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5655     while (!isToken(AsmToken::EndOfStatement)) {
5656       if (!parseCnt(Waitcnt))
5657         return MatchOperand_ParseFail;
5658     }
5659   } else {
5660     if (!parseExpr(Waitcnt))
5661       return MatchOperand_ParseFail;
5662   }
5663 
5664   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5665   return MatchOperand_Success;
5666 }
5667 
5668 bool
5669 AMDGPUOperand::isSWaitCnt() const {
5670   return isImm();
5671 }
5672 
5673 //===----------------------------------------------------------------------===//
5674 // hwreg
5675 //===----------------------------------------------------------------------===//
5676 
5677 bool
5678 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5679                                 OperandInfoTy &Offset,
5680                                 OperandInfoTy &Width) {
5681   using namespace llvm::AMDGPU::Hwreg;
5682 
5683   // The register may be specified by name or using a numeric code
5684   HwReg.Loc = getLoc();
5685   if (isToken(AsmToken::Identifier) &&
5686       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5687     HwReg.IsSymbolic = true;
5688     lex(); // skip register name
5689   } else if (!parseExpr(HwReg.Id, "a register name")) {
5690     return false;
5691   }
5692 
5693   if (trySkipToken(AsmToken::RParen))
5694     return true;
5695 
5696   // parse optional params
5697   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5698     return false;
5699 
5700   Offset.Loc = getLoc();
5701   if (!parseExpr(Offset.Id))
5702     return false;
5703 
5704   if (!skipToken(AsmToken::Comma, "expected a comma"))
5705     return false;
5706 
5707   Width.Loc = getLoc();
5708   return parseExpr(Width.Id) &&
5709          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5710 }
5711 
5712 bool
5713 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5714                                const OperandInfoTy &Offset,
5715                                const OperandInfoTy &Width) {
5716 
5717   using namespace llvm::AMDGPU::Hwreg;
5718 
5719   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5720     Error(HwReg.Loc,
5721           "specified hardware register is not supported on this GPU");
5722     return false;
5723   }
5724   if (!isValidHwreg(HwReg.Id)) {
5725     Error(HwReg.Loc,
5726           "invalid code of hardware register: only 6-bit values are legal");
5727     return false;
5728   }
5729   if (!isValidHwregOffset(Offset.Id)) {
5730     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5731     return false;
5732   }
5733   if (!isValidHwregWidth(Width.Id)) {
5734     Error(Width.Loc,
5735           "invalid bitfield width: only values from 1 to 32 are legal");
5736     return false;
5737   }
5738   return true;
5739 }
5740 
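// Parse the s_setreg/s_getreg operand: either a raw 16-bit immediate or the
// "hwreg(<reg>[, <offset>, <width>])" macro, e.g. "hwreg(HW_REG_MODE, 0, 32)"
// (the register name is illustrative; symbolic names are resolved by
// getHwregId).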
5741 OperandMatchResultTy
5742 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5743   using namespace llvm::AMDGPU::Hwreg;
5744 
5745   int64_t ImmVal = 0;
5746   SMLoc Loc = getLoc();
5747 
5748   if (trySkipId("hwreg", AsmToken::LParen)) {
5749     OperandInfoTy HwReg(ID_UNKNOWN_);
5750     OperandInfoTy Offset(OFFSET_DEFAULT_);
5751     OperandInfoTy Width(WIDTH_DEFAULT_);
5752     if (parseHwregBody(HwReg, Offset, Width) &&
5753         validateHwreg(HwReg, Offset, Width)) {
5754       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5755     } else {
5756       return MatchOperand_ParseFail;
5757     }
5758   } else if (parseExpr(ImmVal, "a hwreg macro")) {
5759     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5760       Error(Loc, "invalid immediate: only 16-bit values are legal");
5761       return MatchOperand_ParseFail;
5762     }
5763   } else {
5764     return MatchOperand_ParseFail;
5765   }
5766 
5767   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5768   return MatchOperand_Success;
5769 }
5770 
5771 bool AMDGPUOperand::isHwreg() const {
5772   return isImmTy(ImmTyHwreg);
5773 }
5774 
5775 //===----------------------------------------------------------------------===//
5776 // sendmsg
5777 //===----------------------------------------------------------------------===//
5778 
5779 bool
5780 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5781                                   OperandInfoTy &Op,
5782                                   OperandInfoTy &Stream) {
5783   using namespace llvm::AMDGPU::SendMsg;
5784 
5785   Msg.Loc = getLoc();
5786   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5787     Msg.IsSymbolic = true;
5788     lex(); // skip message name
5789   } else if (!parseExpr(Msg.Id, "a message name")) {
5790     return false;
5791   }
5792 
5793   if (trySkipToken(AsmToken::Comma)) {
5794     Op.IsDefined = true;
5795     Op.Loc = getLoc();
5796     if (isToken(AsmToken::Identifier) &&
5797         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5798       lex(); // skip operation name
5799     } else if (!parseExpr(Op.Id, "an operation name")) {
5800       return false;
5801     }
5802 
5803     if (trySkipToken(AsmToken::Comma)) {
5804       Stream.IsDefined = true;
5805       Stream.Loc = getLoc();
5806       if (!parseExpr(Stream.Id))
5807         return false;
5808     }
5809   }
5810 
5811   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5812 }
5813 
5814 bool
5815 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5816                                  const OperandInfoTy &Op,
5817                                  const OperandInfoTy &Stream) {
5818   using namespace llvm::AMDGPU::SendMsg;
5819 
5820   // Validation strictness depends on whether the message is specified
5821   // in a symbolic or in a numeric form. In the latter case
5822   // only the encodability of the value is checked.
5823   bool Strict = Msg.IsSymbolic;
5824 
5825   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5826     Error(Msg.Loc, "invalid message id");
5827     return false;
5828   }
5829   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5830     if (Op.IsDefined) {
5831       Error(Op.Loc, "message does not support operations");
5832     } else {
5833       Error(Msg.Loc, "missing message operation");
5834     }
5835     return false;
5836   }
5837   if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5838     Error(Op.Loc, "invalid operation id");
5839     return false;
5840   }
5841   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5842     Error(Stream.Loc, "message operation does not support streams");
5843     return false;
5844   }
5845   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5846     Error(Stream.Loc, "invalid message stream id");
5847     return false;
5848   }
5849   return true;
5850 }
5851 
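// Parse the s_sendmsg operand: either a raw 16-bit immediate or the
// "sendmsg(<msg>[, <op>[, <stream>]])" macro, e.g.
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)" (message and operation names illustrative;
// symbolic names are resolved by getMsgId/getMsgOpId).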
5852 OperandMatchResultTy
5853 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5854   using namespace llvm::AMDGPU::SendMsg;
5855 
5856   int64_t ImmVal = 0;
5857   SMLoc Loc = getLoc();
5858 
5859   if (trySkipId("sendmsg", AsmToken::LParen)) {
5860     OperandInfoTy Msg(ID_UNKNOWN_);
5861     OperandInfoTy Op(OP_NONE_);
5862     OperandInfoTy Stream(STREAM_ID_NONE_);
5863     if (parseSendMsgBody(Msg, Op, Stream) &&
5864         validateSendMsg(Msg, Op, Stream)) {
5865       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5866     } else {
5867       return MatchOperand_ParseFail;
5868     }
5869   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
5870     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5871       Error(Loc, "invalid immediate: only 16-bit values are legal");
5872       return MatchOperand_ParseFail;
5873     }
5874   } else {
5875     return MatchOperand_ParseFail;
5876   }
5877 
5878   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5879   return MatchOperand_Success;
5880 }
5881 
5882 bool AMDGPUOperand::isSendMsg() const {
5883   return isImmTy(ImmTySendMsg);
5884 }
5885 
5886 //===----------------------------------------------------------------------===//
5887 // v_interp
5888 //===----------------------------------------------------------------------===//
5889 
5890 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5891   if (getLexer().getKind() != AsmToken::Identifier)
5892     return MatchOperand_NoMatch;
5893 
5894   StringRef Str = Parser.getTok().getString();
5895   int Slot = StringSwitch<int>(Str)
5896     .Case("p10", 0)
5897     .Case("p20", 1)
5898     .Case("p0", 2)
5899     .Default(-1);
5900 
5901   SMLoc S = Parser.getTok().getLoc();
5902   if (Slot == -1)
5903     return MatchOperand_ParseFail;
5904 
5905   Parser.Lex();
5906   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5907                                               AMDGPUOperand::ImmTyInterpSlot));
5908   return MatchOperand_Success;
5909 }
5910 
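// Parse a v_interp attribute operand of the form "attr<N>.<chan>", e.g.
// "attr0.x" (illustrative); N may be at most 63 and the channel is one of
// .x, .y, .z or .w.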
5911 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5912   if (getLexer().getKind() != AsmToken::Identifier)
5913     return MatchOperand_NoMatch;
5914 
5915   StringRef Str = Parser.getTok().getString();
5916   if (!Str.startswith("attr"))
5917     return MatchOperand_NoMatch;
5918 
5919   StringRef Chan = Str.take_back(2);
5920   int AttrChan = StringSwitch<int>(Chan)
5921     .Case(".x", 0)
5922     .Case(".y", 1)
5923     .Case(".z", 2)
5924     .Case(".w", 3)
5925     .Default(-1);
5926   if (AttrChan == -1)
5927     return MatchOperand_ParseFail;
5928 
5929   Str = Str.drop_back(2).drop_front(4);
5930 
5931   uint8_t Attr;
5932   if (Str.getAsInteger(10, Attr))
5933     return MatchOperand_ParseFail;
5934 
5935   SMLoc S = Parser.getTok().getLoc();
5936   Parser.Lex();
5937   if (Attr > 63) {
5938     Error(S, "out of bounds attr");
5939     return MatchOperand_ParseFail;
5940   }
5941 
5942   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5943 
5944   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5945                                               AMDGPUOperand::ImmTyInterpAttr));
5946   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5947                                               AMDGPUOperand::ImmTyAttrChan));
5948   return MatchOperand_Success;
5949 }
5950 
5951 //===----------------------------------------------------------------------===//
5952 // exp
5953 //===----------------------------------------------------------------------===//
5954 
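// Map an export target name to its encoded value. Recognized spellings, per
// the checks below: "null", "mrt0".."mrt7", "mrtz", "pos0".."pos3" ("pos4"
// on GFX10+), "prim" (GFX10+), and "param0".."param31".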
5955 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5956                                                       uint8_t &Val) {
5957   if (Str == "null") {
5958     Val = Exp::ET_NULL;
5959     return MatchOperand_Success;
5960   }
5961 
5962   if (Str.startswith("mrt")) {
5963     Str = Str.drop_front(3);
5964     if (Str == "z") { // == mrtz
5965       Val = Exp::ET_MRTZ;
5966       return MatchOperand_Success;
5967     }
5968 
5969     if (Str.getAsInteger(10, Val))
5970       return MatchOperand_ParseFail;
5971 
5972     if (Val > Exp::ET_MRT7)
5973       return MatchOperand_ParseFail;
5974 
5975     return MatchOperand_Success;
5976   }
5977 
5978   if (Str.startswith("pos")) {
5979     Str = Str.drop_front(3);
5980     if (Str.getAsInteger(10, Val))
5981       return MatchOperand_ParseFail;
5982 
5983     if (Val > (isGFX10Plus() ? 4 : 3))
5984       return MatchOperand_ParseFail;
5985 
5986     Val += Exp::ET_POS0;
5987     return MatchOperand_Success;
5988   }
5989 
5990   if (isGFX10Plus() && Str == "prim") {
5991     Val = Exp::ET_PRIM;
5992     return MatchOperand_Success;
5993   }
5994 
5995   if (Str.startswith("param")) {
5996     Str = Str.drop_front(5);
5997     if (Str.getAsInteger(10, Val))
5998       return MatchOperand_ParseFail;
5999 
6000     if (Val >= 32)
6001       return MatchOperand_ParseFail;
6002 
6003     Val += Exp::ET_PARAM0;
6004     return MatchOperand_Success;
6005   }
6006 
6007   return MatchOperand_ParseFail;
6008 }
6009 
6010 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6011   if (!isToken(AsmToken::Identifier))
6012     return MatchOperand_NoMatch;
6013 
6014   SMLoc S = getLoc();
6015 
6016   uint8_t Val;
6017   auto Res = parseExpTgtImpl(getTokenStr(), Val);
6018   if (Res != MatchOperand_Success) {
6019     Error(S, "invalid exp target");
6020     return Res;
6021   }
6022 
6023   Parser.Lex();
6024   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
6025                                               AMDGPUOperand::ImmTyExpTgt));
6026   return MatchOperand_Success;
6027 }
6028 
6029 //===----------------------------------------------------------------------===//
6030 // parser helpers
6031 //===----------------------------------------------------------------------===//
6032 
6033 bool
6034 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6035   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6036 }
6037 
6038 bool
6039 AMDGPUAsmParser::isId(const StringRef Id) const {
6040   return isId(getToken(), Id);
6041 }
6042 
6043 bool
6044 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6045   return getTokenKind() == Kind;
6046 }
6047 
6048 bool
6049 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6050   if (isId(Id)) {
6051     lex();
6052     return true;
6053   }
6054   return false;
6055 }
6056 
6057 bool
6058 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6059   if (isId(Id) && peekToken().is(Kind)) {
6060     lex();
6061     lex();
6062     return true;
6063   }
6064   return false;
6065 }
6066 
6067 bool
6068 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6069   if (isToken(Kind)) {
6070     lex();
6071     return true;
6072   }
6073   return false;
6074 }
6075 
6076 bool
6077 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6078                            const StringRef ErrMsg) {
6079   if (!trySkipToken(Kind)) {
6080     Error(getLoc(), ErrMsg);
6081     return false;
6082   }
6083   return true;
6084 }
6085 
6086 bool
6087 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6088   SMLoc S = getLoc();
6089 
6090   const MCExpr *Expr;
6091   if (Parser.parseExpression(Expr))
6092     return false;
6093 
6094   if (Expr->evaluateAsAbsolute(Imm))
6095     return true;
6096 
6097   if (Expected.empty()) {
6098     Error(S, "expected absolute expression");
6099   } else {
6100     Error(S, Twine("expected ", Expected) +
6101              Twine(" or an absolute expression"));
6102   }
6103   return false;
6104 }
6105 
6106 bool
6107 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6108   SMLoc S = getLoc();
6109 
6110   const MCExpr *Expr;
6111   if (Parser.parseExpression(Expr))
6112     return false;
6113 
6114   int64_t IntVal;
6115   if (Expr->evaluateAsAbsolute(IntVal)) {
6116     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6117   } else {
6118     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6119   }
6120   return true;
6121 }
6122 
6123 bool
6124 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6125   if (isToken(AsmToken::String)) {
6126     Val = getToken().getStringContents();
6127     lex();
6128     return true;
6129   } else {
6130     Error(getLoc(), ErrMsg);
6131     return false;
6132   }
6133 }
6134 
6135 bool
6136 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6137   if (isToken(AsmToken::Identifier)) {
6138     Val = getTokenStr();
6139     lex();
6140     return true;
6141   } else {
6142     Error(getLoc(), ErrMsg);
6143     return false;
6144   }
6145 }
6146 
6147 AsmToken
6148 AMDGPUAsmParser::getToken() const {
6149   return Parser.getTok();
6150 }
6151 
6152 AsmToken
6153 AMDGPUAsmParser::peekToken() {
6154   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6155 }
6156 
6157 void
6158 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6159   auto TokCount = getLexer().peekTokens(Tokens);
6160 
6161   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6162     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6163 }
6164 
6165 AsmToken::TokenKind
6166 AMDGPUAsmParser::getTokenKind() const {
6167   return getLexer().getKind();
6168 }
6169 
6170 SMLoc
6171 AMDGPUAsmParser::getLoc() const {
6172   return getToken().getLoc();
6173 }
6174 
6175 StringRef
6176 AMDGPUAsmParser::getTokenStr() const {
6177   return getToken().getString();
6178 }
6179 
6180 void
6181 AMDGPUAsmParser::lex() {
6182   Parser.Lex();
6183 }
6184 
6185 SMLoc
6186 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6187                                const OperandVector &Operands) const {
6188   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6189     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6190     if (Test(Op))
6191       return Op.getStartLoc();
6192   }
6193   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6194 }
6195 
6196 SMLoc
6197 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6198                            const OperandVector &Operands) const {
6199   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6200   return getOperandLoc(Test, Operands);
6201 }
6202 
6203 SMLoc
6204 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6205                            const OperandVector &Operands) const {
6206   auto Test = [=](const AMDGPUOperand& Op) {
6207     return Op.isRegKind() && Op.getReg() == Reg;
6208   };
6209   return getOperandLoc(Test, Operands);
6210 }
6211 
6212 SMLoc
6213 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6214   auto Test = [](const AMDGPUOperand& Op) {
6215     return Op.IsImmKindLiteral() || Op.isExpr();
6216   };
6217   return getOperandLoc(Test, Operands);
6218 }
6219 
6220 SMLoc
6221 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6222   auto Test = [](const AMDGPUOperand& Op) {
6223     return Op.isImmKindConst();
6224   };
6225   return getOperandLoc(Test, Operands);
6226 }
6227 
6228 //===----------------------------------------------------------------------===//
6229 // swizzle
6230 //===----------------------------------------------------------------------===//
6231 
6232 LLVM_READNONE
6233 static unsigned
6234 encodeBitmaskPerm(const unsigned AndMask,
6235                   const unsigned OrMask,
6236                   const unsigned XorMask) {
6237   using namespace llvm::AMDGPU::Swizzle;
6238 
6239   return BITMASK_PERM_ENC |
6240          (AndMask << BITMASK_AND_SHIFT) |
6241          (OrMask  << BITMASK_OR_SHIFT)  |
6242          (XorMask << BITMASK_XOR_SHIFT);
6243 }
6244 
6245 bool
6246 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6247                                      const unsigned MinVal,
6248                                      const unsigned MaxVal,
6249                                      const StringRef ErrMsg,
6250                                      SMLoc &Loc) {
6251   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6252     return false;
6253   }
6254   Loc = Parser.getTok().getLoc();
6255   if (!parseExpr(Op)) {
6256     return false;
6257   }
6258   if (Op < MinVal || Op > MaxVal) {
6259     Error(Loc, ErrMsg);
6260     return false;
6261   }
6262 
6263   return true;
6264 }
6265 
6266 bool
6267 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6268                                       const unsigned MinVal,
6269                                       const unsigned MaxVal,
6270                                       const StringRef ErrMsg) {
6271   SMLoc Loc;
6272   for (unsigned i = 0; i < OpNum; ++i) {
6273     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6274       return false;
6275   }
6276 
6277   return true;
6278 }
6279 
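// Parse the four 2-bit lane ids of a quad_perm swizzle macro. As an
// illustrative example (mode name as spelled in IdSymbolic), an operand
// written as offset:swizzle(QUAD_PERM, 0, 1, 2, 3) reaches this function
// with the "0, 1, 2, 3" portion still to be parsed.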
6280 bool
6281 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6282   using namespace llvm::AMDGPU::Swizzle;
6283 
6284   int64_t Lane[LANE_NUM];
6285   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6286                            "expected a 2-bit lane id")) {
6287     Imm = QUAD_PERM_ENC;
6288     for (unsigned I = 0; I < LANE_NUM; ++I) {
6289       Imm |= Lane[I] << (LANE_SHIFT * I);
6290     }
6291     return true;
6292   }
6293   return false;
6294 }
6295 
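// A broadcast is encoded as a BITMASK_PERM: the AND mask keeps the bits
// that select the group (everything above log2(GroupSize)), and the OR
// mask supplies the lane to broadcast within each group.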
6296 bool
6297 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6298   using namespace llvm::AMDGPU::Swizzle;
6299 
6300   SMLoc Loc;
6301   int64_t GroupSize;
6302   int64_t LaneIdx;
6303 
6304   if (!parseSwizzleOperand(GroupSize,
6305                            2, 32,
6306                            "group size must be in the interval [2,32]",
6307                            Loc)) {
6308     return false;
6309   }
6310   if (!isPowerOf2_64(GroupSize)) {
6311     Error(Loc, "group size must be a power of two");
6312     return false;
6313   }
6314   if (parseSwizzleOperand(LaneIdx,
6315                           0, GroupSize - 1,
6316                           "lane id must be in the interval [0,group size - 1]",
6317                           Loc)) {
6318     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6319     return true;
6320   }
6321   return false;
6322 }
6323 
6324 bool
6325 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6326   using namespace llvm::AMDGPU::Swizzle;
6327 
6328   SMLoc Loc;
6329   int64_t GroupSize;
6330 
6331   if (!parseSwizzleOperand(GroupSize,
6332                            2, 32,
6333                            "group size must be in the interval [2,32]",
6334                            Loc)) {
6335     return false;
6336   }
6337   if (!isPowerOf2_64(GroupSize)) {
6338     Error(Loc, "group size must be a power of two");
6339     return false;
6340   }
6341 
6342   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6343   return true;
6344 }
6345 
6346 bool
6347 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6348   using namespace llvm::AMDGPU::Swizzle;
6349 
6350   SMLoc Loc;
6351   int64_t GroupSize;
6352 
6353   if (!parseSwizzleOperand(GroupSize,
6354                            1, 16,
6355                            "group size must be in the interval [1,16]",
6356                            Loc)) {
6357     return false;
6358   }
6359   if (!isPowerOf2_64(GroupSize)) {
6360     Error(Loc, "group size must be a power of two");
6361     return false;
6362   }
6363 
6364   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6365   return true;
6366 }
6367 
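// Parse the quoted 5-character control string of a bitmask_perm swizzle
// macro, e.g. swizzle(BITMASK_PERM, "01pi0") (an illustrative value).
// Each character controls one bit of the lane id, most significant bit
// first: '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it,
// and 'i' inverts it.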
6368 bool
6369 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6370   using namespace llvm::AMDGPU::Swizzle;
6371 
6372   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6373     return false;
6374   }
6375 
6376   StringRef Ctl;
6377   SMLoc StrLoc = Parser.getTok().getLoc();
6378   if (!parseString(Ctl)) {
6379     return false;
6380   }
6381   if (Ctl.size() != BITMASK_WIDTH) {
6382     Error(StrLoc, "expected a 5-character mask");
6383     return false;
6384   }
6385 
6386   unsigned AndMask = 0;
6387   unsigned OrMask = 0;
6388   unsigned XorMask = 0;
6389 
6390   for (size_t i = 0; i < Ctl.size(); ++i) {
6391     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6392     switch(Ctl[i]) {
6393     default:
6394       Error(StrLoc, "invalid mask");
6395       return false;
6396     case '0':
6397       break;
6398     case '1':
6399       OrMask |= Mask;
6400       break;
6401     case 'p':
6402       AndMask |= Mask;
6403       break;
6404     case 'i':
6405       AndMask |= Mask;
6406       XorMask |= Mask;
6407       break;
6408     }
6409   }
6410 
6411   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6412   return true;
6413 }
6414 
6415 bool
6416 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6417 
6418   SMLoc OffsetLoc = Parser.getTok().getLoc();
6419 
6420   if (!parseExpr(Imm, "a swizzle macro")) {
6421     return false;
6422   }
6423   if (!isUInt<16>(Imm)) {
6424     Error(OffsetLoc, "expected a 16-bit offset");
6425     return false;
6426   }
6427   return true;
6428 }
6429 
6430 bool
6431 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6432   using namespace llvm::AMDGPU::Swizzle;
6433 
6434   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6435 
6436     SMLoc ModeLoc = Parser.getTok().getLoc();
6437     bool Ok = false;
6438 
6439     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6440       Ok = parseSwizzleQuadPerm(Imm);
6441     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6442       Ok = parseSwizzleBitmaskPerm(Imm);
6443     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6444       Ok = parseSwizzleBroadcast(Imm);
6445     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6446       Ok = parseSwizzleSwap(Imm);
6447     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6448       Ok = parseSwizzleReverse(Imm);
6449     } else {
6450       Error(ModeLoc, "expected a swizzle mode");
6451     }
6452 
6453     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6454   }
6455 
6456   return false;
6457 }
6458 
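// Parse the optional swizzle operand. Two forms are accepted here:
//   offset:<16-bit immediate>           (raw swizzle encoding)
//   offset:swizzle(<mode>, <args>...)   (symbolic macro, handled above)
// The mode names come from IdSymbolic; the forms above are a sketch of
// the accepted syntax rather than an exhaustive grammar.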
6459 OperandMatchResultTy
6460 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6461   SMLoc S = Parser.getTok().getLoc();
6462   int64_t Imm = 0;
6463 
6464   if (trySkipId("offset")) {
6465 
6466     bool Ok = false;
6467     if (skipToken(AsmToken::Colon, "expected a colon")) {
6468       if (trySkipId("swizzle")) {
6469         Ok = parseSwizzleMacro(Imm);
6470       } else {
6471         Ok = parseSwizzleOffset(Imm);
6472       }
6473     }
6474 
6475     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6476 
6477     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6478   } else {
6479     // Swizzle "offset" operand is optional.
6480     // If it is omitted, try parsing other optional operands.
6481     return parseOptionalOpr(Operands);
6482   }
6483 }
6484 
6485 bool
6486 AMDGPUOperand::isSwizzle() const {
6487   return isImmTy(ImmTySwizzle);
6488 }
6489 
6490 //===----------------------------------------------------------------------===//
6491 // VGPR Index Mode
6492 //===----------------------------------------------------------------------===//
6493 
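// Parse the body of a gpr_idx(...) macro: a comma-separated list of VGPR
// index modes (names taken from IdSymbolic) terminated by ')'. An empty
// list, i.e. "gpr_idx()", yields OFF; UNDEF signals a parse error.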
6494 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6495 
6496   using namespace llvm::AMDGPU::VGPRIndexMode;
6497 
6498   if (trySkipToken(AsmToken::RParen)) {
6499     return OFF;
6500   }
6501 
6502   int64_t Imm = 0;
6503 
6504   while (true) {
6505     unsigned Mode = 0;
6506     SMLoc S = Parser.getTok().getLoc();
6507 
6508     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6509       if (trySkipId(IdSymbolic[ModeId])) {
6510         Mode = 1 << ModeId;
6511         break;
6512       }
6513     }
6514 
6515     if (Mode == 0) {
6516       Error(S, (Imm == 0)?
6517                "expected a VGPR index mode or a closing parenthesis" :
6518                "expected a VGPR index mode");
6519       return UNDEF;
6520     }
6521 
6522     if (Imm & Mode) {
6523       Error(S, "duplicate VGPR index mode");
6524       return UNDEF;
6525     }
6526     Imm |= Mode;
6527 
6528     if (trySkipToken(AsmToken::RParen))
6529       break;
6530     if (!skipToken(AsmToken::Comma,
6531                    "expected a comma or a closing parenthesis"))
6532       return UNDEF;
6533   }
6534 
6535   return Imm;
6536 }
6537 
6538 OperandMatchResultTy
6539 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6540 
6541   using namespace llvm::AMDGPU::VGPRIndexMode;
6542 
6543   int64_t Imm = 0;
6544   SMLoc S = Parser.getTok().getLoc();
6545 
6546   if (getLexer().getKind() == AsmToken::Identifier &&
6547       Parser.getTok().getString() == "gpr_idx" &&
6548       getLexer().peekTok().is(AsmToken::LParen)) {
6549 
6550     Parser.Lex();
6551     Parser.Lex();
6552 
6553     Imm = parseGPRIdxMacro();
6554     if (Imm == UNDEF)
6555       return MatchOperand_ParseFail;
6556 
6557   } else {
6558     if (getParser().parseAbsoluteExpression(Imm))
6559       return MatchOperand_ParseFail;
6560     if (Imm < 0 || !isUInt<4>(Imm)) {
6561       Error(S, "invalid immediate: only 4-bit values are legal");
6562       return MatchOperand_ParseFail;
6563     }
6564   }
6565 
6566   Operands.push_back(
6567       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6568   return MatchOperand_Success;
6569 }
6570 
6571 bool AMDGPUOperand::isGPRIdxMode() const {
6572   return isImmTy(ImmTyGprIdxMode);
6573 }
6574 
6575 //===----------------------------------------------------------------------===//
6576 // sopp branch targets
6577 //===----------------------------------------------------------------------===//
6578 
6579 OperandMatchResultTy
6580 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6581 
6582   // Make sure we are not parsing something
6583   // that looks like a label or an expression but is not.
6584   // This will improve error messages.
6585   if (isRegister() || isModifier())
6586     return MatchOperand_NoMatch;
6587 
6588   if (!parseExpr(Operands))
6589     return MatchOperand_ParseFail;
6590 
6591   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6592   assert(Opr.isImm() || Opr.isExpr());
6593   SMLoc Loc = Opr.getStartLoc();
6594 
6595   // Currently we do not support arbitrary expressions as branch targets.
6596   // Only labels and absolute expressions are accepted.
6597   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6598     Error(Loc, "expected an absolute expression or a label");
6599   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6600     Error(Loc, "expected a 16-bit signed jump offset");
6601   }
6602 
6603   return MatchOperand_Success;
6604 }
6605 
6606 //===----------------------------------------------------------------------===//
6607 // Boolean holding registers
6608 //===----------------------------------------------------------------------===//
6609 
6610 OperandMatchResultTy
6611 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6612   return parseReg(Operands);
6613 }
6614 
6615 //===----------------------------------------------------------------------===//
6616 // mubuf
6617 //===----------------------------------------------------------------------===//
6618 
6619 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6620   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6621 }
6622 
6623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6624   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6625 }
6626 
6627 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6628   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6629 }
6630 
6631 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6632   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6633 }
6634 
6635 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6636                                const OperandVector &Operands,
6637                                bool IsAtomic,
6638                                bool IsAtomicReturn,
6639                                bool IsLds) {
6640   bool IsLdsOpcode = IsLds;
6641   bool HasLdsModifier = false;
6642   OptionalImmIndexMap OptionalIdx;
6643   assert(IsAtomicReturn ? IsAtomic : true);
6644   unsigned FirstOperandIdx = 1;
6645 
6646   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6647     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6648 
6649     // Add the register arguments
6650     if (Op.isReg()) {
6651       Op.addRegOperands(Inst, 1);
6652       // Insert a tied src for atomic return dst.
6653       // This cannot be postponed as subsequent calls to
6654       // addImmOperands rely on the correct number of MC operands.
6655       if (IsAtomicReturn && i == FirstOperandIdx)
6656         Op.addRegOperands(Inst, 1);
6657       continue;
6658     }
6659 
6660     // Handle the case where soffset is an immediate
6661     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6662       Op.addImmOperands(Inst, 1);
6663       continue;
6664     }
6665 
6666     HasLdsModifier |= Op.isLDS();
6667 
6668     // Handle tokens like 'offen' which are sometimes hard-coded into the
6669     // asm string.  There are no MCInst operands for these.
6670     if (Op.isToken()) {
6671       continue;
6672     }
6673     assert(Op.isImm());
6674 
6675     // Handle optional arguments
6676     OptionalIdx[Op.getImmTy()] = i;
6677   }
6678 
6679   // This is a workaround for an llvm quirk which may result in
6680   // incorrect instruction selection. The lds and non-lds versions of
6681   // MUBUF instructions are identical except that lds versions
6682   // have a mandatory 'lds' modifier. However, this modifier follows
6683   // the optional modifiers, and the llvm asm matcher regards this 'lds'
6684   // modifier as an optional one. As a result, an lds version
6685   // of an opcode may be selected even if it has no 'lds' modifier.
6686   if (IsLdsOpcode && !HasLdsModifier) {
6687     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6688     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6689       Inst.setOpcode(NoLdsOpcode);
6690       IsLdsOpcode = false;
6691     }
6692   }
6693 
6694   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6695   if (!IsAtomic || IsAtomicReturn) {
6696     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6697                           IsAtomicReturn ? -1 : 0);
6698   }
6699   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6700 
6701   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6702     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6703   }
6704 
6705   if (isGFX10Plus())
6706     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6707 }
6708 
6709 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6710   OptionalImmIndexMap OptionalIdx;
6711 
6712   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6713     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6714 
6715     // Add the register arguments
6716     if (Op.isReg()) {
6717       Op.addRegOperands(Inst, 1);
6718       continue;
6719     }
6720 
6721     // Handle the case where soffset is an immediate
6722     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6723       Op.addImmOperands(Inst, 1);
6724       continue;
6725     }
6726 
6727     // Handle tokens like 'offen' which are sometimes hard-coded into the
6728     // asm string.  There are no MCInst operands for these.
6729     if (Op.isToken()) {
6730       continue;
6731     }
6732     assert(Op.isImm());
6733 
6734     // Handle optional arguments
6735     OptionalIdx[Op.getImmTy()] = i;
6736   }
6737 
6738   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6739                         AMDGPUOperand::ImmTyOffset);
6740   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6741   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6742   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6743   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6744 
6745   if (isGFX10Plus())
6746     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6747 }
6748 
6749 //===----------------------------------------------------------------------===//
6750 // mimg
6751 //===----------------------------------------------------------------------===//
6752 
6753 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6754                               bool IsAtomic) {
6755   unsigned I = 1;
6756   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6757   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6758     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6759   }
6760 
6761   if (IsAtomic) {
6762     // Add src, same as dst
6763     assert(Desc.getNumDefs() == 1);
6764     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6765   }
6766 
6767   OptionalImmIndexMap OptionalIdx;
6768 
6769   for (unsigned E = Operands.size(); I != E; ++I) {
6770     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6771 
6772     // Add the register arguments
6773     if (Op.isReg()) {
6774       Op.addRegOperands(Inst, 1);
6775     } else if (Op.isImmModifier()) {
6776       OptionalIdx[Op.getImmTy()] = I;
6777     } else if (!Op.isToken()) {
6778       llvm_unreachable("unexpected operand type");
6779     }
6780   }
6781 
6782   bool IsGFX10Plus = isGFX10Plus();
6783 
6784   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6785   if (IsGFX10Plus)
6786     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6788   if (IsGFX10Plus)
6789     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6790   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6791   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6792   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6793   if (IsGFX10Plus)
6794     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6795   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6796   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6797   if (!IsGFX10Plus)
6798     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6799   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6800 }
6801 
6802 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6803   cvtMIMG(Inst, Operands, true);
6804 }
6805 
6806 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6807                                       const OperandVector &Operands) {
6808   for (unsigned I = 1; I < Operands.size(); ++I) {
6809     auto &Operand = (AMDGPUOperand &)*Operands[I];
6810     if (Operand.isReg())
6811       Operand.addRegOperands(Inst, 1);
6812   }
6813 
6814   Inst.addOperand(MCOperand::createImm(1)); // a16
6815 }
6816 
6817 //===----------------------------------------------------------------------===//
6818 // smrd
6819 //===----------------------------------------------------------------------===//
6820 
6821 bool AMDGPUOperand::isSMRDOffset8() const {
6822   return isImm() && isUInt<8>(getImm());
6823 }
6824 
6825 bool AMDGPUOperand::isSMEMOffset() const {
6826   return isImm(); // Offset range is checked later by validator.
6827 }
6828 
6829 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6830   // 32-bit literals are only supported on CI, and we only want to use them
6831   // when the offset does not fit in 8 bits.
6832   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6833 }
6834 
6835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6836   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6837 }
6838 
6839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6840   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6841 }
6842 
6843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6844   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6845 }
6846 
6847 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6848   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6849 }
6850 
6851 //===----------------------------------------------------------------------===//
6852 // vop3
6853 //===----------------------------------------------------------------------===//
6854 
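// Map the "mul:N" / "div:N" omod syntax onto the 2-bit encoding assumed
// by these helpers: mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3
// (div:1 is the same as mul:1).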
6855 static bool ConvertOmodMul(int64_t &Mul) {
6856   if (Mul != 1 && Mul != 2 && Mul != 4)
6857     return false;
6858 
6859   Mul >>= 1;
6860   return true;
6861 }
6862 
6863 static bool ConvertOmodDiv(int64_t &Div) {
6864   if (Div == 1) {
6865     Div = 0;
6866     return true;
6867   }
6868 
6869   if (Div == 2) {
6870     Div = 3;
6871     return true;
6872   }
6873 
6874   return false;
6875 }
6876 
6877 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6878   if (BoundCtrl == 0) {
6879     BoundCtrl = 1;
6880     return true;
6881   }
6882 
6883   if (BoundCtrl == -1) {
6884     BoundCtrl = 0;
6885     return true;
6886   }
6887 
6888   return false;
6889 }
6890 
6891 // Note: the order in this table matches the order of operands in AsmString.
6892 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6893   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6894   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6895   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6896   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6897   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6898   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6899   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6900   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6901   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6902   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6903   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6904   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6905   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6906   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6907   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6908   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6909   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6910   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6911   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6912   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6913   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6914   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6915   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6916   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6917   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6918   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6919   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6920   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6921   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6922   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6923   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6924   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6925   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6926   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6927   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6928   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6929   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6930   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6931   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6932   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6933   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6934   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6935   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6936 };
6937 
6938 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6939 
6940   OperandMatchResultTy res = parseOptionalOpr(Operands);
6941 
6942   // This is a hack to enable hardcoded mandatory operands which follow
6943   // optional operands.
6944   //
6945   // The current design assumes that all operands after the first optional operand
6946   // are also optional. However, the implementation of some instructions violates
6947   // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
6948   //
6949   // To alleviate this problem, we have to (implicitly) parse extra operands
6950   // to make sure the autogenerated parser of custom operands never hits
6951   // hardcoded mandatory operands.
6952 
6953   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6954     if (res != MatchOperand_Success ||
6955         isToken(AsmToken::EndOfStatement))
6956       break;
6957 
6958     trySkipToken(AsmToken::Comma);
6959     res = parseOptionalOpr(Operands);
6960   }
6961 
6962   return res;
6963 }
6964 
6965 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6966   OperandMatchResultTy res;
6967   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6968     // try to parse any optional operand here
6969     if (Op.IsBit) {
6970       res = parseNamedBit(Op.Name, Operands, Op.Type);
6971     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6972       res = parseOModOperand(Operands);
6973     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6974                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6975                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6976       res = parseSDWASel(Operands, Op.Name, Op.Type);
6977     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6978       res = parseSDWADstUnused(Operands);
6979     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6980                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6981                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6982                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6983       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6984                                         Op.ConvertResult);
6985     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6986       res = parseDim(Operands);
6987     } else {
6988       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6989     }
6990     if (res != MatchOperand_NoMatch) {
6991       return res;
6992     }
6993   }
6994   return MatchOperand_NoMatch;
6995 }
6996 
6997 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6998   StringRef Name = Parser.getTok().getString();
6999   if (Name == "mul") {
7000     return parseIntWithPrefix("mul", Operands,
7001                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7002   }
7003 
7004   if (Name == "div") {
7005     return parseIntWithPrefix("div", Operands,
7006                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7007   }
7008 
7009   return MatchOperand_NoMatch;
7010 }
7011 
7012 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7013   cvtVOP3P(Inst, Operands);
7014 
7015   int Opc = Inst.getOpcode();
7016 
7017   int SrcNum;
7018   const int Ops[] = { AMDGPU::OpName::src0,
7019                       AMDGPU::OpName::src1,
7020                       AMDGPU::OpName::src2 };
7021   for (SrcNum = 0;
7022        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7023        ++SrcNum);
7024   assert(SrcNum > 0);
7025 
7026   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7027   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7028 
7029   if ((OpSel & (1 << SrcNum)) != 0) {
7030     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7031     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7032     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7033   }
7034 }
7035 
7036 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7037       // 1. This operand is an input-modifiers operand
7038   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7039       // 2. This is not the last operand
7040       && Desc.NumOperands > (OpNum + 1)
7041       // 3. The next operand is a register-class operand
7042       && Desc.OpInfo[OpNum + 1].RegClass != -1
7043       // 4. The next operand is not tied to any other operand
7044       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7045 }
7046 
7047 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7048 {
7049   OptionalImmIndexMap OptionalIdx;
7050   unsigned Opc = Inst.getOpcode();
7051 
7052   unsigned I = 1;
7053   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7054   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7055     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7056   }
7057 
7058   for (unsigned E = Operands.size(); I != E; ++I) {
7059     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7060     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7061       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7062     } else if (Op.isInterpSlot() ||
7063                Op.isInterpAttr() ||
7064                Op.isAttrChan()) {
7065       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7066     } else if (Op.isImmModifier()) {
7067       OptionalIdx[Op.getImmTy()] = I;
7068     } else {
7069       llvm_unreachable("unhandled operand type");
7070     }
7071   }
7072 
7073   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7074     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7075   }
7076 
7077   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7078     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7079   }
7080 
7081   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7082     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7083   }
7084 }
7085 
7086 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7087                               OptionalImmIndexMap &OptionalIdx) {
7088   unsigned Opc = Inst.getOpcode();
7089 
7090   unsigned I = 1;
7091   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7092   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7093     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7094   }
7095 
7096   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7097     // This instruction has src modifiers
7098     for (unsigned E = Operands.size(); I != E; ++I) {
7099       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7100       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7101         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7102       } else if (Op.isImmModifier()) {
7103         OptionalIdx[Op.getImmTy()] = I;
7104       } else if (Op.isRegOrImm()) {
7105         Op.addRegOrImmOperands(Inst, 1);
7106       } else {
7107         llvm_unreachable("unhandled operand type");
7108       }
7109     }
7110   } else {
7111     // No src modifiers
7112     for (unsigned E = Operands.size(); I != E; ++I) {
7113       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7114       if (Op.isMod()) {
7115         OptionalIdx[Op.getImmTy()] = I;
7116       } else {
7117         Op.addRegOrImmOperands(Inst, 1);
7118       }
7119     }
7120   }
7121 
7122   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7123     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7124   }
7125 
7126   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7127     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7128   }
7129 
7130   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7131   // they have a src2 register operand that is tied to the dst operand.
7132   // We don't allow modifiers for this operand in the assembler, so
7133   // src2_modifiers should be 0.
7134   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7135       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7136       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7137       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7138       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7139       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7140       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7141       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7142       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7143       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7144     auto it = Inst.begin();
7145     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7146     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7147     ++it;
7148     // Copy the operand to ensure it's not invalidated when Inst grows.
7149     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7150   }
7151 }
7152 
7153 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7154   OptionalImmIndexMap OptionalIdx;
7155   cvtVOP3(Inst, Operands, OptionalIdx);
7156 }
7157 
7158 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7159                                const OperandVector &Operands) {
7160   OptionalImmIndexMap OptIdx;
7161   const int Opc = Inst.getOpcode();
7162   const MCInstrDesc &Desc = MII.get(Opc);
7163 
7164   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7165 
7166   cvtVOP3(Inst, Operands, OptIdx);
7167 
7168   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7169     assert(!IsPacked);
7170     Inst.addOperand(Inst.getOperand(0));
7171   }
7172 
7173   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7174   // instruction, and then figure out where to actually put the modifiers.
7175 
7176   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7177 
7178   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7179   if (OpSelHiIdx != -1) {
7180     int DefaultVal = IsPacked ? -1 : 0;
7181     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7182                           DefaultVal);
7183   }
7184 
7185   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7186   if (NegLoIdx != -1) {
7187     assert(IsPacked);
7188     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7189     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7190   }
7191 
7192   const int Ops[] = { AMDGPU::OpName::src0,
7193                       AMDGPU::OpName::src1,
7194                       AMDGPU::OpName::src2 };
7195   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7196                          AMDGPU::OpName::src1_modifiers,
7197                          AMDGPU::OpName::src2_modifiers };
7198 
7199   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7200 
7201   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7202   unsigned OpSelHi = 0;
7203   unsigned NegLo = 0;
7204   unsigned NegHi = 0;
7205 
7206   if (OpSelHiIdx != -1) {
7207     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7208   }
7209 
7210   if (NegLoIdx != -1) {
7211     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7212     NegLo = Inst.getOperand(NegLoIdx).getImm();
7213     NegHi = Inst.getOperand(NegHiIdx).getImm();
7214   }
7215 
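  // Fold the instruction-level op_sel/op_sel_hi/neg_lo/neg_hi masks into
  // the per-source modifier operands: bit J of each mask applies to srcJ.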
7216   for (int J = 0; J < 3; ++J) {
7217     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7218     if (OpIdx == -1)
7219       break;
7220 
7221     uint32_t ModVal = 0;
7222 
7223     if ((OpSel & (1 << J)) != 0)
7224       ModVal |= SISrcMods::OP_SEL_0;
7225 
7226     if ((OpSelHi & (1 << J)) != 0)
7227       ModVal |= SISrcMods::OP_SEL_1;
7228 
7229     if ((NegLo & (1 << J)) != 0)
7230       ModVal |= SISrcMods::NEG;
7231 
7232     if ((NegHi & (1 << J)) != 0)
7233       ModVal |= SISrcMods::NEG_HI;
7234 
7235     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7236 
7237     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7238   }
7239 }
7240 
7241 //===----------------------------------------------------------------------===//
7242 // dpp
7243 //===----------------------------------------------------------------------===//
7244 
7245 bool AMDGPUOperand::isDPP8() const {
7246   return isImmTy(ImmTyDPP8);
7247 }
7248 
7249 bool AMDGPUOperand::isDPPCtrl() const {
7250   using namespace AMDGPU::DPP;
7251 
7252   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7253   if (result) {
7254     int64_t Imm = getImm();
7255     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7256            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7257            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7258            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7259            (Imm == DppCtrl::WAVE_SHL1) ||
7260            (Imm == DppCtrl::WAVE_ROL1) ||
7261            (Imm == DppCtrl::WAVE_SHR1) ||
7262            (Imm == DppCtrl::WAVE_ROR1) ||
7263            (Imm == DppCtrl::ROW_MIRROR) ||
7264            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7265            (Imm == DppCtrl::BCAST15) ||
7266            (Imm == DppCtrl::BCAST31) ||
7267            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7268            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7269   }
7270   return false;
7271 }
7272 
7273 //===----------------------------------------------------------------------===//
7274 // mAI
7275 //===----------------------------------------------------------------------===//
7276 
7277 bool AMDGPUOperand::isBLGP() const {
7278   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7279 }
7280 
7281 bool AMDGPUOperand::isCBSZ() const {
7282   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7283 }
7284 
7285 bool AMDGPUOperand::isABID() const {
7286   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7287 }
7288 
7289 bool AMDGPUOperand::isS16Imm() const {
7290   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7291 }
7292 
7293 bool AMDGPUOperand::isU16Imm() const {
7294   return isImm() && isUInt<16>(getImm());
7295 }
7296 
7297 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7298   if (!isGFX10Plus())
7299     return MatchOperand_NoMatch;
7300 
7301   SMLoc S = Parser.getTok().getLoc();
7302 
7303   if (getLexer().isNot(AsmToken::Identifier))
7304     return MatchOperand_NoMatch;
7305   if (getLexer().getTok().getString() != "dim")
7306     return MatchOperand_NoMatch;
7307 
7308   Parser.Lex();
7309   if (getLexer().isNot(AsmToken::Colon))
7310     return MatchOperand_ParseFail;
7311 
7312   Parser.Lex();
7313 
7314   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7315   // integer.
7316   std::string Token;
7317   if (getLexer().is(AsmToken::Integer)) {
7318     SMLoc Loc = getLexer().getTok().getEndLoc();
7319     Token = std::string(getLexer().getTok().getString());
7320     Parser.Lex();
7321     if (getLexer().getTok().getLoc() != Loc)
7322       return MatchOperand_ParseFail;
7323   }
7324   if (getLexer().isNot(AsmToken::Identifier))
7325     return MatchOperand_ParseFail;
7326   Token += getLexer().getTok().getString();
7327 
7328   StringRef DimId = Token;
7329   if (DimId.startswith("SQ_RSRC_IMG_"))
7330     DimId = DimId.substr(12);
7331 
7332   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7333   if (!DimInfo)
7334     return MatchOperand_ParseFail;
7335 
7336   Parser.Lex();
7337 
7338   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7339                                               AMDGPUOperand::ImmTyDim));
7340   return MatchOperand_Success;
7341 }
7342 
7343 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7344   SMLoc S = Parser.getTok().getLoc();
7345   StringRef Prefix;
7346 
7347   if (getLexer().getKind() == AsmToken::Identifier) {
7348     Prefix = Parser.getTok().getString();
7349   } else {
7350     return MatchOperand_NoMatch;
7351   }
7352 
7353   if (Prefix != "dpp8")
7354     return parseDPPCtrl(Operands);
7355   if (!isGFX10Plus())
7356     return MatchOperand_NoMatch;
7357 
7358   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7359 
7360   int64_t Sels[8];
7361 
7362   Parser.Lex();
7363   if (getLexer().isNot(AsmToken::Colon))
7364     return MatchOperand_ParseFail;
7365 
7366   Parser.Lex();
7367   if (getLexer().isNot(AsmToken::LBrac))
7368     return MatchOperand_ParseFail;
7369 
7370   Parser.Lex();
7371   if (getParser().parseAbsoluteExpression(Sels[0]))
7372     return MatchOperand_ParseFail;
7373   if (0 > Sels[0] || 7 < Sels[0])
7374     return MatchOperand_ParseFail;
7375 
7376   for (size_t i = 1; i < 8; ++i) {
7377     if (getLexer().isNot(AsmToken::Comma))
7378       return MatchOperand_ParseFail;
7379 
7380     Parser.Lex();
7381     if (getParser().parseAbsoluteExpression(Sels[i]))
7382       return MatchOperand_ParseFail;
7383     if (0 > Sels[i] || 7 < Sels[i])
7384       return MatchOperand_ParseFail;
7385   }
7386 
7387   if (getLexer().isNot(AsmToken::RBrac))
7388     return MatchOperand_ParseFail;
7389   Parser.Lex();
7390 
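  // Pack the eight 3-bit lane selectors into a single dpp8 immediate,
  // with lane 0 occupying the lowest bits.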
7391   unsigned DPP8 = 0;
7392   for (size_t i = 0; i < 8; ++i)
7393     DPP8 |= (Sels[i] << (i * 3));
7394 
7395   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7396   return MatchOperand_Success;
7397 }
7398 
7399 OperandMatchResultTy
7400 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7401   using namespace AMDGPU::DPP;
7402 
7403   SMLoc S = Parser.getTok().getLoc();
7404   StringRef Prefix;
7405   int64_t Int;
7406 
7407   if (getLexer().getKind() == AsmToken::Identifier) {
7408     Prefix = Parser.getTok().getString();
7409   } else {
7410     return MatchOperand_NoMatch;
7411   }
7412 
7413   if (Prefix == "row_mirror") {
7414     Int = DppCtrl::ROW_MIRROR;
7415     Parser.Lex();
7416   } else if (Prefix == "row_half_mirror") {
7417     Int = DppCtrl::ROW_HALF_MIRROR;
7418     Parser.Lex();
7419   } else {
7420     // Check the prefix to prevent parseDPPCtrl from eating invalid tokens
7421     if (Prefix != "quad_perm"
7422         && Prefix != "row_shl"
7423         && Prefix != "row_shr"
7424         && Prefix != "row_ror"
7425         && Prefix != "wave_shl"
7426         && Prefix != "wave_rol"
7427         && Prefix != "wave_shr"
7428         && Prefix != "wave_ror"
7429         && Prefix != "row_bcast"
7430         && Prefix != "row_share"
7431         && Prefix != "row_xmask") {
7432       return MatchOperand_NoMatch;
7433     }
7434 
7435     if (!isGFX10Plus() && (Prefix == "row_share" || Prefix == "row_xmask"))
7436       return MatchOperand_NoMatch;
7437 
7438     if (!isVI() && !isGFX9() &&
7439         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7440          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7441          Prefix == "row_bcast"))
7442       return MatchOperand_NoMatch;
7443 
7444     Parser.Lex();
7445     if (getLexer().isNot(AsmToken::Colon))
7446       return MatchOperand_ParseFail;
7447 
7448     if (Prefix == "quad_perm") {
7449       // quad_perm:[%d,%d,%d,%d]
7450       Parser.Lex();
7451       if (getLexer().isNot(AsmToken::LBrac))
7452         return MatchOperand_ParseFail;
7453       Parser.Lex();
7454 
7455       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7456         return MatchOperand_ParseFail;
7457 
7458       for (int i = 0; i < 3; ++i) {
7459         if (getLexer().isNot(AsmToken::Comma))
7460           return MatchOperand_ParseFail;
7461         Parser.Lex();
7462 
7463         int64_t Temp;
7464         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7465           return MatchOperand_ParseFail;
7466         const int shift = i*2 + 2;
7467         Int += (Temp << shift);
7468       }
7469 
7470       if (getLexer().isNot(AsmToken::RBrac))
7471         return MatchOperand_ParseFail;
7472       Parser.Lex();
7473     } else {
7474       // sel:%d
7475       Parser.Lex();
7476       if (getParser().parseAbsoluteExpression(Int))
7477         return MatchOperand_ParseFail;
7478 
7479       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7480         Int |= DppCtrl::ROW_SHL0;
7481       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7482         Int |= DppCtrl::ROW_SHR0;
7483       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7484         Int |= DppCtrl::ROW_ROR0;
7485       } else if (Prefix == "wave_shl" && 1 == Int) {
7486         Int = DppCtrl::WAVE_SHL1;
7487       } else if (Prefix == "wave_rol" && 1 == Int) {
7488         Int = DppCtrl::WAVE_ROL1;
7489       } else if (Prefix == "wave_shr" && 1 == Int) {
7490         Int = DppCtrl::WAVE_SHR1;
7491       } else if (Prefix == "wave_ror" && 1 == Int) {
7492         Int = DppCtrl::WAVE_ROR1;
7493       } else if (Prefix == "row_bcast") {
7494         if (Int == 15) {
7495           Int = DppCtrl::BCAST15;
7496         } else if (Int == 31) {
7497           Int = DppCtrl::BCAST31;
7498         } else {
7499           return MatchOperand_ParseFail;
7500         }
7501       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7502         Int |= DppCtrl::ROW_SHARE_FIRST;
7503       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7504         Int |= DppCtrl::ROW_XMASK_FIRST;
7505       } else {
7506         return MatchOperand_ParseFail;
7507       }
7508     }
7509   }
7510 
7511   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7512   return MatchOperand_Success;
7513 }
7514 
7515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7516   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7517 }
7518 
7519 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7520   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7521 }
7522 
7523 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7524   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7525 }
7526 
7527 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7528   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7529 }
7530 
7531 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7532   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7533 }
7534 
7535 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7536   OptionalImmIndexMap OptionalIdx;
7537 
7538   unsigned I = 1;
7539   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7540   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7541     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7542   }
7543 
7544   int Fi = 0;
7545   for (unsigned E = Operands.size(); I != E; ++I) {
7546     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7547                                             MCOI::TIED_TO);
7548     if (TiedTo != -1) {
7549       assert((unsigned)TiedTo < Inst.getNumOperands());
7550       // Handle the tied 'old' or src2 operand for MAC instructions.
7551       Inst.addOperand(Inst.getOperand(TiedTo));
7552     }
7553     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7554     // Add the register arguments
7555     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7556       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
7557       // Skip it.
7558       continue;
7559     }
7560 
7561     if (IsDPP8) {
7562       if (Op.isDPP8()) {
7563         Op.addImmOperands(Inst, 1);
7564       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7565         Op.addRegWithFPInputModsOperands(Inst, 2);
7566       } else if (Op.isFI()) {
7567         Fi = Op.getImm();
7568       } else if (Op.isReg()) {
7569         Op.addRegOperands(Inst, 1);
7570       } else {
7571         llvm_unreachable("Invalid operand type");
7572       }
7573     } else {
7574       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7575         Op.addRegWithFPInputModsOperands(Inst, 2);
7576       } else if (Op.isDPPCtrl()) {
7577         Op.addImmOperands(Inst, 1);
7578       } else if (Op.isImm()) {
7579         // Handle optional arguments
7580         OptionalIdx[Op.getImmTy()] = I;
7581       } else {
7582         llvm_unreachable("Invalid operand type");
7583       }
7584     }
7585   }
7586 
7587   if (IsDPP8) {
7588     using namespace llvm::AMDGPU::DPP;
7589     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7590   } else {
7591     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7592     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7593     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7594     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7595       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7596     }
7597   }
7598 }
7599 
7600 //===----------------------------------------------------------------------===//
7601 // sdwa
7602 //===----------------------------------------------------------------------===//
7603 
7604 OperandMatchResultTy
7605 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7606                               AMDGPUOperand::ImmTy Type) {
7607   using namespace llvm::AMDGPU::SDWA;
7608 
7609   SMLoc S = Parser.getTok().getLoc();
7610   StringRef Value;
7611   OperandMatchResultTy res;
7612 
7613   res = parseStringWithPrefix(Prefix, Value);
7614   if (res != MatchOperand_Success) {
7615     return res;
7616   }
7617 
7618   int64_t Int;
7619   Int = StringSwitch<int64_t>(Value)
7620         .Case("BYTE_0", SdwaSel::BYTE_0)
7621         .Case("BYTE_1", SdwaSel::BYTE_1)
7622         .Case("BYTE_2", SdwaSel::BYTE_2)
7623         .Case("BYTE_3", SdwaSel::BYTE_3)
7624         .Case("WORD_0", SdwaSel::WORD_0)
7625         .Case("WORD_1", SdwaSel::WORD_1)
7626         .Case("DWORD", SdwaSel::DWORD)
7627         .Default(0xffffffff);
7628   Parser.Lex(); // eat last token
7629 
7630   if (Int == 0xffffffff) {
7631     return MatchOperand_ParseFail;
7632   }
7633 
7634   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7635   return MatchOperand_Success;
7636 }
7637 
7638 OperandMatchResultTy
7639 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7640   using namespace llvm::AMDGPU::SDWA;
7641 
7642   SMLoc S = Parser.getTok().getLoc();
7643   StringRef Value;
7644   OperandMatchResultTy res;
7645 
7646   res = parseStringWithPrefix("dst_unused", Value);
7647   if (res != MatchOperand_Success) {
7648     return res;
7649   }
7650 
7651   int64_t Int;
7652   Int = StringSwitch<int64_t>(Value)
7653         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7654         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7655         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7656         .Default(0xffffffff);
7657   Parser.Lex(); // eat last token
7658 
7659   if (Int == 0xffffffff) {
7660     return MatchOperand_ParseFail;
7661   }
7662 
7663   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7664   return MatchOperand_Success;
7665 }
7666 
7667 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7668   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7669 }
7670 
7671 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7672   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7673 }
7674 
7675 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7676   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7677 }
7678 
7679 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7680   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7681 }
7682 
7683 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7684   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7685 }
7686 
7687 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7688                               uint64_t BasicInstType,
7689                               bool SkipDstVcc,
7690                               bool SkipSrcVcc) {
7691   using namespace llvm::AMDGPU::SDWA;
7692 
7693   OptionalImmIndexMap OptionalIdx;
7694   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7695   bool SkippedVcc = false;
7696 
7697   unsigned I = 1;
7698   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7699   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7700     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7701   }
7702 
7703   for (unsigned E = Operands.size(); I != E; ++I) {
7704     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7705     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7706         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7707       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
7708       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7709       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7710       // Skip VCC only if we didn't skip it on previous iteration.
7711       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7712       if (BasicInstType == SIInstrFlags::VOP2 &&
7713           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7714            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7715         SkippedVcc = true;
7716         continue;
7717       } else if (BasicInstType == SIInstrFlags::VOPC &&
7718                  Inst.getNumOperands() == 0) {
7719         SkippedVcc = true;
7720         continue;
7721       }
7722     }
7723     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7724       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7725     } else if (Op.isImm()) {
7726       // Handle optional arguments
7727       OptionalIdx[Op.getImmTy()] = I;
7728     } else {
7729       llvm_unreachable("Invalid operand type");
7730     }
7731     SkippedVcc = false;
7732   }
7733 
7734   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7735       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7736       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7737     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
7738     switch (BasicInstType) {
7739     case SIInstrFlags::VOP1:
7740       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7741       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7742         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7743       }
7744       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7745       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7746       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7747       break;
7748 
7749     case SIInstrFlags::VOP2:
7750       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7751       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7752         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7753       }
7754       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7755       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7756       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7757       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7758       break;
7759 
7760     case SIInstrFlags::VOPC:
7761       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7762         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7763       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7764       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7765       break;
7766 
7767     default:
7768       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7769     }
7770   }
7771 
  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
7774   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7775       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7776     auto it = Inst.begin();
7777     std::advance(
7778       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7779     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7780   }
7781 }
7782 
7783 //===----------------------------------------------------------------------===//
7784 // mAI
7785 //===----------------------------------------------------------------------===//
7786 
7787 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7788   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7789 }
7790 
7791 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7792   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7793 }
7794 
7795 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7796   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7797 }
7798 
7799 /// Force static initialization.
7800 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7801   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7802   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7803 }
7804 
7805 #define GET_REGISTER_MATCHER
7806 #define GET_MATCHER_IMPLEMENTATION
7807 #define GET_MNEMONIC_SPELL_CHECKER
7808 #define GET_MNEMONIC_CHECKER
7809 #include "AMDGPUGenAsmMatcher.inc"
7810 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
7813 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7814                                                      unsigned Kind) {
7815   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7816   // But MatchInstructionImpl() expects to meet token and fails to validate
7817   // operand. This method checks if we are given immediate operand but expect to
7818   // get corresponding token.
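  // For instance, a trailing "gds" modifier on a DS instruction is parsed as
  // an immediate; the MCK_gds case below accepts it via Operand.isGDS().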
7819   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7820   switch (Kind) {
7821   case MCK_addr64:
7822     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7823   case MCK_gds:
7824     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7825   case MCK_lds:
7826     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7827   case MCK_glc:
7828     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7829   case MCK_idxen:
7830     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7831   case MCK_offen:
7832     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7833   case MCK_SSrcB32:
    // When operands have expression values, they return true for isToken,
    // because a token and an expression cannot be distinguished at parse
    // time. MatchInstructionImpl() always tries to match an operand as a
    // token when isToken returns true, and when the name of the expression
    // is not a valid token the match fails, so we need to handle it here.
7840     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7841   case MCK_SSrcF32:
7842     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7843   case MCK_SoppBrTarget:
7844     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7845   case MCK_VReg32OrOff:
7846     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7847   case MCK_InterpSlot:
7848     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7849   case MCK_Attr:
7850     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7851   case MCK_AttrChan:
7852     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7853   case MCK_ImmSMEMOffset:
7854     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7855   case MCK_SReg_64:
7856   case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be accepted
    // with 64-bit operands. The following code enables it for SReg_64
    // operands used as source and destination. Remaining source operands
    // are handled in isInlinableImm.
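    // For example, this allows "null" where a 64-bit scalar operand is
    // expected, as in "s_mov_b64 s[0:1], null" (illustrative).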
7862     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7863   default:
7864     return Match_InvalidOperand;
7865   }
7866 }
7867 
7868 //===----------------------------------------------------------------------===//
7869 // endpgm
7870 //===----------------------------------------------------------------------===//
7871 
7872 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7873   SMLoc S = Parser.getTok().getLoc();
7874   int64_t Imm = 0;
7875 
7876   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
7878     Imm = 0;
7879   }
7880 
7881   if (!isUInt<16>(Imm)) {
7882     Error(S, "expected a 16-bit value");
7883     return MatchOperand_ParseFail;
7884   }
7885 
7886   Operands.push_back(
7887       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7888   return MatchOperand_Success;
7889 }
7890 
7891 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7892