1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
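  // Source operand modifiers as written in assembly: Abs and Neg are the
  // floating-point modifiers (|x| and -x), Sext is the integer modifier
  // (sext(x)). They are encoded into a src*_modifiers operand using the
  // SISrcMods bits below.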
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191   enum ImmKindTy {
192     ImmKindTyNone,
193     ImmKindTyLiteral,
194     ImmKindTyConst,
195   };
196 
197 private:
198   struct TokOp {
199     const char *Data;
200     unsigned Length;
201   };
202 
203   struct ImmOp {
204     int64_t Val;
205     ImmTy Type;
206     bool IsFPImm;
207     mutable ImmKindTy Kind;
208     Modifiers Mods;
209   };
210 
211   struct RegOp {
212     unsigned RegNo;
213     Modifiers Mods;
214   };
215 
216   union {
217     TokOp Tok;
218     ImmOp Imm;
219     RegOp Reg;
220     const MCExpr *Expr;
221   };
222 
223 public:
224   bool isToken() const override {
225     if (Kind == Token)
226       return true;
227 
228     // When parsing operands, we can't always tell if something was meant to be
229     // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
232     return isSymbolRefExpr();
233   }
234 
235   bool isSymbolRefExpr() const {
236     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
237   }
238 
239   bool isImm() const override {
240     return Kind == Immediate;
241   }
242 
243   void setImmKindNone() const {
244     assert(isImm());
245     Imm.Kind = ImmKindTyNone;
246   }
247 
248   void setImmKindLiteral() const {
249     assert(isImm());
250     Imm.Kind = ImmKindTyLiteral;
251   }
252 
253   void setImmKindConst() const {
254     assert(isImm());
255     Imm.Kind = ImmKindTyConst;
256   }
257 
258   bool IsImmKindLiteral() const {
259     return isImm() && Imm.Kind == ImmKindTyLiteral;
260   }
261 
262   bool isImmKindConst() const {
263     return isImm() && Imm.Kind == ImmKindTyConst;
264   }
265 
266   bool isInlinableImm(MVT type) const;
267   bool isLiteralImm(MVT type) const;
268 
269   bool isRegKind() const {
270     return Kind == Register;
271   }
272 
273   bool isReg() const override {
274     return isRegKind() && !hasModifiers();
275   }
276 
277   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
278     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
279   }
280 
281   bool isRegOrImmWithInt16InputMods() const {
282     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
283   }
284 
285   bool isRegOrImmWithInt32InputMods() const {
286     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
287   }
288 
289   bool isRegOrImmWithInt64InputMods() const {
290     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
291   }
292 
293   bool isRegOrImmWithFP16InputMods() const {
294     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
295   }
296 
297   bool isRegOrImmWithFP32InputMods() const {
298     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
299   }
300 
301   bool isRegOrImmWithFP64InputMods() const {
302     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
303   }
304 
305   bool isVReg() const {
306     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
307            isRegClass(AMDGPU::VReg_64RegClassID) ||
308            isRegClass(AMDGPU::VReg_96RegClassID) ||
309            isRegClass(AMDGPU::VReg_128RegClassID) ||
310            isRegClass(AMDGPU::VReg_160RegClassID) ||
311            isRegClass(AMDGPU::VReg_192RegClassID) ||
312            isRegClass(AMDGPU::VReg_256RegClassID) ||
313            isRegClass(AMDGPU::VReg_512RegClassID) ||
314            isRegClass(AMDGPU::VReg_1024RegClassID);
315   }
316 
317   bool isVReg32() const {
318     return isRegClass(AMDGPU::VGPR_32RegClassID);
319   }
320 
321   bool isVReg32OrOff() const {
322     return isOff() || isVReg32();
323   }
324 
325   bool isNull() const {
326     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
327   }
328 
329   bool isSDWAOperand(MVT type) const;
330   bool isSDWAFP16Operand() const;
331   bool isSDWAFP32Operand() const;
332   bool isSDWAInt16Operand() const;
333   bool isSDWAInt32Operand() const;
334 
335   bool isImmTy(ImmTy ImmT) const {
336     return isImm() && Imm.Type == ImmT;
337   }
338 
339   bool isImmModifier() const {
340     return isImm() && Imm.Type != ImmTyNone;
341   }
342 
343   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
344   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
345   bool isDMask() const { return isImmTy(ImmTyDMask); }
346   bool isDim() const { return isImmTy(ImmTyDim); }
347   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
348   bool isDA() const { return isImmTy(ImmTyDA); }
349   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
350   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
351   bool isLWE() const { return isImmTy(ImmTyLWE); }
352   bool isOff() const { return isImmTy(ImmTyOff); }
353   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
354   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
355   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
356   bool isOffen() const { return isImmTy(ImmTyOffen); }
357   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
358   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
359   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
360   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
361   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
362 
363   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
364   bool isGDS() const { return isImmTy(ImmTyGDS); }
365   bool isLDS() const { return isImmTy(ImmTyLDS); }
366   bool isDLC() const { return isImmTy(ImmTyDLC); }
367   bool isGLC() const { return isImmTy(ImmTyGLC); }
368   // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
369   // value of the GLC operand.
370   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
371   bool isSLC() const { return isImmTy(ImmTySLC); }
372   bool isSWZ() const { return isImmTy(ImmTySWZ); }
373   bool isTFE() const { return isImmTy(ImmTyTFE); }
374   bool isD16() const { return isImmTy(ImmTyD16); }
375   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
376   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
377   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
378   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
379   bool isFI() const { return isImmTy(ImmTyDppFi); }
380   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
381   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
382   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
383   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
384   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
385   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
386   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
387   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
388   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
389   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
390   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
391   bool isHigh() const { return isImmTy(ImmTyHigh); }
392 
393   bool isMod() const {
394     return isClampSI() || isOModSI();
395   }
396 
397   bool isRegOrImm() const {
398     return isReg() || isImm();
399   }
400 
401   bool isRegClass(unsigned RCID) const;
402 
403   bool isInlineValue() const;
404 
405   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
406     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
407   }
408 
409   bool isSCSrcB16() const {
410     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
411   }
412 
413   bool isSCSrcV2B16() const {
414     return isSCSrcB16();
415   }
416 
417   bool isSCSrcB32() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
419   }
420 
421   bool isSCSrcB64() const {
422     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
423   }
424 
425   bool isBoolReg() const;
426 
427   bool isSCSrcF16() const {
428     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
429   }
430 
431   bool isSCSrcV2F16() const {
432     return isSCSrcF16();
433   }
434 
435   bool isSCSrcF32() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
437   }
438 
439   bool isSCSrcF64() const {
440     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
441   }
442 
443   bool isSSrcB32() const {
444     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
445   }
446 
447   bool isSSrcB16() const {
448     return isSCSrcB16() || isLiteralImm(MVT::i16);
449   }
450 
451   bool isSSrcV2B16() const {
452     llvm_unreachable("cannot happen");
453     return isSSrcB16();
454   }
455 
456   bool isSSrcB64() const {
457     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
458     // See isVSrc64().
459     return isSCSrcB64() || isLiteralImm(MVT::i64);
460   }
461 
462   bool isSSrcF32() const {
463     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
464   }
465 
466   bool isSSrcF64() const {
467     return isSCSrcB64() || isLiteralImm(MVT::f64);
468   }
469 
470   bool isSSrcF16() const {
471     return isSCSrcB16() || isLiteralImm(MVT::f16);
472   }
473 
474   bool isSSrcV2F16() const {
475     llvm_unreachable("cannot happen");
476     return isSSrcF16();
477   }
478 
479   bool isSSrcOrLdsB32() const {
480     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
481            isLiteralImm(MVT::i32) || isExpr();
482   }
483 
484   bool isVCSrcB32() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
486   }
487 
488   bool isVCSrcB64() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
490   }
491 
492   bool isVCSrcB16() const {
493     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
494   }
495 
496   bool isVCSrcV2B16() const {
497     return isVCSrcB16();
498   }
499 
500   bool isVCSrcF32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
502   }
503 
504   bool isVCSrcF64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
506   }
507 
508   bool isVCSrcF16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
510   }
511 
512   bool isVCSrcV2F16() const {
513     return isVCSrcF16();
514   }
515 
516   bool isVSrcB32() const {
517     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
518   }
519 
520   bool isVSrcB64() const {
521     return isVCSrcF64() || isLiteralImm(MVT::i64);
522   }
523 
524   bool isVSrcB16() const {
525     return isVCSrcB16() || isLiteralImm(MVT::i16);
526   }
527 
528   bool isVSrcV2B16() const {
529     return isVSrcB16() || isLiteralImm(MVT::v2i16);
530   }
531 
532   bool isVSrcF32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
534   }
535 
536   bool isVSrcF64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::f64);
538   }
539 
540   bool isVSrcF16() const {
541     return isVCSrcF16() || isLiteralImm(MVT::f16);
542   }
543 
544   bool isVSrcV2F16() const {
545     return isVSrcF16() || isLiteralImm(MVT::v2f16);
546   }
547 
548   bool isVISrcB32() const {
549     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
550   }
551 
552   bool isVISrcB16() const {
553     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
554   }
555 
556   bool isVISrcV2B16() const {
557     return isVISrcB16();
558   }
559 
560   bool isVISrcF32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
562   }
563 
564   bool isVISrcF16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
566   }
567 
568   bool isVISrcV2F16() const {
569     return isVISrcF16() || isVISrcB32();
570   }
571 
572   bool isAISrcB32() const {
573     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
574   }
575 
576   bool isAISrcB16() const {
577     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
578   }
579 
580   bool isAISrcV2B16() const {
581     return isAISrcB16();
582   }
583 
584   bool isAISrcF32() const {
585     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
586   }
587 
588   bool isAISrcF16() const {
589     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
590   }
591 
592   bool isAISrcV2F16() const {
593     return isAISrcF16() || isAISrcB32();
594   }
595 
596   bool isAISrc_128B32() const {
597     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
598   }
599 
600   bool isAISrc_128B16() const {
601     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
602   }
603 
604   bool isAISrc_128V2B16() const {
605     return isAISrc_128B16();
606   }
607 
608   bool isAISrc_128F32() const {
609     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
610   }
611 
612   bool isAISrc_128F16() const {
613     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
614   }
615 
616   bool isAISrc_128V2F16() const {
617     return isAISrc_128F16() || isAISrc_128B32();
618   }
619 
620   bool isAISrc_512B32() const {
621     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
622   }
623 
624   bool isAISrc_512B16() const {
625     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
626   }
627 
628   bool isAISrc_512V2B16() const {
629     return isAISrc_512B16();
630   }
631 
632   bool isAISrc_512F32() const {
633     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
634   }
635 
636   bool isAISrc_512F16() const {
637     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
638   }
639 
640   bool isAISrc_512V2F16() const {
641     return isAISrc_512F16() || isAISrc_512B32();
642   }
643 
644   bool isAISrc_1024B32() const {
645     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
646   }
647 
648   bool isAISrc_1024B16() const {
649     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
650   }
651 
652   bool isAISrc_1024V2B16() const {
653     return isAISrc_1024B16();
654   }
655 
656   bool isAISrc_1024F32() const {
657     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
658   }
659 
660   bool isAISrc_1024F16() const {
661     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
662   }
663 
664   bool isAISrc_1024V2F16() const {
665     return isAISrc_1024F16() || isAISrc_1024B32();
666   }
667 
668   bool isKImmFP32() const {
669     return isLiteralImm(MVT::f32);
670   }
671 
672   bool isKImmFP16() const {
673     return isLiteralImm(MVT::f16);
674   }
675 
676   bool isMem() const override {
677     return false;
678   }
679 
680   bool isExpr() const {
681     return Kind == Expression;
682   }
683 
684   bool isSoppBrTarget() const {
685     return isExpr() || isImm();
686   }
687 
688   bool isSWaitCnt() const;
689   bool isHwreg() const;
690   bool isSendMsg() const;
691   bool isSwizzle() const;
692   bool isSMRDOffset8() const;
693   bool isSMEMOffset() const;
694   bool isSMRDLiteralOffset() const;
695   bool isDPP8() const;
696   bool isDPPCtrl() const;
697   bool isBLGP() const;
698   bool isCBSZ() const;
699   bool isABID() const;
700   bool isGPRIdxMode() const;
701   bool isS16Imm() const;
702   bool isU16Imm() const;
703   bool isEndpgm() const;
704 
705   StringRef getExpressionAsToken() const {
706     assert(isExpr());
707     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
708     return S->getSymbol().getName();
709   }
710 
711   StringRef getToken() const {
712     assert(isToken());
713 
714     if (Kind == Expression)
715       return getExpressionAsToken();
716 
717     return StringRef(Tok.Data, Tok.Length);
718   }
719 
720   int64_t getImm() const {
721     assert(isImm());
722     return Imm.Val;
723   }
724 
725   void setImm(int64_t Val) {
726     assert(isImm());
727     Imm.Val = Val;
728   }
729 
730   ImmTy getImmTy() const {
731     assert(isImm());
732     return Imm.Type;
733   }
734 
735   unsigned getReg() const override {
736     assert(isRegKind());
737     return Reg.RegNo;
738   }
739 
740   SMLoc getStartLoc() const override {
741     return StartLoc;
742   }
743 
744   SMLoc getEndLoc() const override {
745     return EndLoc;
746   }
747 
748   SMRange getLocRange() const {
749     return SMRange(StartLoc, EndLoc);
750   }
751 
752   Modifiers getModifiers() const {
753     assert(isRegKind() || isImmTy(ImmTyNone));
754     return isRegKind() ? Reg.Mods : Imm.Mods;
755   }
756 
757   void setModifiers(Modifiers Mods) {
758     assert(isRegKind() || isImmTy(ImmTyNone));
759     if (isRegKind())
760       Reg.Mods = Mods;
761     else
762       Imm.Mods = Mods;
763   }
764 
765   bool hasModifiers() const {
766     return getModifiers().hasModifiers();
767   }
768 
769   bool hasFPModifiers() const {
770     return getModifiers().hasFPModifiers();
771   }
772 
773   bool hasIntModifiers() const {
774     return getModifiers().hasIntModifiers();
775   }
776 
777   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
778 
779   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
780 
781   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
782 
783   template <unsigned Bitwidth>
784   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
785 
786   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
787     addKImmFPOperands<16>(Inst, N);
788   }
789 
790   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
791     addKImmFPOperands<32>(Inst, N);
792   }
793 
794   void addRegOperands(MCInst &Inst, unsigned N) const;
795 
796   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
797     addRegOperands(Inst, N);
798   }
799 
800   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
801     if (isRegKind())
802       addRegOperands(Inst, N);
803     else if (isExpr())
804       Inst.addOperand(MCOperand::createExpr(Expr));
805     else
806       addImmOperands(Inst, N);
807   }
808 
809   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
810     Modifiers Mods = getModifiers();
811     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
812     if (isRegKind()) {
813       addRegOperands(Inst, N);
814     } else {
815       addImmOperands(Inst, N, false);
816     }
817   }
818 
819   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
820     assert(!hasIntModifiers());
821     addRegOrImmWithInputModsOperands(Inst, N);
822   }
823 
824   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
825     assert(!hasFPModifiers());
826     addRegOrImmWithInputModsOperands(Inst, N);
827   }
828 
829   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
830     Modifiers Mods = getModifiers();
831     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
832     assert(isRegKind());
833     addRegOperands(Inst, N);
834   }
835 
836   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
837     assert(!hasIntModifiers());
838     addRegWithInputModsOperands(Inst, N);
839   }
840 
841   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
842     assert(!hasFPModifiers());
843     addRegWithInputModsOperands(Inst, N);
844   }
845 
846   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
847     if (isImm())
848       addImmOperands(Inst, N);
849     else {
850       assert(isExpr());
851       Inst.addOperand(MCOperand::createExpr(Expr));
852     }
853   }
854 
855   static void printImmTy(raw_ostream& OS, ImmTy Type) {
856     switch (Type) {
857     case ImmTyNone: OS << "None"; break;
858     case ImmTyGDS: OS << "GDS"; break;
859     case ImmTyLDS: OS << "LDS"; break;
860     case ImmTyOffen: OS << "Offen"; break;
861     case ImmTyIdxen: OS << "Idxen"; break;
862     case ImmTyAddr64: OS << "Addr64"; break;
863     case ImmTyOffset: OS << "Offset"; break;
864     case ImmTyInstOffset: OS << "InstOffset"; break;
865     case ImmTyOffset0: OS << "Offset0"; break;
866     case ImmTyOffset1: OS << "Offset1"; break;
867     case ImmTyDLC: OS << "DLC"; break;
868     case ImmTyGLC: OS << "GLC"; break;
869     case ImmTySLC: OS << "SLC"; break;
870     case ImmTySWZ: OS << "SWZ"; break;
871     case ImmTyTFE: OS << "TFE"; break;
872     case ImmTyD16: OS << "D16"; break;
873     case ImmTyFORMAT: OS << "FORMAT"; break;
874     case ImmTyClampSI: OS << "ClampSI"; break;
875     case ImmTyOModSI: OS << "OModSI"; break;
876     case ImmTyDPP8: OS << "DPP8"; break;
877     case ImmTyDppCtrl: OS << "DppCtrl"; break;
878     case ImmTyDppRowMask: OS << "DppRowMask"; break;
879     case ImmTyDppBankMask: OS << "DppBankMask"; break;
880     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
881     case ImmTyDppFi: OS << "FI"; break;
882     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
883     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
884     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
885     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
886     case ImmTyDMask: OS << "DMask"; break;
887     case ImmTyDim: OS << "Dim"; break;
888     case ImmTyUNorm: OS << "UNorm"; break;
889     case ImmTyDA: OS << "DA"; break;
890     case ImmTyR128A16: OS << "R128A16"; break;
891     case ImmTyA16: OS << "A16"; break;
892     case ImmTyLWE: OS << "LWE"; break;
893     case ImmTyOff: OS << "Off"; break;
894     case ImmTyExpTgt: OS << "ExpTgt"; break;
895     case ImmTyExpCompr: OS << "ExpCompr"; break;
896     case ImmTyExpVM: OS << "ExpVM"; break;
897     case ImmTyHwreg: OS << "Hwreg"; break;
898     case ImmTySendMsg: OS << "SendMsg"; break;
899     case ImmTyInterpSlot: OS << "InterpSlot"; break;
900     case ImmTyInterpAttr: OS << "InterpAttr"; break;
901     case ImmTyAttrChan: OS << "AttrChan"; break;
902     case ImmTyOpSel: OS << "OpSel"; break;
903     case ImmTyOpSelHi: OS << "OpSelHi"; break;
904     case ImmTyNegLo: OS << "NegLo"; break;
905     case ImmTyNegHi: OS << "NegHi"; break;
906     case ImmTySwizzle: OS << "Swizzle"; break;
907     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
908     case ImmTyHigh: OS << "High"; break;
909     case ImmTyBLGP: OS << "BLGP"; break;
910     case ImmTyCBSZ: OS << "CBSZ"; break;
911     case ImmTyABID: OS << "ABID"; break;
912     case ImmTyEndpgm: OS << "Endpgm"; break;
913     }
914   }
915 
916   void print(raw_ostream &OS) const override {
917     switch (Kind) {
918     case Register:
919       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
920       break;
921     case Immediate:
922       OS << '<' << getImm();
923       if (getImmTy() != ImmTyNone) {
924         OS << " type: "; printImmTy(OS, getImmTy());
925       }
926       OS << " mods: " << Imm.Mods << '>';
927       break;
928     case Token:
929       OS << '\'' << getToken() << '\'';
930       break;
931     case Expression:
932       OS << "<expr " << *Expr << '>';
933       break;
934     }
935   }
936 
937   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
938                                       int64_t Val, SMLoc Loc,
939                                       ImmTy Type = ImmTyNone,
940                                       bool IsFPImm = false) {
941     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
942     Op->Imm.Val = Val;
943     Op->Imm.IsFPImm = IsFPImm;
944     Op->Imm.Kind = ImmKindTyNone;
945     Op->Imm.Type = Type;
946     Op->Imm.Mods = Modifiers();
947     Op->StartLoc = Loc;
948     Op->EndLoc = Loc;
949     return Op;
950   }
951 
952   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
953                                         StringRef Str, SMLoc Loc,
954                                         bool HasExplicitEncodingSize = true) {
955     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
956     Res->Tok.Data = Str.data();
957     Res->Tok.Length = Str.size();
958     Res->StartLoc = Loc;
959     Res->EndLoc = Loc;
960     return Res;
961   }
962 
963   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
964                                       unsigned RegNo, SMLoc S,
965                                       SMLoc E) {
966     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
967     Op->Reg.RegNo = RegNo;
968     Op->Reg.Mods = Modifiers();
969     Op->StartLoc = S;
970     Op->EndLoc = E;
971     return Op;
972   }
973 
974   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
975                                        const class MCExpr *Expr, SMLoc S) {
976     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
977     Op->Expr = Expr;
978     Op->StartLoc = S;
979     Op->EndLoc = S;
980     return Op;
981   }
982 };
983 
984 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
985   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
986   return OS;
987 }
988 
989 //===----------------------------------------------------------------------===//
990 // AsmParser
991 //===----------------------------------------------------------------------===//
992 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
996 class KernelScopeInfo {
997   int SgprIndexUnusedMin = -1;
998   int VgprIndexUnusedMin = -1;
999   MCContext *Ctx = nullptr;
1000 
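  // Record that SGPR index i is used and, when a context is attached, update
  // the ".kernel.sgpr_count" symbol to the number of SGPRs used so far
  // (highest used index + 1). usesVgprAt below does the same for VGPRs.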
1001   void usesSgprAt(int i) {
1002     if (i >= SgprIndexUnusedMin) {
1003       SgprIndexUnusedMin = ++i;
1004       if (Ctx) {
1005         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1006         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1007       }
1008     }
1009   }
1010 
1011   void usesVgprAt(int i) {
1012     if (i >= VgprIndexUnusedMin) {
1013       VgprIndexUnusedMin = ++i;
1014       if (Ctx) {
1015         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1016         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1017       }
1018     }
1019   }
1020 
1021 public:
1022   KernelScopeInfo() = default;
1023 
1024   void initialize(MCContext &Context) {
1025     Ctx = &Context;
1026     usesSgprAt(SgprIndexUnusedMin = -1);
1027     usesVgprAt(VgprIndexUnusedMin = -1);
1028   }
1029 
1030   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1031     switch (RegKind) {
1032       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1033       case IS_AGPR: // fall through
1034       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1035       default: break;
1036     }
1037   }
1038 };
1039 
1040 class AMDGPUAsmParser : public MCTargetAsmParser {
1041   MCAsmParser &Parser;
1042 
1043   // Number of extra operands parsed after the first optional operand.
1044   // This may be necessary to skip hardcoded mandatory operands.
1045   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1046 
1047   unsigned ForcedEncodingSize = 0;
1048   bool ForcedDPP = false;
1049   bool ForcedSDWA = false;
1050   KernelScopeInfo KernelScope;
1051 
1052   /// @name Auto-generated Match Functions
1053   /// {
1054 
1055 #define GET_ASSEMBLER_HEADER
1056 #include "AMDGPUGenAsmMatcher.inc"
1057 
1058   /// }
1059 
1060 private:
1061   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1062   bool OutOfRangeError(SMRange Range);
1063   /// Calculate VGPR/SGPR blocks required for given target, reserved
1064   /// registers, and user-specified NextFreeXGPR values.
1065   ///
1066   /// \param Features [in] Target features, used for bug corrections.
1067   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1068   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1069   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1070   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1071   /// descriptor field, if valid.
1072   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1073   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1074   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1075   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1076   /// \param VGPRBlocks [out] Result VGPR block count.
1077   /// \param SGPRBlocks [out] Result SGPR block count.
1078   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1079                           bool FlatScrUsed, bool XNACKUsed,
1080                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1081                           SMRange VGPRRange, unsigned NextFreeSGPR,
1082                           SMRange SGPRRange, unsigned &VGPRBlocks,
1083                           unsigned &SGPRBlocks);
1084   bool ParseDirectiveAMDGCNTarget();
1085   bool ParseDirectiveAMDHSAKernel();
1086   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1087   bool ParseDirectiveHSACodeObjectVersion();
1088   bool ParseDirectiveHSACodeObjectISA();
1089   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1090   bool ParseDirectiveAMDKernelCodeT();
1091   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1092   bool ParseDirectiveAMDGPUHsaKernel();
1093 
1094   bool ParseDirectiveISAVersion();
1095   bool ParseDirectiveHSAMetadata();
1096   bool ParseDirectivePALMetadataBegin();
1097   bool ParseDirectivePALMetadata();
1098   bool ParseDirectiveAMDGPULDS();
1099 
1100   /// Common code to parse out a block of text (typically YAML) between start and
1101   /// end directives.
1102   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1103                            const char *AssemblerDirectiveEnd,
1104                            std::string &CollectString);
1105 
1106   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1107                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1108   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1109                            unsigned &RegNum, unsigned &RegWidth,
1110                            bool RestoreOnFailure = false);
1111   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1112                            unsigned &RegNum, unsigned &RegWidth,
1113                            SmallVectorImpl<AsmToken> &Tokens);
1114   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1115                            unsigned &RegWidth,
1116                            SmallVectorImpl<AsmToken> &Tokens);
1117   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1118                            unsigned &RegWidth,
1119                            SmallVectorImpl<AsmToken> &Tokens);
1120   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1121                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1122   bool ParseRegRange(unsigned& Num, unsigned& Width);
1123   unsigned getRegularReg(RegisterKind RegKind,
1124                          unsigned RegNum,
1125                          unsigned RegWidth,
1126                          SMLoc Loc);
1127 
1128   bool isRegister();
1129   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1130   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1131   void initializeGprCountSymbol(RegisterKind RegKind);
1132   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1133                              unsigned RegWidth);
1134   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1135                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1136   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1137                  bool IsGdsHardcoded);
1138 
1139 public:
1140   enum AMDGPUMatchResultTy {
1141     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1142   };
1143   enum OperandMode {
1144     OperandMode_Default,
1145     OperandMode_NSA,
1146   };
1147 
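  // Maps the immediate type of an optional operand to its index in the parsed
  // operand list, so the cvt* converters can emit optional operands in the
  // order the MCInst expects.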
1148   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1149 
1150   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1151                const MCInstrInfo &MII,
1152                const MCTargetOptions &Options)
1153       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1154     MCAsmParserExtension::Initialize(Parser);
1155 
1156     if (getFeatureBits().none()) {
1157       // Set default features.
1158       copySTI().ToggleFeature("southern-islands");
1159     }
1160 
1161     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1162 
1163     {
      // TODO: make these predefined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1168       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1169       MCContext &Ctx = getContext();
1170       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1171         MCSymbol *Sym =
1172             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1173         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1174         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1175         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1176         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1177         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1178       } else {
1179         MCSymbol *Sym =
1180             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1181         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1182         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1183         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1184         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1185         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1186       }
1187       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1188         initializeGprCountSymbol(IS_VGPR);
1189         initializeGprCountSymbol(IS_SGPR);
1190       } else
1191         KernelScope.initialize(getContext());
1192     }
1193   }
1194 
1195   bool hasXNACK() const {
1196     return AMDGPU::hasXNACK(getSTI());
1197   }
1198 
1199   bool hasMIMG_R128() const {
1200     return AMDGPU::hasMIMG_R128(getSTI());
1201   }
1202 
1203   bool hasPackedD16() const {
1204     return AMDGPU::hasPackedD16(getSTI());
1205   }
1206 
1207   bool hasGFX10A16() const {
1208     return AMDGPU::hasGFX10A16(getSTI());
1209   }
1210 
1211   bool isSI() const {
1212     return AMDGPU::isSI(getSTI());
1213   }
1214 
1215   bool isCI() const {
1216     return AMDGPU::isCI(getSTI());
1217   }
1218 
1219   bool isVI() const {
1220     return AMDGPU::isVI(getSTI());
1221   }
1222 
1223   bool isGFX9() const {
1224     return AMDGPU::isGFX9(getSTI());
1225   }
1226 
1227   bool isGFX9Plus() const {
1228     return AMDGPU::isGFX9Plus(getSTI());
1229   }
1230 
1231   bool isGFX10() const {
1232     return AMDGPU::isGFX10(getSTI());
1233   }
1234 
1235   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1236 
1237   bool isGFX10_BEncoding() const {
1238     return AMDGPU::isGFX10_BEncoding(getSTI());
1239   }
1240 
1241   bool hasInv2PiInlineImm() const {
1242     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1243   }
1244 
1245   bool hasFlatOffsets() const {
1246     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1247   }
1248 
1249   bool hasSGPR102_SGPR103() const {
1250     return !isVI() && !isGFX9();
1251   }
1252 
1253   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1254 
1255   bool hasIntClamp() const {
1256     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1257   }
1258 
1259   AMDGPUTargetStreamer &getTargetStreamer() {
1260     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1261     return static_cast<AMDGPUTargetStreamer &>(TS);
1262   }
1263 
1264   const MCRegisterInfo *getMRI() const {
1265     // We need this const_cast because for some reason getContext() is not const
1266     // in MCAsmParser.
1267     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1268   }
1269 
1270   const MCInstrInfo *getMII() const {
1271     return &MII;
1272   }
1273 
1274   const FeatureBitset &getFeatureBits() const {
1275     return getSTI().getFeatureBits();
1276   }
1277 
1278   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1279   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1280   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1281 
1282   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1283   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1284   bool isForcedDPP() const { return ForcedDPP; }
1285   bool isForcedSDWA() const { return ForcedSDWA; }
1286   ArrayRef<unsigned> getMatchedVariants() const;
1287   StringRef getMatchedVariantName() const;
1288 
1289   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1290   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1291                      bool RestoreOnFailure);
1292   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1293   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1294                                         SMLoc &EndLoc) override;
1295   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1296   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1297                                       unsigned Kind) override;
1298   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1299                                OperandVector &Operands, MCStreamer &Out,
1300                                uint64_t &ErrorInfo,
1301                                bool MatchingInlineAsm) override;
1302   bool ParseDirective(AsmToken DirectiveID) override;
1303   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1304                                     OperandMode Mode = OperandMode_Default);
1305   StringRef parseMnemonicSuffix(StringRef Name);
1306   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1307                         SMLoc NameLoc, OperandVector &Operands) override;
1308   //bool ProcessInstruction(MCInst &Inst);
1309 
1310   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1311 
1312   OperandMatchResultTy
1313   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1314                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1315                      bool (*ConvertResult)(int64_t &) = nullptr);
1316 
1317   OperandMatchResultTy
1318   parseOperandArrayWithPrefix(const char *Prefix,
1319                               OperandVector &Operands,
1320                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1321                               bool (*ConvertResult)(int64_t&) = nullptr);
1322 
1323   OperandMatchResultTy
1324   parseNamedBit(const char *Name, OperandVector &Operands,
1325                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1326   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1327                                              StringRef &Value);
1328 
1329   bool isModifier();
1330   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1331   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1332   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1333   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1334   bool parseSP3NegModifier();
1335   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1336   OperandMatchResultTy parseReg(OperandVector &Operands);
1337   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1338   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1339   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1340   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1341   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1342   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1343   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1344   OperandMatchResultTy parseUfmt(int64_t &Format);
1345   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1346   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1347   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1348   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1349   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1350   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1351   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1352 
1353   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1354   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1355   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1356   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1357 
1358   bool parseCnt(int64_t &IntVal);
1359   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1360   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1361 
1362 private:
1363   struct OperandInfoTy {
1364     SMLoc Loc;
1365     int64_t Id;
1366     bool IsSymbolic = false;
1367     bool IsDefined = false;
1368 
1369     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1370   };
1371 
1372   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1373   bool validateSendMsg(const OperandInfoTy &Msg,
1374                        const OperandInfoTy &Op,
1375                        const OperandInfoTy &Stream);
1376 
1377   bool parseHwregBody(OperandInfoTy &HwReg,
1378                       OperandInfoTy &Offset,
1379                       OperandInfoTy &Width);
1380   bool validateHwreg(const OperandInfoTy &HwReg,
1381                      const OperandInfoTy &Offset,
1382                      const OperandInfoTy &Width);
1383 
1384   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1385   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1386   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1387 
1388   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1389                       const OperandVector &Operands) const;
1390   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1391   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1392   SMLoc getLitLoc(const OperandVector &Operands) const;
1393   SMLoc getConstLoc(const OperandVector &Operands) const;
1394 
1395   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1396   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1397   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1398   bool validateSOPLiteral(const MCInst &Inst) const;
1399   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1400   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1401   bool validateIntClampSupported(const MCInst &Inst);
1402   bool validateMIMGAtomicDMask(const MCInst &Inst);
1403   bool validateMIMGGatherDMask(const MCInst &Inst);
1404   bool validateMovrels(const MCInst &Inst);
1405   bool validateMIMGDataSize(const MCInst &Inst);
1406   bool validateMIMGAddrSize(const MCInst &Inst);
1407   bool validateMIMGD16(const MCInst &Inst);
1408   bool validateMIMGDim(const MCInst &Inst);
1409   bool validateLdsDirect(const MCInst &Inst);
1410   bool validateOpSel(const MCInst &Inst);
1411   bool validateVccOperand(unsigned Reg) const;
1412   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1413   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1414   bool validateDivScale(const MCInst &Inst);
1415   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1416                              const SMLoc &IDLoc);
1417   unsigned getConstantBusLimit(unsigned Opcode) const;
1418   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1419   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1420   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1421 
1422   bool isSupportedMnemo(StringRef Mnemo,
1423                         const FeatureBitset &FBS);
1424   bool isSupportedMnemo(StringRef Mnemo,
1425                         const FeatureBitset &FBS,
1426                         ArrayRef<unsigned> Variants);
1427   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1428 
1429   bool isId(const StringRef Id) const;
1430   bool isId(const AsmToken &Token, const StringRef Id) const;
1431   bool isToken(const AsmToken::TokenKind Kind) const;
1432   bool trySkipId(const StringRef Id);
1433   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1434   bool trySkipToken(const AsmToken::TokenKind Kind);
1435   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1436   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1437   bool parseId(StringRef &Val, const StringRef ErrMsg);
1438 
1439   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1440   AsmToken::TokenKind getTokenKind() const;
1441   bool parseExpr(int64_t &Imm);
1442   bool parseExpr(OperandVector &Operands);
1443   StringRef getTokenStr() const;
1444   AsmToken peekToken();
1445   AsmToken getToken() const;
1446   SMLoc getLoc() const;
1447   void lex();
1448 
1449 public:
1450   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1451   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1452 
1453   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1454   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1455   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1456   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1457   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1458   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1459 
1460   bool parseSwizzleOperand(int64_t &Op,
1461                            const unsigned MinVal,
1462                            const unsigned MaxVal,
1463                            const StringRef ErrMsg,
1464                            SMLoc &Loc);
1465   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1466                             const unsigned MinVal,
1467                             const unsigned MaxVal,
1468                             const StringRef ErrMsg);
1469   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1470   bool parseSwizzleOffset(int64_t &Imm);
1471   bool parseSwizzleMacro(int64_t &Imm);
1472   bool parseSwizzleQuadPerm(int64_t &Imm);
1473   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1474   bool parseSwizzleBroadcast(int64_t &Imm);
1475   bool parseSwizzleSwap(int64_t &Imm);
1476   bool parseSwizzleReverse(int64_t &Imm);
1477 
1478   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1479   int64_t parseGPRIdxMacro();
1480 
1481   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1482   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1483   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1484   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1485   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1486 
1487   AMDGPUOperand::Ptr defaultDLC() const;
1488   AMDGPUOperand::Ptr defaultGLC() const;
1489   AMDGPUOperand::Ptr defaultGLC_1() const;
1490   AMDGPUOperand::Ptr defaultSLC() const;
1491 
1492   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1493   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1494   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1495   AMDGPUOperand::Ptr defaultFlatOffset() const;
1496 
1497   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1498 
1499   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1500                OptionalImmIndexMap &OptionalIdx);
1501   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1502   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1503   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1504 
1505   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1506 
1507   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1508                bool IsAtomic = false);
1509   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1510   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1511 
1512   OperandMatchResultTy parseDim(OperandVector &Operands);
1513   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1514   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1515   AMDGPUOperand::Ptr defaultRowMask() const;
1516   AMDGPUOperand::Ptr defaultBankMask() const;
1517   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1518   AMDGPUOperand::Ptr defaultFI() const;
1519   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1520   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1521 
1522   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1523                                     AMDGPUOperand::ImmTy Type);
1524   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1525   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1526   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1527   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1528   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1529   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1530   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1531                uint64_t BasicInstType,
1532                bool SkipDstVcc = false,
1533                bool SkipSrcVcc = false);
1534 
1535   AMDGPUOperand::Ptr defaultBLGP() const;
1536   AMDGPUOperand::Ptr defaultCBSZ() const;
1537   AMDGPUOperand::Ptr defaultABID() const;
1538 
1539   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1540   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1541 };
1542 
1543 struct OptionalOperand {
1544   const char *Name;
1545   AMDGPUOperand::ImmTy Type;
1546   bool IsBit;
1547   bool (*ConvertResult)(int64_t&);
1548 };
1549 
1550 } // end anonymous namespace
1551 
// May be called with an integer type of equivalent bitwidth.
1553 static const fltSemantics *getFltSemantics(unsigned Size) {
1554   switch (Size) {
1555   case 4:
1556     return &APFloat::IEEEsingle();
1557   case 8:
1558     return &APFloat::IEEEdouble();
1559   case 2:
1560     return &APFloat::IEEEhalf();
1561   default:
1562     llvm_unreachable("unsupported fp type");
1563   }
1564 }
1565 
1566 static const fltSemantics *getFltSemantics(MVT VT) {
1567   return getFltSemantics(VT.getSizeInBits() / 8);
1568 }
1569 
1570 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1571   switch (OperandType) {
1572   case AMDGPU::OPERAND_REG_IMM_INT32:
1573   case AMDGPU::OPERAND_REG_IMM_FP32:
1574   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1575   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1576   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1577   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1578     return &APFloat::IEEEsingle();
1579   case AMDGPU::OPERAND_REG_IMM_INT64:
1580   case AMDGPU::OPERAND_REG_IMM_FP64:
1581   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1582   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1583     return &APFloat::IEEEdouble();
1584   case AMDGPU::OPERAND_REG_IMM_INT16:
1585   case AMDGPU::OPERAND_REG_IMM_FP16:
1586   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1587   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1588   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1589   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1590   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1591   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1592   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1593   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1594   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1595   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1596     return &APFloat::IEEEhalf();
1597   default:
1598     llvm_unreachable("unsupported fp type");
1599   }
1600 }
1601 
1602 //===----------------------------------------------------------------------===//
1603 // Operand
1604 //===----------------------------------------------------------------------===//
1605 
1606 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1607   bool Lost;
1608 
  // Convert the literal to the floating-point type given by VT
1610   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1611                                                APFloat::rmNearestTiesToEven,
1612                                                &Lost);
  // We allow precision loss but not overflow or underflow
1614   if (Status != APFloat::opOK &&
1615       Lost &&
1616       ((Status & APFloat::opOverflow)  != 0 ||
1617        (Status & APFloat::opUnderflow) != 0)) {
1618     return false;
1619   }
1620 
1621   return true;
1622 }
1623 
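// Returns true if Val fits in Size bits as either an unsigned or a signed
// value. For example, with Size == 16 both 0xFFFF and -1 are accepted,
// while 0x1FFFF is rejected.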
1624 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1625   return isUIntN(Size, Val) || isIntN(Size, Val);
1626 }
1627 
1628 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1629   if (VT.getScalarType() == MVT::i16) {
1630     // FP immediate values are broken.
1631     return isInlinableIntLiteral(Val);
1632   }
1633 
1634   // f16/v2f16 operands work correctly for all values.
1635   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1636 }
1637 
1638 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1639 
1640   // This is a hack to enable named inline values like
1641   // shared_base with both 32-bit and 64-bit operands.
1642   // Note that these values are defined as
1643   // 32-bit operands only.
1644   if (isInlineValue()) {
1645     return true;
1646   }
1647 
1648   if (!isImmTy(ImmTyNone)) {
1649     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1650     return false;
1651   }
1652   // TODO: We should avoid using host float here. It would be better to
1653   // check the float bit values which is what a few other places do.
1654   // We've had bot failures before due to weird NaN support on mips hosts.
1655 
1656   APInt Literal(64, Imm.Val);
1657 
  if (Imm.IsFPImm) { // We got an fp literal token
1659     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1660       return AMDGPU::isInlinableLiteral64(Imm.Val,
1661                                           AsmParser->hasInv2PiInlineImm());
1662     }
1663 
1664     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1665     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1666       return false;
1667 
1668     if (type.getScalarSizeInBits() == 16) {
1669       return isInlineableLiteralOp16(
1670         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1671         type, AsmParser->hasInv2PiInlineImm());
1672     }
1673 
1674     // Check if single precision literal is inlinable
1675     return AMDGPU::isInlinableLiteral32(
1676       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1677       AsmParser->hasInv2PiInlineImm());
1678   }
1679 
  // We got an int literal token.
1681   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1682     return AMDGPU::isInlinableLiteral64(Imm.Val,
1683                                         AsmParser->hasInv2PiInlineImm());
1684   }
1685 
1686   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1687     return false;
1688   }
1689 
1690   if (type.getScalarSizeInBits() == 16) {
1691     return isInlineableLiteralOp16(
1692       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1693       type, AsmParser->hasInv2PiInlineImm());
1694   }
1695 
1696   return AMDGPU::isInlinableLiteral32(
1697     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1698     AsmParser->hasInv2PiInlineImm());
1699 }
1700 
1701 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
1703   if (!isImmTy(ImmTyNone)) {
1704     return false;
1705   }
1706 
1707   if (!Imm.IsFPImm) {
    // We got an int literal token.
1709 
1710     if (type == MVT::f64 && hasFPModifiers()) {
1711       // Cannot apply fp modifiers to int literals preserving the same semantics
1712       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1713       // disable these cases.
1714       return false;
1715     }
1716 
1717     unsigned Size = type.getSizeInBits();
1718     if (Size == 64)
1719       Size = 32;
1720 
1721     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1722     // types.
1723     return isSafeTruncation(Imm.Val, Size);
1724   }
1725 
  // We got an fp literal token
1727   if (type == MVT::f64) { // Expected 64-bit fp operand
    // Only the high 32 bits of such a literal can be encoded; the low 32 bits
    // would be set to zero. We accept these literals anyway.
1729     return true;
1730   }
1731 
1732   if (type == MVT::i64) { // Expected 64-bit int operand
1733     // We don't allow fp literals in 64-bit integer instructions. It is
1734     // unclear how we should encode them.
1735     return false;
1736   }
1737 
1738   // We allow fp literals with f16x2 operands assuming that the specified
1739   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1741   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1742                      (type == MVT::v2i16)? MVT::i16 : type;
1743 
1744   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1745   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1746 }
1747 
1748 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1749   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1750 }
1751 
1752 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1753   if (AsmParser->isVI())
1754     return isVReg32();
1755   else if (AsmParser->isGFX9Plus())
1756     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1757   else
1758     return false;
1759 }
1760 
1761 bool AMDGPUOperand::isSDWAFP16Operand() const {
1762   return isSDWAOperand(MVT::f16);
1763 }
1764 
1765 bool AMDGPUOperand::isSDWAFP32Operand() const {
1766   return isSDWAOperand(MVT::f32);
1767 }
1768 
1769 bool AMDGPUOperand::isSDWAInt16Operand() const {
1770   return isSDWAOperand(MVT::i16);
1771 }
1772 
1773 bool AMDGPUOperand::isSDWAInt32Operand() const {
1774   return isSDWAOperand(MVT::i32);
1775 }
1776 
1777 bool AMDGPUOperand::isBoolReg() const {
1778   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1779          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1780 }
1781 
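// Applies parsed 'abs'/'neg' modifiers directly to the raw FP bit pattern of
// Val: 'abs' clears the sign bit and 'neg' toggles it. For a 32-bit operand
// (Size == 4) FpSignMask is 0x80000000.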
1782 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1783 {
1784   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1785   assert(Size == 2 || Size == 4 || Size == 8);
1786 
1787   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1788 
1789   if (Imm.Mods.Abs) {
1790     Val &= ~FpSignMask;
1791   }
1792   if (Imm.Mods.Neg) {
1793     Val ^= FpSignMask;
1794   }
1795 
1796   return Val;
1797 }
1798 
1799 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1800   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1801                              Inst.getNumOperands())) {
1802     addLiteralImmOperand(Inst, Imm.Val,
1803                          ApplyModifiers &
1804                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1805   } else {
1806     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1807     Inst.addOperand(MCOperand::createImm(Imm.Val));
1808     setImmKindNone();
1809   }
1810 }
1811 
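// Encodes Val either as an inline constant or as a literal, depending on the
// operand type and value. For example, for a 64-bit fp operand the fp literal
// 1.0 is inlinable, while 3.0 is emitted as a 32-bit literal holding the high
// half of its encoding (0x40080000), with the low 32 bits becoming zero.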
1812 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1813   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1814   auto OpNum = Inst.getNumOperands();
1815   // Check that this operand accepts literals
1816   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1817 
1818   if (ApplyModifiers) {
1819     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1820     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1821     Val = applyInputFPModifiers(Val, Size);
1822   }
1823 
1824   APInt Literal(64, Val);
1825   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1826 
  if (Imm.IsFPImm) { // We got an fp literal token
1828     switch (OpTy) {
1829     case AMDGPU::OPERAND_REG_IMM_INT64:
1830     case AMDGPU::OPERAND_REG_IMM_FP64:
1831     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1832     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1833       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1834                                        AsmParser->hasInv2PiInlineImm())) {
1835         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1836         setImmKindConst();
1837         return;
1838       }
1839 
1840       // Non-inlineable
1841       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1842         // For fp operands we check if low 32 bits are zeros
1843         if (Literal.getLoBits(32) != 0) {
1844           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1845           "Can't encode literal as exact 64-bit floating-point operand. "
1846           "Low 32-bits will be set to zero");
1847         }
1848 
1849         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1850         setImmKindLiteral();
1851         return;
1852       }
1853 
1854       // We don't allow fp literals in 64-bit integer instructions. It is
1855       // unclear how we should encode them. This case should be checked earlier
1856       // in predicate methods (isLiteralImm())
1857       llvm_unreachable("fp literal in 64-bit integer instruction.");
1858 
1859     case AMDGPU::OPERAND_REG_IMM_INT32:
1860     case AMDGPU::OPERAND_REG_IMM_FP32:
1861     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1862     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1863     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1864     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1865     case AMDGPU::OPERAND_REG_IMM_INT16:
1866     case AMDGPU::OPERAND_REG_IMM_FP16:
1867     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1868     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1869     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1870     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1871     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1872     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1873     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1874     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1875     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1876     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1877       bool lost;
1878       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point type
1880       FPLiteral.convert(*getOpFltSemantics(OpTy),
1881                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
1883       // checked earlier in isLiteralImm()
1884 
1885       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1886       Inst.addOperand(MCOperand::createImm(ImmVal));
1887       setImmKindLiteral();
1888       return;
1889     }
1890     default:
1891       llvm_unreachable("invalid operand size");
1892     }
1893 
1894     return;
1895   }
1896 
  // We got an int literal token.
1898   // Only sign extend inline immediates.
1899   switch (OpTy) {
1900   case AMDGPU::OPERAND_REG_IMM_INT32:
1901   case AMDGPU::OPERAND_REG_IMM_FP32:
1902   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1903   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1904   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1905   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1906   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1907   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1908     if (isSafeTruncation(Val, 32) &&
1909         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1910                                      AsmParser->hasInv2PiInlineImm())) {
1911       Inst.addOperand(MCOperand::createImm(Val));
1912       setImmKindConst();
1913       return;
1914     }
1915 
1916     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1917     setImmKindLiteral();
1918     return;
1919 
1920   case AMDGPU::OPERAND_REG_IMM_INT64:
1921   case AMDGPU::OPERAND_REG_IMM_FP64:
1922   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1923   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1924     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1925       Inst.addOperand(MCOperand::createImm(Val));
1926       setImmKindConst();
1927       return;
1928     }
1929 
1930     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1931     setImmKindLiteral();
1932     return;
1933 
1934   case AMDGPU::OPERAND_REG_IMM_INT16:
1935   case AMDGPU::OPERAND_REG_IMM_FP16:
1936   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1937   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1938   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1939   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1940     if (isSafeTruncation(Val, 16) &&
1941         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1942                                      AsmParser->hasInv2PiInlineImm())) {
1943       Inst.addOperand(MCOperand::createImm(Val));
1944       setImmKindConst();
1945       return;
1946     }
1947 
1948     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1949     setImmKindLiteral();
1950     return;
1951 
1952   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1953   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1954   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1955   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1956     assert(isSafeTruncation(Val, 16));
1957     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1958                                         AsmParser->hasInv2PiInlineImm()));
1959 
1960     Inst.addOperand(MCOperand::createImm(Val));
1961     return;
1962   }
1963   default:
1964     llvm_unreachable("invalid operand size");
1965   }
1966 }
1967 
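// Adds a 16- or 32-bit "K" FP immediate (used by instructions such as
// v_madmk_f32 and v_madak_f32): integer tokens are truncated to Bitwidth
// bits, fp tokens are converted to the corresponding IEEE format first.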
1968 template <unsigned Bitwidth>
1969 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1970   APInt Literal(64, Imm.Val);
1971   setImmKindNone();
1972 
1973   if (!Imm.IsFPImm) {
    // We got an int literal token.
1975     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1976     return;
1977   }
1978 
1979   bool Lost;
1980   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1981   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1982                     APFloat::rmNearestTiesToEven, &Lost);
1983   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1984 }
1985 
1986 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1987   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1988 }
1989 
1990 static bool isInlineValue(unsigned Reg) {
1991   switch (Reg) {
1992   case AMDGPU::SRC_SHARED_BASE:
1993   case AMDGPU::SRC_SHARED_LIMIT:
1994   case AMDGPU::SRC_PRIVATE_BASE:
1995   case AMDGPU::SRC_PRIVATE_LIMIT:
1996   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1997     return true;
1998   case AMDGPU::SRC_VCCZ:
1999   case AMDGPU::SRC_EXECZ:
2000   case AMDGPU::SRC_SCC:
2001     return true;
2002   case AMDGPU::SGPR_NULL:
2003     return true;
2004   default:
2005     return false;
2006   }
2007 }
2008 
2009 bool AMDGPUOperand::isInlineValue() const {
2010   return isRegKind() && ::isInlineValue(getReg());
2011 }
2012 
2013 //===----------------------------------------------------------------------===//
2014 // AsmParser
2015 //===----------------------------------------------------------------------===//
2016 
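// Maps a register kind and width (in 32-bit registers) to a register class
// ID, e.g. (IS_VGPR, 4), as produced by parsing v[0:3], yields
// VReg_128RegClassID. Returns -1 for unsupported combinations.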
2017 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2018   if (Is == IS_VGPR) {
2019     switch (RegWidth) {
2020       default: return -1;
2021       case 1: return AMDGPU::VGPR_32RegClassID;
2022       case 2: return AMDGPU::VReg_64RegClassID;
2023       case 3: return AMDGPU::VReg_96RegClassID;
2024       case 4: return AMDGPU::VReg_128RegClassID;
2025       case 5: return AMDGPU::VReg_160RegClassID;
2026       case 6: return AMDGPU::VReg_192RegClassID;
2027       case 8: return AMDGPU::VReg_256RegClassID;
2028       case 16: return AMDGPU::VReg_512RegClassID;
2029       case 32: return AMDGPU::VReg_1024RegClassID;
2030     }
2031   } else if (Is == IS_TTMP) {
2032     switch (RegWidth) {
2033       default: return -1;
2034       case 1: return AMDGPU::TTMP_32RegClassID;
2035       case 2: return AMDGPU::TTMP_64RegClassID;
2036       case 4: return AMDGPU::TTMP_128RegClassID;
2037       case 8: return AMDGPU::TTMP_256RegClassID;
2038       case 16: return AMDGPU::TTMP_512RegClassID;
2039     }
2040   } else if (Is == IS_SGPR) {
2041     switch (RegWidth) {
2042       default: return -1;
2043       case 1: return AMDGPU::SGPR_32RegClassID;
2044       case 2: return AMDGPU::SGPR_64RegClassID;
2045       case 3: return AMDGPU::SGPR_96RegClassID;
2046       case 4: return AMDGPU::SGPR_128RegClassID;
2047       case 5: return AMDGPU::SGPR_160RegClassID;
2048       case 6: return AMDGPU::SGPR_192RegClassID;
2049       case 8: return AMDGPU::SGPR_256RegClassID;
2050       case 16: return AMDGPU::SGPR_512RegClassID;
2051     }
2052   } else if (Is == IS_AGPR) {
2053     switch (RegWidth) {
2054       default: return -1;
2055       case 1: return AMDGPU::AGPR_32RegClassID;
2056       case 2: return AMDGPU::AReg_64RegClassID;
2057       case 3: return AMDGPU::AReg_96RegClassID;
2058       case 4: return AMDGPU::AReg_128RegClassID;
2059       case 5: return AMDGPU::AReg_160RegClassID;
2060       case 6: return AMDGPU::AReg_192RegClassID;
2061       case 8: return AMDGPU::AReg_256RegClassID;
2062       case 16: return AMDGPU::AReg_512RegClassID;
2063       case 32: return AMDGPU::AReg_1024RegClassID;
2064     }
2065   }
2066   return -1;
2067 }
2068 
2069 static unsigned getSpecialRegForName(StringRef RegName) {
2070   return StringSwitch<unsigned>(RegName)
2071     .Case("exec", AMDGPU::EXEC)
2072     .Case("vcc", AMDGPU::VCC)
2073     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2074     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2075     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2076     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2077     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2078     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2079     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2080     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2081     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2082     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2083     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2084     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2085     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2086     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2087     .Case("m0", AMDGPU::M0)
2088     .Case("vccz", AMDGPU::SRC_VCCZ)
2089     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2090     .Case("execz", AMDGPU::SRC_EXECZ)
2091     .Case("src_execz", AMDGPU::SRC_EXECZ)
2092     .Case("scc", AMDGPU::SRC_SCC)
2093     .Case("src_scc", AMDGPU::SRC_SCC)
2094     .Case("tba", AMDGPU::TBA)
2095     .Case("tma", AMDGPU::TMA)
2096     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2097     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2098     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2099     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2100     .Case("vcc_lo", AMDGPU::VCC_LO)
2101     .Case("vcc_hi", AMDGPU::VCC_HI)
2102     .Case("exec_lo", AMDGPU::EXEC_LO)
2103     .Case("exec_hi", AMDGPU::EXEC_HI)
2104     .Case("tma_lo", AMDGPU::TMA_LO)
2105     .Case("tma_hi", AMDGPU::TMA_HI)
2106     .Case("tba_lo", AMDGPU::TBA_LO)
2107     .Case("tba_hi", AMDGPU::TBA_HI)
2108     .Case("pc", AMDGPU::PC_REG)
2109     .Case("null", AMDGPU::SGPR_NULL)
2110     .Default(AMDGPU::NoRegister);
2111 }
2112 
2113 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2114                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2115   auto R = parseRegister();
2116   if (!R) return true;
2117   assert(R->isReg());
2118   RegNo = R->getReg();
2119   StartLoc = R->getStartLoc();
2120   EndLoc = R->getEndLoc();
2121   return false;
2122 }
2123 
2124 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2125                                     SMLoc &EndLoc) {
2126   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2127 }
2128 
2129 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2130                                                        SMLoc &StartLoc,
2131                                                        SMLoc &EndLoc) {
2132   bool Result =
2133       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2134   bool PendingErrors = getParser().hasPendingError();
2135   getParser().clearPendingErrors();
2136   if (PendingErrors)
2137     return MatchOperand_ParseFail;
2138   if (Result)
2139     return MatchOperand_NoMatch;
2140   return MatchOperand_Success;
2141 }
2142 
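// Extends a register list such as [s0,s1,s2,s3] with the next register Reg1.
// Regular registers must have consecutive indices; special registers are
// accepted only as known lo/hi pairs (e.g. exec_lo followed by exec_hi).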
2143 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2144                                             RegisterKind RegKind, unsigned Reg1,
2145                                             SMLoc Loc) {
2146   switch (RegKind) {
2147   case IS_SPECIAL:
2148     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2149       Reg = AMDGPU::EXEC;
2150       RegWidth = 2;
2151       return true;
2152     }
2153     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2154       Reg = AMDGPU::FLAT_SCR;
2155       RegWidth = 2;
2156       return true;
2157     }
2158     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2159       Reg = AMDGPU::XNACK_MASK;
2160       RegWidth = 2;
2161       return true;
2162     }
2163     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2164       Reg = AMDGPU::VCC;
2165       RegWidth = 2;
2166       return true;
2167     }
2168     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2169       Reg = AMDGPU::TBA;
2170       RegWidth = 2;
2171       return true;
2172     }
2173     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2174       Reg = AMDGPU::TMA;
2175       RegWidth = 2;
2176       return true;
2177     }
2178     Error(Loc, "register does not fit in the list");
2179     return false;
2180   case IS_VGPR:
2181   case IS_SGPR:
2182   case IS_AGPR:
2183   case IS_TTMP:
2184     if (Reg1 != Reg + RegWidth) {
2185       Error(Loc, "registers in a list must have consecutive indices");
2186       return false;
2187     }
2188     RegWidth++;
2189     return true;
2190   default:
2191     llvm_unreachable("unexpected register kind");
2192   }
2193 }
2194 
2195 struct RegInfo {
2196   StringLiteral Name;
2197   RegisterKind Kind;
2198 };
2199 
2200 static constexpr RegInfo RegularRegisters[] = {
2201   {{"v"},    IS_VGPR},
2202   {{"s"},    IS_SGPR},
2203   {{"ttmp"}, IS_TTMP},
2204   {{"acc"},  IS_AGPR},
2205   {{"a"},    IS_AGPR},
2206 };
2207 
2208 static bool isRegularReg(RegisterKind Kind) {
2209   return Kind == IS_VGPR ||
2210          Kind == IS_SGPR ||
2211          Kind == IS_TTMP ||
2212          Kind == IS_AGPR;
2213 }
2214 
2215 static const RegInfo* getRegularRegInfo(StringRef Str) {
2216   for (const RegInfo &Reg : RegularRegisters)
2217     if (Str.startswith(Reg.Name))
2218       return &Reg;
2219   return nullptr;
2220 }
2221 
2222 static bool getRegNum(StringRef Str, unsigned& Num) {
2223   return !Str.getAsInteger(10, Num);
2224 }
2225 
2226 bool
2227 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2228                             const AsmToken &NextToken) const {
2229 
2230   // A list of consecutive registers: [s0,s1,s2,s3]
2231   if (Token.is(AsmToken::LBrac))
2232     return true;
2233 
2234   if (!Token.is(AsmToken::Identifier))
2235     return false;
2236 
2237   // A single register like s0 or a range of registers like s[0:1]
2238 
2239   StringRef Str = Token.getString();
2240   const RegInfo *Reg = getRegularRegInfo(Str);
2241   if (Reg) {
2242     StringRef RegName = Reg->Name;
2243     StringRef RegSuffix = Str.substr(RegName.size());
2244     if (!RegSuffix.empty()) {
2245       unsigned Num;
2246       // A single register with an index: rXX
2247       if (getRegNum(RegSuffix, Num))
2248         return true;
2249     } else {
2250       // A range of registers: r[XX:YY].
2251       if (NextToken.is(AsmToken::LBrac))
2252         return true;
2253     }
2254   }
2255 
2256   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2257 }
2258 
2259 bool
2260 AMDGPUAsmParser::isRegister()
2261 {
2262   return isRegister(getToken(), peekToken());
2263 }
2264 
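// Converts a parsed register kind, first index and width into an MC register.
// SGPR and TTMP ranges must be aligned to min(width, 4) dwords; e.g. s[2:5]
// is rejected as misaligned while s[4:7] is accepted.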
2265 unsigned
2266 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2267                                unsigned RegNum,
2268                                unsigned RegWidth,
2269                                SMLoc Loc) {
2270 
2271   assert(isRegularReg(RegKind));
2272 
2273   unsigned AlignSize = 1;
2274   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2275     // SGPR and TTMP registers must be aligned.
2276     // Max required alignment is 4 dwords.
2277     AlignSize = std::min(RegWidth, 4u);
2278   }
2279 
2280   if (RegNum % AlignSize != 0) {
2281     Error(Loc, "invalid register alignment");
2282     return AMDGPU::NoRegister;
2283   }
2284 
2285   unsigned RegIdx = RegNum / AlignSize;
2286   int RCID = getRegClass(RegKind, RegWidth);
2287   if (RCID == -1) {
2288     Error(Loc, "invalid or unsupported register size");
2289     return AMDGPU::NoRegister;
2290   }
2291 
2292   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2293   const MCRegisterClass RC = TRI->getRegClass(RCID);
2294   if (RegIdx >= RC.getNumRegs()) {
2295     Error(Loc, "register index is out of range");
2296     return AMDGPU::NoRegister;
2297   }
2298 
2299   return RC.getRegister(RegIdx);
2300 }
2301 
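// Parses a register index or range in square brackets, e.g. "[0]" or "[4:7]",
// returning the first index and the number of registers (4 and 4 for the
// latter example).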
2302 bool
2303 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2304   int64_t RegLo, RegHi;
2305   if (!skipToken(AsmToken::LBrac, "missing register index"))
2306     return false;
2307 
2308   SMLoc FirstIdxLoc = getLoc();
2309   SMLoc SecondIdxLoc;
2310 
2311   if (!parseExpr(RegLo))
2312     return false;
2313 
2314   if (trySkipToken(AsmToken::Colon)) {
2315     SecondIdxLoc = getLoc();
2316     if (!parseExpr(RegHi))
2317       return false;
2318   } else {
2319     RegHi = RegLo;
2320   }
2321 
2322   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2323     return false;
2324 
2325   if (!isUInt<32>(RegLo)) {
2326     Error(FirstIdxLoc, "invalid register index");
2327     return false;
2328   }
2329 
2330   if (!isUInt<32>(RegHi)) {
2331     Error(SecondIdxLoc, "invalid register index");
2332     return false;
2333   }
2334 
2335   if (RegLo > RegHi) {
2336     Error(FirstIdxLoc, "first register index should not exceed second index");
2337     return false;
2338   }
2339 
2340   Num = static_cast<unsigned>(RegLo);
2341   Width = (RegHi - RegLo) + 1;
2342   return true;
2343 }
2344 
2345 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2346                                           unsigned &RegNum, unsigned &RegWidth,
2347                                           SmallVectorImpl<AsmToken> &Tokens) {
2348   assert(isToken(AsmToken::Identifier));
2349   unsigned Reg = getSpecialRegForName(getTokenStr());
2350   if (Reg) {
2351     RegNum = 0;
2352     RegWidth = 1;
2353     RegKind = IS_SPECIAL;
2354     Tokens.push_back(getToken());
2355     lex(); // skip register name
2356   }
2357   return Reg;
2358 }
2359 
2360 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2361                                           unsigned &RegNum, unsigned &RegWidth,
2362                                           SmallVectorImpl<AsmToken> &Tokens) {
2363   assert(isToken(AsmToken::Identifier));
2364   StringRef RegName = getTokenStr();
2365   auto Loc = getLoc();
2366 
2367   const RegInfo *RI = getRegularRegInfo(RegName);
2368   if (!RI) {
2369     Error(Loc, "invalid register name");
2370     return AMDGPU::NoRegister;
2371   }
2372 
2373   Tokens.push_back(getToken());
2374   lex(); // skip register name
2375 
2376   RegKind = RI->Kind;
2377   StringRef RegSuffix = RegName.substr(RI->Name.size());
2378   if (!RegSuffix.empty()) {
2379     // Single 32-bit register: vXX.
2380     if (!getRegNum(RegSuffix, RegNum)) {
2381       Error(Loc, "invalid register index");
2382       return AMDGPU::NoRegister;
2383     }
2384     RegWidth = 1;
2385   } else {
2386     // Range of registers: v[XX:YY]. ":YY" is optional.
2387     if (!ParseRegRange(RegNum, RegWidth))
2388       return AMDGPU::NoRegister;
2389   }
2390 
2391   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2392 }
2393 
2394 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2395                                        unsigned &RegWidth,
2396                                        SmallVectorImpl<AsmToken> &Tokens) {
2397   unsigned Reg = AMDGPU::NoRegister;
2398   auto ListLoc = getLoc();
2399 
2400   if (!skipToken(AsmToken::LBrac,
2401                  "expected a register or a list of registers")) {
2402     return AMDGPU::NoRegister;
2403   }
2404 
2405   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2406 
2407   auto Loc = getLoc();
2408   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2409     return AMDGPU::NoRegister;
2410   if (RegWidth != 1) {
2411     Error(Loc, "expected a single 32-bit register");
2412     return AMDGPU::NoRegister;
2413   }
2414 
2415   for (; trySkipToken(AsmToken::Comma); ) {
2416     RegisterKind NextRegKind;
2417     unsigned NextReg, NextRegNum, NextRegWidth;
2418     Loc = getLoc();
2419 
2420     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2421                              NextRegNum, NextRegWidth,
2422                              Tokens)) {
2423       return AMDGPU::NoRegister;
2424     }
2425     if (NextRegWidth != 1) {
2426       Error(Loc, "expected a single 32-bit register");
2427       return AMDGPU::NoRegister;
2428     }
2429     if (NextRegKind != RegKind) {
2430       Error(Loc, "registers in a list must be of the same kind");
2431       return AMDGPU::NoRegister;
2432     }
2433     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2434       return AMDGPU::NoRegister;
2435   }
2436 
2437   if (!skipToken(AsmToken::RBrac,
2438                  "expected a comma or a closing square bracket")) {
2439     return AMDGPU::NoRegister;
2440   }
2441 
2442   if (isRegularReg(RegKind))
2443     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2444 
2445   return Reg;
2446 }
2447 
2448 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2449                                           unsigned &RegNum, unsigned &RegWidth,
2450                                           SmallVectorImpl<AsmToken> &Tokens) {
2451   auto Loc = getLoc();
2452   Reg = AMDGPU::NoRegister;
2453 
2454   if (isToken(AsmToken::Identifier)) {
2455     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2456     if (Reg == AMDGPU::NoRegister)
2457       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2458   } else {
2459     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2460   }
2461 
2462   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2463   if (Reg == AMDGPU::NoRegister) {
2464     assert(Parser.hasPendingError());
2465     return false;
2466   }
2467 
2468   if (!subtargetHasRegister(*TRI, Reg)) {
2469     if (Reg == AMDGPU::SGPR_NULL) {
2470       Error(Loc, "'null' operand is not supported on this GPU");
2471     } else {
2472       Error(Loc, "register not available on this GPU");
2473     }
2474     return false;
2475   }
2476 
2477   return true;
2478 }
2479 
2480 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2481                                           unsigned &RegNum, unsigned &RegWidth,
2482                                           bool RestoreOnFailure /*=false*/) {
2483   Reg = AMDGPU::NoRegister;
2484 
2485   SmallVector<AsmToken, 1> Tokens;
2486   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2487     if (RestoreOnFailure) {
2488       while (!Tokens.empty()) {
2489         getLexer().UnLex(Tokens.pop_back_val());
2490       }
2491     }
2492     return true;
2493   }
2494   return false;
2495 }
2496 
2497 Optional<StringRef>
2498 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2499   switch (RegKind) {
2500   case IS_VGPR:
2501     return StringRef(".amdgcn.next_free_vgpr");
2502   case IS_SGPR:
2503     return StringRef(".amdgcn.next_free_sgpr");
2504   default:
2505     return None;
2506   }
2507 }
2508 
2509 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2510   auto SymbolName = getGprCountSymbolName(RegKind);
2511   assert(SymbolName && "initializing invalid register kind");
2512   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2513   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2514 }
2515 
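// Records register usage by raising the .amdgcn.next_free_{v,s}gpr symbol to
// one past the highest dword index used; e.g. a reference to v[6:7] raises
// .amdgcn.next_free_vgpr to at least 8.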
2516 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2517                                             unsigned DwordRegIndex,
2518                                             unsigned RegWidth) {
2519   // Symbols are only defined for GCN targets
2520   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2521     return true;
2522 
2523   auto SymbolName = getGprCountSymbolName(RegKind);
2524   if (!SymbolName)
2525     return true;
2526   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2527 
2528   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2529   int64_t OldCount;
2530 
2531   if (!Sym->isVariable())
2532     return !Error(getParser().getTok().getLoc(),
2533                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2534   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2535     return !Error(
2536         getParser().getTok().getLoc(),
2537         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2538 
2539   if (OldCount <= NewMax)
2540     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2541 
2542   return true;
2543 }
2544 
2545 std::unique_ptr<AMDGPUOperand>
2546 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2547   const auto &Tok = Parser.getTok();
2548   SMLoc StartLoc = Tok.getLoc();
2549   SMLoc EndLoc = Tok.getEndLoc();
2550   RegisterKind RegKind;
2551   unsigned Reg, RegNum, RegWidth;
2552 
2553   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2554     return nullptr;
2555   }
2556   if (isHsaAbiVersion3(&getSTI())) {
2557     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2558       return nullptr;
2559   } else
2560     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2561   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2562 }
2563 
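// Parses a plain immediate operand: either an fp literal with an optional
// leading '-' (e.g. "-1.5") or an integer/symbolic expression (e.g. "7" or
// "sym+1"), the latter being kept as an MCExpr if it is not absolute.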
2564 OperandMatchResultTy
2565 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2566   // TODO: add syntactic sugar for 1/(2*PI)
2567 
2568   assert(!isRegister());
2569   assert(!isModifier());
2570 
2571   const auto& Tok = getToken();
2572   const auto& NextTok = peekToken();
2573   bool IsReal = Tok.is(AsmToken::Real);
2574   SMLoc S = getLoc();
2575   bool Negate = false;
2576 
2577   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2578     lex();
2579     IsReal = true;
2580     Negate = true;
2581   }
2582 
2583   if (IsReal) {
2584     // Floating-point expressions are not supported.
    // Only floating-point literals with an optional sign are allowed.
2587 
2588     StringRef Num = getTokenStr();
2589     lex();
2590 
2591     APFloat RealVal(APFloat::IEEEdouble());
2592     auto roundMode = APFloat::rmNearestTiesToEven;
2593     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2594       return MatchOperand_ParseFail;
2595     }
2596     if (Negate)
2597       RealVal.changeSign();
2598 
2599     Operands.push_back(
2600       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2601                                AMDGPUOperand::ImmTyNone, true));
2602 
2603     return MatchOperand_Success;
2604 
2605   } else {
2606     int64_t IntVal;
2607     const MCExpr *Expr;
2608     SMLoc S = getLoc();
2609 
2610     if (HasSP3AbsModifier) {
2611       // This is a workaround for handling expressions
2612       // as arguments of SP3 'abs' modifier, for example:
2613       //     |1.0|
2614       //     |-1|
2615       //     |1+x|
2616       // This syntax is not compatible with syntax of standard
2617       // MC expressions (due to the trailing '|').
2618       SMLoc EndLoc;
2619       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2620         return MatchOperand_ParseFail;
2621     } else {
2622       if (Parser.parseExpression(Expr))
2623         return MatchOperand_ParseFail;
2624     }
2625 
2626     if (Expr->evaluateAsAbsolute(IntVal)) {
2627       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2628     } else {
2629       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2630     }
2631 
2632     return MatchOperand_Success;
2633   }
2634 
2635   return MatchOperand_NoMatch;
2636 }
2637 
2638 OperandMatchResultTy
2639 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2640   if (!isRegister())
2641     return MatchOperand_NoMatch;
2642 
2643   if (auto R = parseRegister()) {
2644     assert(R->isReg());
2645     Operands.push_back(std::move(R));
2646     return MatchOperand_Success;
2647   }
2648   return MatchOperand_ParseFail;
2649 }
2650 
2651 OperandMatchResultTy
2652 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2653   auto res = parseReg(Operands);
2654   if (res != MatchOperand_NoMatch) {
2655     return res;
2656   } else if (isModifier()) {
2657     return MatchOperand_NoMatch;
2658   } else {
2659     return parseImm(Operands, HasSP3AbsMod);
2660   }
2661 }
2662 
2663 bool
2664 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2665   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2666     const auto &str = Token.getString();
2667     return str == "abs" || str == "neg" || str == "sext";
2668   }
2669   return false;
2670 }
2671 
2672 bool
2673 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2674   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2675 }
2676 
2677 bool
2678 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2679   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2680 }
2681 
2682 bool
2683 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2684   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2685 }
2686 
2687 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2689 // avoid parsing these modifiers as expressions. Currently
2690 // recognized sequences are:
2691 //   |...|
2692 //   abs(...)
2693 //   neg(...)
2694 //   sext(...)
2695 //   -reg
2696 //   -|...|
2697 //   -abs(...)
2698 //   name:...
2699 // Note that simple opcode modifiers like 'gds' may be parsed as
2700 // expressions; this is a special case. See getExpressionAsToken.
2701 //
2702 bool
2703 AMDGPUAsmParser::isModifier() {
2704 
2705   AsmToken Tok = getToken();
2706   AsmToken NextToken[2];
2707   peekTokens(NextToken);
2708 
2709   return isOperandModifier(Tok, NextToken[0]) ||
2710          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2711          isOpcodeModifierWithVal(Tok, NextToken[0]);
2712 }
2713 
2714 // Check if the current token is an SP3 'neg' modifier.
2715 // Currently this modifier is allowed in the following context:
2716 //
2717 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2718 // 2. Before an 'abs' modifier: -abs(...)
2719 // 3. Before an SP3 'abs' modifier: -|...|
2720 //
2721 // In all other cases "-" is handled as a part
2722 // of an expression that follows the sign.
2723 //
2724 // Note: When "-" is followed by an integer literal,
2725 // this is interpreted as integer negation rather
2726 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would result in a different meaning
2729 // of integer literals used with VOP1/2/C and VOP3,
2730 // for example:
2731 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2732 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2733 // Negative fp literals with preceding "-" are
// handled likewise for uniformity.
2735 //
2736 bool
2737 AMDGPUAsmParser::parseSP3NegModifier() {
2738 
2739   AsmToken NextToken[2];
2740   peekTokens(NextToken);
2741 
2742   if (isToken(AsmToken::Minus) &&
2743       (isRegister(NextToken[0], NextToken[1]) ||
2744        NextToken[0].is(AsmToken::Pipe) ||
2745        isId(NextToken[0], "abs"))) {
2746     lex();
2747     return true;
2748   }
2749 
2750   return false;
2751 }
2752 
2753 OperandMatchResultTy
2754 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2755                                               bool AllowImm) {
2756   bool Neg, SP3Neg;
2757   bool Abs, SP3Abs;
2758   SMLoc Loc;
2759 
2760   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2761   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2762     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2763     return MatchOperand_ParseFail;
2764   }
2765 
2766   SP3Neg = parseSP3NegModifier();
2767 
2768   Loc = getLoc();
2769   Neg = trySkipId("neg");
2770   if (Neg && SP3Neg) {
2771     Error(Loc, "expected register or immediate");
2772     return MatchOperand_ParseFail;
2773   }
2774   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2775     return MatchOperand_ParseFail;
2776 
2777   Abs = trySkipId("abs");
2778   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2779     return MatchOperand_ParseFail;
2780 
2781   Loc = getLoc();
2782   SP3Abs = trySkipToken(AsmToken::Pipe);
2783   if (Abs && SP3Abs) {
2784     Error(Loc, "expected register or immediate");
2785     return MatchOperand_ParseFail;
2786   }
2787 
2788   OperandMatchResultTy Res;
2789   if (AllowImm) {
2790     Res = parseRegOrImm(Operands, SP3Abs);
2791   } else {
2792     Res = parseReg(Operands);
2793   }
2794   if (Res != MatchOperand_Success) {
2795     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2796   }
2797 
2798   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2799     return MatchOperand_ParseFail;
2800   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2801     return MatchOperand_ParseFail;
2802   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2803     return MatchOperand_ParseFail;
2804 
2805   AMDGPUOperand::Modifiers Mods;
2806   Mods.Abs = Abs || SP3Abs;
2807   Mods.Neg = Neg || SP3Neg;
2808 
2809   if (Mods.hasFPModifiers()) {
2810     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2811     if (Op.isExpr()) {
2812       Error(Op.getStartLoc(), "expected an absolute expression");
2813       return MatchOperand_ParseFail;
2814     }
2815     Op.setModifiers(Mods);
2816   }
2817   return MatchOperand_Success;
2818 }
2819 
2820 OperandMatchResultTy
2821 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2822                                                bool AllowImm) {
2823   bool Sext = trySkipId("sext");
2824   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2825     return MatchOperand_ParseFail;
2826 
2827   OperandMatchResultTy Res;
2828   if (AllowImm) {
2829     Res = parseRegOrImm(Operands);
2830   } else {
2831     Res = parseReg(Operands);
2832   }
2833   if (Res != MatchOperand_Success) {
2834     return Sext? MatchOperand_ParseFail : Res;
2835   }
2836 
2837   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2838     return MatchOperand_ParseFail;
2839 
2840   AMDGPUOperand::Modifiers Mods;
2841   Mods.Sext = Sext;
2842 
2843   if (Mods.hasIntModifiers()) {
2844     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2845     if (Op.isExpr()) {
2846       Error(Op.getStartLoc(), "expected an absolute expression");
2847       return MatchOperand_ParseFail;
2848     }
2849     Op.setModifiers(Mods);
2850   }
2851 
2852   return MatchOperand_Success;
2853 }
2854 
2855 OperandMatchResultTy
2856 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2857   return parseRegOrImmWithFPInputMods(Operands, false);
2858 }
2859 
2860 OperandMatchResultTy
2861 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2862   return parseRegOrImmWithIntInputMods(Operands, false);
2863 }
2864 
2865 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2866   auto Loc = getLoc();
2867   if (trySkipId("off")) {
2868     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2869                                                 AMDGPUOperand::ImmTyOff, false));
2870     return MatchOperand_Success;
2871   }
2872 
2873   if (!isRegister())
2874     return MatchOperand_NoMatch;
2875 
2876   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2877   if (Reg) {
2878     Operands.push_back(std::move(Reg));
2879     return MatchOperand_Success;
2880   }
2881 
2882   return MatchOperand_ParseFail;
2883 
2884 }
2885 
2886 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2887   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2888 
2889   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2890       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2891       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2892       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2893     return Match_InvalidOperand;
2894 
2895   if ((TSFlags & SIInstrFlags::VOP3) &&
2896       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2897       getForcedEncodingSize() != 64)
2898     return Match_PreferE32;
2899 
2900   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2901       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2902     // v_mac_f32/16 allow only dst_sel == DWORD;
2903     auto OpNum =
2904         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2905     const auto &Op = Inst.getOperand(OpNum);
2906     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2907       return Match_InvalidOperand;
2908     }
2909   }
2910 
2911   return Match_Success;
2912 }
2913 
2914 static ArrayRef<unsigned> getAllVariants() {
2915   static const unsigned Variants[] = {
2916     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2917     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2918   };
2919 
2920   return makeArrayRef(Variants);
2921 }
2922 
2923 // What asm variants we should check
2924 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2925   if (getForcedEncodingSize() == 32) {
2926     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2927     return makeArrayRef(Variants);
2928   }
2929 
2930   if (isForcedVOP3()) {
2931     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2932     return makeArrayRef(Variants);
2933   }
2934 
2935   if (isForcedSDWA()) {
2936     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2937                                         AMDGPUAsmVariants::SDWA9};
2938     return makeArrayRef(Variants);
2939   }
2940 
2941   if (isForcedDPP()) {
2942     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2943     return makeArrayRef(Variants);
2944   }
2945 
2946   return getAllVariants();
2947 }
2948 
2949 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2950   if (getForcedEncodingSize() == 32)
2951     return "e32";
2952 
2953   if (isForcedVOP3())
2954     return "e64";
2955 
2956   if (isForcedSDWA())
2957     return "sdwa";
2958 
2959   if (isForcedDPP())
2960     return "dpp";
2961 
2962   return "";
2963 }
2964 
2965 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2966   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2967   const unsigned Num = Desc.getNumImplicitUses();
2968   for (unsigned i = 0; i < Num; ++i) {
2969     unsigned Reg = Desc.ImplicitUses[i];
2970     switch (Reg) {
2971     case AMDGPU::FLAT_SCR:
2972     case AMDGPU::VCC:
2973     case AMDGPU::VCC_LO:
2974     case AMDGPU::VCC_HI:
2975     case AMDGPU::M0:
2976       return Reg;
2977     default:
2978       break;
2979     }
2980   }
2981   return AMDGPU::NoRegister;
2982 }
2983 
2984 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
2986 // Note that there are no cases when a GFX7 opcode violates
2987 // constant bus limitations due to the use of an f16 constant.
2988 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2989                                        unsigned OpIdx) const {
2990   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2991 
2992   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2993     return false;
2994   }
2995 
2996   const MCOperand &MO = Inst.getOperand(OpIdx);
2997 
2998   int64_t Val = MO.getImm();
2999   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3000 
3001   switch (OpSize) { // expected operand size
3002   case 8:
3003     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3004   case 4:
3005     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3006   case 2: {
3007     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3008     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3009         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3010         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3011       return AMDGPU::isInlinableIntLiteral(Val);
3012 
3013     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3014         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3015         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3016       return AMDGPU::isInlinableIntLiteralV216(Val);
3017 
3018     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3019         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3020         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3021       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3022 
3023     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3024   }
3025   default:
3026     llvm_unreachable("invalid operand size");
3027   }
3028 }
3029 
3030 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3031   if (!isGFX10Plus())
3032     return 1;
3033 
3034   switch (Opcode) {
3035   // 64-bit shift instructions can use only one scalar value input
3036   case AMDGPU::V_LSHLREV_B64:
3037   case AMDGPU::V_LSHLREV_B64_gfx10:
3038   case AMDGPU::V_LSHL_B64:
3039   case AMDGPU::V_LSHRREV_B64:
3040   case AMDGPU::V_LSHRREV_B64_gfx10:
3041   case AMDGPU::V_LSHR_B64:
3042   case AMDGPU::V_ASHRREV_I64:
3043   case AMDGPU::V_ASHRREV_I64_gfx10:
3044   case AMDGPU::V_ASHR_I64:
3045     return 1;
3046   default:
3047     return 2;
3048   }
3049 }
3050 
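// An operand occupies the constant bus if it is a non-inlinable immediate,
// an expression, or an SGPR other than the special 'null' register.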
3051 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3052   const MCOperand &MO = Inst.getOperand(OpIdx);
3053   if (MO.isImm()) {
3054     return !isInlineConstant(Inst, OpIdx);
3055   } else if (MO.isReg()) {
3056     auto Reg = MO.getReg();
3057     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3058     auto PReg = mc2PseudoReg(Reg);
3059     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3060   } else {
3061     return true;
3062   }
3063 }
3064 
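// Verifies that a VALU instruction does not read more distinct scalar values
// (SGPRs and literals) than the constant bus allows (see getConstantBusLimit).
// For example, "v_add_f32 v0, s0, s1" reads two SGPRs and is rejected on
// targets where the limit is one.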
3065 bool
3066 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3067                                                 const OperandVector &Operands) {
3068   const unsigned Opcode = Inst.getOpcode();
3069   const MCInstrDesc &Desc = MII.get(Opcode);
3070   unsigned LastSGPR = AMDGPU::NoRegister;
3071   unsigned ConstantBusUseCount = 0;
3072   unsigned NumLiterals = 0;
3073   unsigned LiteralSize;
3074 
3075   if (Desc.TSFlags &
3076       (SIInstrFlags::VOPC |
3077        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3078        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3079        SIInstrFlags::SDWA)) {
3080     // Check special imm operands (used by madmk, etc)
3081     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3082       ++ConstantBusUseCount;
3083     }
3084 
3085     SmallDenseSet<unsigned> SGPRsUsed;
3086     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3087     if (SGPRUsed != AMDGPU::NoRegister) {
3088       SGPRsUsed.insert(SGPRUsed);
3089       ++ConstantBusUseCount;
3090     }
3091 
3092     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3093     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3094     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3095 
3096     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3097 
3098     for (int OpIdx : OpIndices) {
3099       if (OpIdx == -1) break;
3100 
3101       const MCOperand &MO = Inst.getOperand(OpIdx);
3102       if (usesConstantBus(Inst, OpIdx)) {
3103         if (MO.isReg()) {
3104           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disallowed anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3111           if (!SGPRsUsed.count(LastSGPR)) {
3112             SGPRsUsed.insert(LastSGPR);
3113             ++ConstantBusUseCount;
3114           }
3115         } else { // Expression or a literal
3116 
3117           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3118             continue; // special operand like VINTERP attr_chan
3119 
          // An instruction may use only one literal; this has already been
          // validated in validateVOP3Literal. However, that literal may be
          // used as more than one operand. If all such operands have the same
          // size, the literal counts as one scalar value; otherwise it counts
          // as two. See "GFX10 Shader Programming", section 3.6.2.3.
3128 
3129           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3130           if (Size < 4) Size = 4;
3131 
3132           if (NumLiterals == 0) {
3133             NumLiterals = 1;
3134             LiteralSize = Size;
3135           } else if (LiteralSize != Size) {
3136             NumLiterals = 2;
3137           }
3138         }
3139       }
3140     }
3141   }
3142   ConstantBusUseCount += NumLiterals;
3143 
3144   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3145     return true;
3146 
3147   SMLoc LitLoc = getLitLoc(Operands);
3148   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3149   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3150   Error(Loc, "invalid operand (violates constant bus restrictions)");
3151   return false;
3152 }
3153 
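// Some instructions mark their vdst as EARLY_CLOBBER in the .td files; for
// those the destination register tuple may not overlap any source register.
// Illustrative: if vdst is v[0:1] and one of the sources is v1, the registers
// intersect and the instruction is rejected.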
3154 bool
3155 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3156                                                  const OperandVector &Operands) {
3157   const unsigned Opcode = Inst.getOpcode();
3158   const MCInstrDesc &Desc = MII.get(Opcode);
3159 
3160   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3161   if (DstIdx == -1 ||
3162       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3163     return true;
3164   }
3165 
3166   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3167 
3168   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3169   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3170   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3171 
3172   assert(DstIdx != -1);
3173   const MCOperand &Dst = Inst.getOperand(DstIdx);
3174   assert(Dst.isReg());
3175   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3176 
3177   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3178 
3179   for (int SrcIdx : SrcIndices) {
3180     if (SrcIdx == -1) break;
3181     const MCOperand &Src = Inst.getOperand(SrcIdx);
3182     if (Src.isReg()) {
3183       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3184       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3185         Error(getRegLoc(SrcReg, Operands),
3186           "destination must be different than all sources");
3187         return false;
3188       }
3189     }
3190   }
3191 
3192   return true;
3193 }
3194 
3195 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3196 
3197   const unsigned Opc = Inst.getOpcode();
3198   const MCInstrDesc &Desc = MII.get(Opc);
3199 
3200   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3201     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3202     assert(ClampIdx != -1);
3203     return Inst.getOperand(ClampIdx).getImm() == 0;
3204   }
3205 
3206   return true;
3207 }
3208 
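// Illustrative: with dmask = 0xb (three bits set) vdata must span 3 dwords,
// or 4 when tfe is set; if d16 is enabled on a packed-d16 target, the three
// components pack into (3 + 1) / 2 = 2 dwords instead.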
3209 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3210 
3211   const unsigned Opc = Inst.getOpcode();
3212   const MCInstrDesc &Desc = MII.get(Opc);
3213 
3214   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3215     return true;
3216 
3217   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3218   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3219   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3220 
3221   assert(VDataIdx != -1);
3222 
3223   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3224     return true;
3225 
3226   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3228   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3229   if (DMask == 0)
3230     DMask = 1;
3231 
3232   unsigned DataSize =
3233     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3234   if (hasPackedD16()) {
3235     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3236     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3237       DataSize = (DataSize + 1) / 2;
3238   }
3239 
3240   return (VDataSize / 4) == DataSize + TFESize;
3241 }
3242 
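// Illustrative: in the NSA encoding each address component occupies its own
// vaddr operand, so the number of vaddr operands must equal the computed
// address size. In the non-NSA encoding the address is one register tuple
// whose dword count is compared after rounding the expected size up to 8
// (if greater than 4) or 16 (if greater than 8).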
3243 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3244   const unsigned Opc = Inst.getOpcode();
3245   const MCInstrDesc &Desc = MII.get(Opc);
3246 
3247   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3248     return true;
3249 
3250   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3251 
3252   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3253       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3254   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3255   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3256   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3257 
3258   assert(VAddr0Idx != -1);
3259   assert(SrsrcIdx != -1);
3260   assert(SrsrcIdx > VAddr0Idx);
3261 
3262   if (DimIdx == -1)
3263     return true; // intersect_ray
3264 
3265   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3266   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3267   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3268   unsigned VAddrSize =
3269       IsNSA ? SrsrcIdx - VAddr0Idx
3270             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3271 
3272   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3273                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3274                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3275                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3276   if (!IsNSA) {
3277     if (AddrSize > 8)
3278       AddrSize = 16;
3279     else if (AddrSize > 4)
3280       AddrSize = 8;
3281   }
3282 
3283   return VAddrSize == AddrSize;
3284 }
3285 
3286 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3287 
3288   const unsigned Opc = Inst.getOpcode();
3289   const MCInstrDesc &Desc = MII.get(Opc);
3290 
3291   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3292     return true;
3293   if (!Desc.mayLoad() || !Desc.mayStore())
3294     return true; // Not atomic
3295 
3296   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3297   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3298 
  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
  // verified when we check that dmask matches dst size.
3303   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3304 }
3305 
3306 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3307 
3308   const unsigned Opc = Inst.getOpcode();
3309   const MCInstrDesc &Desc = MII.get(Opc);
3310 
3311   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3312     return true;
3313 
3314   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3315   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3316 
3317   // GATHER4 instructions use dmask in a different fashion compared to
3318   // other MIMG instructions. The only useful DMASK values are
3319   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3320   // (red,red,red,red) etc.) The ISA document doesn't mention
3321   // this.
3322   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3323 }
3324 
static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3327   switch (Opcode) {
3328   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3329   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3330   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3331     return true;
3332   default:
3333     return false;
3334   }
3335 }
3336 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td descriptions for vop1/vop3,
// but SDWA is handled differently. See isSDWAOperand.
3340 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3341 
3342   const unsigned Opc = Inst.getOpcode();
3343   const MCInstrDesc &Desc = MII.get(Opc);
3344 
3345   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3346     return true;
3347 
3348   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3349   assert(Src0Idx != -1);
3350 
3351   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3352   if (!Src0.isReg())
3353     return false;
3354 
3355   auto Reg = Src0.getReg();
3356   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3357   return !isSGPR(mc2PseudoReg(Reg), TRI);
3358 }
3359 
3360 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3361                                           const OperandVector &Operands) {
3362 
3363   const unsigned Opc = Inst.getOpcode();
3364 
3365   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3366     return true;
3367 
3368   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3369   assert(Src0Idx != -1);
3370 
3371   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3372   if (!Src0.isReg())
3373     return true;
3374 
3375   auto Reg = mc2PseudoReg(Src0.getReg());
3376   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3377   if (isSGPR(Reg, TRI)) {
3378     Error(getRegLoc(Reg, Operands),
3379           "source operand must be either a VGPR or an inline constant");
3380     return false;
3381   }
3382 
3383   return true;
3384 }
3385 
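// The ABS (|x|) source modifier is not allowed on v_div_scale_f32/f64.
// Illustrative (assumed syntax): "v_div_scale_f32 v0, vcc, |v1|, v2, v3"
// is rejected with "ABS not allowed in VOP3B instructions".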
3386 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3387   switch (Inst.getOpcode()) {
3388   default:
3389     return true;
3390   case V_DIV_SCALE_F32_gfx6_gfx7:
3391   case V_DIV_SCALE_F32_vi:
3392   case V_DIV_SCALE_F32_gfx10:
3393   case V_DIV_SCALE_F64_gfx6_gfx7:
3394   case V_DIV_SCALE_F64_vi:
3395   case V_DIV_SCALE_F64_gfx10:
3396     break;
3397   }
3398 
3399   // TODO: Check that src0 = src1 or src2.
3400 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3404     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3405             .getImm() &
3406         SISrcMods::ABS) {
3407       return false;
3408     }
3409   }
3410 
3411   return true;
3412 }
3413 
3414 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3415 
3416   const unsigned Opc = Inst.getOpcode();
3417   const MCInstrDesc &Desc = MII.get(Opc);
3418 
3419   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3420     return true;
3421 
3422   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3423   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3424     if (isCI() || isSI())
3425       return false;
3426   }
3427 
3428   return true;
3429 }
3430 
3431 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3432   const unsigned Opc = Inst.getOpcode();
3433   const MCInstrDesc &Desc = MII.get(Opc);
3434 
3435   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3436     return true;
3437 
3438   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3439   if (DimIdx < 0)
3440     return true;
3441 
  int64_t Imm = Inst.getOperand(DimIdx).getImm();
3443   if (Imm < 0 || Imm >= 8)
3444     return false;
3445 
3446   return true;
3447 }
3448 
static bool IsRevOpcode(const unsigned Opcode) {
3451   switch (Opcode) {
3452   case AMDGPU::V_SUBREV_F32_e32:
3453   case AMDGPU::V_SUBREV_F32_e64:
3454   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3455   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3456   case AMDGPU::V_SUBREV_F32_e32_vi:
3457   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3458   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3459   case AMDGPU::V_SUBREV_F32_e64_vi:
3460 
3461   case AMDGPU::V_SUBREV_CO_U32_e32:
3462   case AMDGPU::V_SUBREV_CO_U32_e64:
3463   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3464   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3465 
3466   case AMDGPU::V_SUBBREV_U32_e32:
3467   case AMDGPU::V_SUBBREV_U32_e64:
3468   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3469   case AMDGPU::V_SUBBREV_U32_e32_vi:
3470   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3471   case AMDGPU::V_SUBBREV_U32_e64_vi:
3472 
3473   case AMDGPU::V_SUBREV_U32_e32:
3474   case AMDGPU::V_SUBREV_U32_e64:
3475   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3476   case AMDGPU::V_SUBREV_U32_e32_vi:
3477   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3478   case AMDGPU::V_SUBREV_U32_e64_vi:
3479 
3480   case AMDGPU::V_SUBREV_F16_e32:
3481   case AMDGPU::V_SUBREV_F16_e64:
3482   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3483   case AMDGPU::V_SUBREV_F16_e32_vi:
3484   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3485   case AMDGPU::V_SUBREV_F16_e64_vi:
3486 
3487   case AMDGPU::V_SUBREV_U16_e32:
3488   case AMDGPU::V_SUBREV_U16_e64:
3489   case AMDGPU::V_SUBREV_U16_e32_vi:
3490   case AMDGPU::V_SUBREV_U16_e64_vi:
3491 
3492   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3493   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3494   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3495 
3496   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3497   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3498 
3499   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3500   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3501 
3502   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3503   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3504 
3505   case AMDGPU::V_LSHRREV_B32_e32:
3506   case AMDGPU::V_LSHRREV_B32_e64:
3507   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3508   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3509   case AMDGPU::V_LSHRREV_B32_e32_vi:
3510   case AMDGPU::V_LSHRREV_B32_e64_vi:
3511   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3512   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3513 
3514   case AMDGPU::V_ASHRREV_I32_e32:
3515   case AMDGPU::V_ASHRREV_I32_e64:
3516   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3517   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3518   case AMDGPU::V_ASHRREV_I32_e32_vi:
3519   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3520   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3521   case AMDGPU::V_ASHRREV_I32_e64_vi:
3522 
3523   case AMDGPU::V_LSHLREV_B32_e32:
3524   case AMDGPU::V_LSHLREV_B32_e64:
3525   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3526   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3527   case AMDGPU::V_LSHLREV_B32_e32_vi:
3528   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3529   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3530   case AMDGPU::V_LSHLREV_B32_e64_vi:
3531 
3532   case AMDGPU::V_LSHLREV_B16_e32:
3533   case AMDGPU::V_LSHLREV_B16_e64:
3534   case AMDGPU::V_LSHLREV_B16_e32_vi:
3535   case AMDGPU::V_LSHLREV_B16_e64_vi:
3536   case AMDGPU::V_LSHLREV_B16_gfx10:
3537 
3538   case AMDGPU::V_LSHRREV_B16_e32:
3539   case AMDGPU::V_LSHRREV_B16_e64:
3540   case AMDGPU::V_LSHRREV_B16_e32_vi:
3541   case AMDGPU::V_LSHRREV_B16_e64_vi:
3542   case AMDGPU::V_LSHRREV_B16_gfx10:
3543 
3544   case AMDGPU::V_ASHRREV_I16_e32:
3545   case AMDGPU::V_ASHRREV_I16_e64:
3546   case AMDGPU::V_ASHRREV_I16_e32_vi:
3547   case AMDGPU::V_ASHRREV_I16_e64_vi:
3548   case AMDGPU::V_ASHRREV_I16_gfx10:
3549 
3550   case AMDGPU::V_LSHLREV_B64:
3551   case AMDGPU::V_LSHLREV_B64_gfx10:
3552   case AMDGPU::V_LSHLREV_B64_vi:
3553 
3554   case AMDGPU::V_LSHRREV_B64:
3555   case AMDGPU::V_LSHRREV_B64_gfx10:
3556   case AMDGPU::V_LSHRREV_B64_vi:
3557 
3558   case AMDGPU::V_ASHRREV_I64:
3559   case AMDGPU::V_ASHRREV_I64_gfx10:
3560   case AMDGPU::V_ASHRREV_I64_vi:
3561 
3562   case AMDGPU::V_PK_LSHLREV_B16:
3563   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3564   case AMDGPU::V_PK_LSHLREV_B16_vi:
3565 
3566   case AMDGPU::V_PK_LSHRREV_B16:
3567   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3568   case AMDGPU::V_PK_LSHRREV_B16_vi:
3569   case AMDGPU::V_PK_ASHRREV_I16:
3570   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3571   case AMDGPU::V_PK_ASHRREV_I16_vi:
3572     return true;
3573   default:
3574     return false;
3575   }
3576 }
3577 
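// Illustrative (assumed syntax): "v_mov_b32 v0, lds_direct" passes this check
// because lds_direct is src0 of a non-SDWA, non-*rev VOP1 encoding, whereas
// lds_direct used as src1 or src2, or with an SDWA encoding, is reported as
// an invalid use of lds_direct.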
3578 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3579 
3580   using namespace SIInstrFlags;
3581   const unsigned Opcode = Inst.getOpcode();
3582   const MCInstrDesc &Desc = MII.get(Opcode);
3583 
  // The lds_direct register is defined so that it can be used
  // only with 9-bit operands. Ignore encodings which do not accept these.
3586   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3587     return true;
3588 
3589   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3590   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3591   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3592 
3593   const int SrcIndices[] = { Src1Idx, Src2Idx };
3594 
3595   // lds_direct cannot be specified as either src1 or src2.
3596   for (int SrcIdx : SrcIndices) {
3597     if (SrcIdx == -1) break;
3598     const MCOperand &Src = Inst.getOperand(SrcIdx);
3599     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3600       return false;
3601     }
3602   }
3603 
3604   if (Src0Idx == -1)
3605     return true;
3606 
3607   const MCOperand &Src = Inst.getOperand(Src0Idx);
3608   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3609     return true;
3610 
3611   // lds_direct is specified as src0. Check additional limitations.
3612   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3613 }
3614 
3615 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3616   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3617     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3618     if (Op.isFlatOffset())
3619       return Op.getStartLoc();
3620   }
3621   return getLoc();
3622 }
3623 
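// Offset ranges implied by the bit widths checked below (illustrative):
// on GFX9, global/scratch instructions accept offsets in [-4096, 4095] and
// plain flat instructions accept [0, 4095]; on GFX10 the ranges shrink to
// [-2048, 2047] and [0, 2047] respectively.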
3624 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3625                                          const OperandVector &Operands) {
3626   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3627   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3628     return true;
3629 
3630   auto Opcode = Inst.getOpcode();
3631   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3632   assert(OpNum != -1);
3633 
3634   const auto &Op = Inst.getOperand(OpNum);
3635   if (!hasFlatOffsets() && Op.getImm() != 0) {
3636     Error(getFlatOffsetLoc(Operands),
3637           "flat offset modifier is not supported on this GPU");
3638     return false;
3639   }
3640 
3641   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3642   // For FLAT segment the offset must be positive;
3643   // MSB is ignored and forced to zero.
3644   unsigned OffsetSize = isGFX9() ? 13 : 12;
3645   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3646     if (!isIntN(OffsetSize, Op.getImm())) {
3647       Error(getFlatOffsetLoc(Operands),
3648             isGFX9() ? "expected a 13-bit signed offset" :
3649                        "expected a 12-bit signed offset");
3650       return false;
3651     }
3652   } else {
3653     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3654       Error(getFlatOffsetLoc(Operands),
3655             isGFX9() ? "expected a 12-bit unsigned offset" :
3656                        "expected an 11-bit unsigned offset");
3657       return false;
3658     }
3659   }
3660 
3661   return true;
3662 }
3663 
3664 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3665   // Start with second operand because SMEM Offset cannot be dst or src0.
3666   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3667     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3668     if (Op.isSMEMOffset())
3669       return Op.getStartLoc();
3670   }
3671   return getLoc();
3672 }
3673 
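// Illustrative: on VI, and for buffer forms on later targets, the offset must
// fit in 20 unsigned bits, i.e. [0, 0xFFFFF]; otherwise a 21-bit signed
// offset is expected. The precise legality check is delegated to the
// isLegalSMRDEncoded* helpers below.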
3674 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3675                                          const OperandVector &Operands) {
3676   if (isCI() || isSI())
3677     return true;
3678 
3679   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3680   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3681     return true;
3682 
3683   auto Opcode = Inst.getOpcode();
3684   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3685   if (OpNum == -1)
3686     return true;
3687 
3688   const auto &Op = Inst.getOperand(OpNum);
3689   if (!Op.isImm())
3690     return true;
3691 
3692   uint64_t Offset = Op.getImm();
3693   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3694   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3695       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3696     return true;
3697 
3698   Error(getSMEMOffsetLoc(Operands),
3699         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3700                                "expected a 21-bit signed offset");
3701 
3702   return false;
3703 }
3704 
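// SOP2/SOPC instructions may reference at most one 32-bit literal across src0
// and src1. Illustrative (assumed syntax): "s_add_u32 s0, 0x12345678,
// 0x12345678" reuses a single literal and is accepted, while two different
// literal values are rejected.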
3705 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3706   unsigned Opcode = Inst.getOpcode();
3707   const MCInstrDesc &Desc = MII.get(Opcode);
3708   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3709     return true;
3710 
3711   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3712   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3713 
3714   const int OpIndices[] = { Src0Idx, Src1Idx };
3715 
3716   unsigned NumExprs = 0;
3717   unsigned NumLiterals = 0;
3718   uint32_t LiteralValue;
3719 
3720   for (int OpIdx : OpIndices) {
3721     if (OpIdx == -1) break;
3722 
3723     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
3725     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3726       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3727         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3728         if (NumLiterals == 0 || LiteralValue != Value) {
3729           LiteralValue = Value;
3730           ++NumLiterals;
3731         }
3732       } else if (MO.isExpr()) {
3733         ++NumExprs;
3734       }
3735     }
3736   }
3737 
3738   return NumLiterals + NumExprs <= 1;
3739 }
3740 
3741 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3742   const unsigned Opc = Inst.getOpcode();
3743   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3744       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3745     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3746     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3747 
3748     if (OpSel & ~3)
3749       return false;
3750   }
3751   return true;
3752 }
3753 
3754 // Check if VCC register matches wavefront size
3755 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3756   auto FB = getFeatureBits();
3757   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3758     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3759 }
3760 
// A VOP3 literal is only allowed on GFX10+, and only one can be used.
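// Illustrative: on GFX10+ a VOP3/VOP3P instruction may reference one 32-bit
// literal, possibly in more than one source operand as long as the value is
// identical; two distinct literal values, or any literal on a target without
// FeatureVOP3Literal, are rejected.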
3762 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3763                                           const OperandVector &Operands) {
3764   unsigned Opcode = Inst.getOpcode();
3765   const MCInstrDesc &Desc = MII.get(Opcode);
3766   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3767     return true;
3768 
3769   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3770   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3771   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3772 
3773   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3774 
3775   unsigned NumExprs = 0;
3776   unsigned NumLiterals = 0;
3777   uint32_t LiteralValue;
3778 
3779   for (int OpIdx : OpIndices) {
3780     if (OpIdx == -1) break;
3781 
3782     const MCOperand &MO = Inst.getOperand(OpIdx);
3783     if (!MO.isImm() && !MO.isExpr())
3784       continue;
3785     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3786       continue;
3787 
3788     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3789         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3790       Error(getConstLoc(Operands),
3791             "inline constants are not allowed for this operand");
3792       return false;
3793     }
3794 
3795     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3796       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3797       if (NumLiterals == 0 || LiteralValue != Value) {
3798         LiteralValue = Value;
3799         ++NumLiterals;
3800       }
3801     } else if (MO.isExpr()) {
3802       ++NumExprs;
3803     }
3804   }
3805   NumLiterals += NumExprs;
3806 
3807   if (!NumLiterals)
3808     return true;
3809 
3810   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3811     Error(getLitLoc(Operands), "literal operands are not supported");
3812     return false;
3813   }
3814 
3815   if (NumLiterals > 1) {
3816     Error(getLitLoc(Operands), "only one literal operand is allowed");
3817     return false;
3818   }
3819 
3820   return true;
3821 }
3822 
3823 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3824                                             const OperandVector &Operands,
3825                                             const SMLoc &IDLoc) {
3826   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3827                                           AMDGPU::OpName::glc1);
3828   if (GLCPos != -1) {
3829     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
3830     // in the asm string, and the default value means it is not present.
3831     if (Inst.getOperand(GLCPos).getImm() == -1) {
3832       Error(IDLoc, "instruction must use glc");
3833       return false;
3834     }
3835   }
3836 
3837   return true;
3838 }
3839 
3840 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3841                                           const SMLoc &IDLoc,
3842                                           const OperandVector &Operands) {
3843   if (!validateLdsDirect(Inst)) {
3844     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
3845       "invalid use of lds_direct");
3846     return false;
3847   }
3848   if (!validateSOPLiteral(Inst)) {
3849     Error(getLitLoc(Operands),
3850       "only one literal operand is allowed");
3851     return false;
3852   }
3853   if (!validateVOP3Literal(Inst, Operands)) {
3854     return false;
3855   }
3856   if (!validateConstantBusLimitations(Inst, Operands)) {
3857     return false;
3858   }
3859   if (!validateEarlyClobberLimitations(Inst, Operands)) {
3860     return false;
3861   }
3862   if (!validateIntClampSupported(Inst)) {
3863     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
3864       "integer clamping is not supported on this GPU");
3865     return false;
3866   }
3867   if (!validateOpSel(Inst)) {
3868     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
3869       "invalid op_sel operand");
3870     return false;
3871   }
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3873   if (!validateMIMGD16(Inst)) {
3874     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
3875       "d16 modifier is not supported on this GPU");
3876     return false;
3877   }
3878   if (!validateMIMGDim(Inst)) {
3879     Error(IDLoc, "dim modifier is required on this GPU");
3880     return false;
3881   }
3882   if (!validateMIMGDataSize(Inst)) {
3883     Error(IDLoc,
3884       "image data size does not match dmask and tfe");
3885     return false;
3886   }
3887   if (!validateMIMGAddrSize(Inst)) {
3888     Error(IDLoc,
3889       "image address size does not match dim and a16");
3890     return false;
3891   }
3892   if (!validateMIMGAtomicDMask(Inst)) {
3893     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3894       "invalid atomic image dmask");
3895     return false;
3896   }
3897   if (!validateMIMGGatherDMask(Inst)) {
3898     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3899       "invalid image_gather dmask: only one bit must be set");
3900     return false;
3901   }
3902   if (!validateMovrels(Inst)) {
3903     Error(IDLoc, "source operand must be a VGPR");
3904     return false;
3905   }
3906   if (!validateFlatOffset(Inst, Operands)) {
3907     return false;
3908   }
3909   if (!validateSMEMOffset(Inst, Operands)) {
3910     return false;
3911   }
3912   if (!validateMAIAccWrite(Inst, Operands)) {
3913     return false;
3914   }
3915   if (!validateDivScale(Inst)) {
3916     Error(IDLoc, "ABS not allowed in VOP3B instructions");
3917     return false;
3918   }
3919   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3920     return false;
3921   }
3922 
3923   return true;
3924 }
3925 
3926 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3927                                             const FeatureBitset &FBS,
3928                                             unsigned VariantID = 0);
3929 
3930 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3931                                 const FeatureBitset &AvailableFeatures,
3932                                 unsigned VariantID);
3933 
3934 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3935                                        const FeatureBitset &FBS) {
3936   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3937 }
3938 
3939 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3940                                        const FeatureBitset &FBS,
3941                                        ArrayRef<unsigned> Variants) {
3942   for (auto Variant : Variants) {
3943     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3944       return true;
3945   }
3946 
3947   return false;
3948 }
3949 
3950 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3951                                                   const SMLoc &IDLoc) {
3952   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3953 
3954   // Check if requested instruction variant is supported.
3955   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3956     return false;
3957 
3958   // This instruction is not supported.
3959   // Clear any other pending errors because they are no longer relevant.
3960   getParser().clearPendingErrors();
3961 
3962   // Requested instruction variant is not supported.
3963   // Check if any other variants are supported.
3964   StringRef VariantName = getMatchedVariantName();
3965   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3966     return Error(IDLoc,
3967                  Twine(VariantName,
3968                        " variant of this instruction is not supported"));
3969   }
3970 
3971   // Finally check if this instruction is supported on any other GPU.
3972   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3973     return Error(IDLoc, "instruction not supported on this GPU");
3974   }
3975 
3976   // Instruction not supported on any GPU. Probably a typo.
3977   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3978   return Error(IDLoc, "invalid instruction" + Suggestion);
3979 }
3980 
3981 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3982                                               OperandVector &Operands,
3983                                               MCStreamer &Out,
3984                                               uint64_t &ErrorInfo,
3985                                               bool MatchingInlineAsm) {
3986   MCInst Inst;
3987   unsigned Result = Match_Success;
3988   for (auto Variant : getMatchedVariants()) {
3989     uint64_t EI;
3990     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3991                                   Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3995     if ((R == Match_Success) ||
3996         (R == Match_PreferE32) ||
3997         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3998         (R == Match_InvalidOperand && Result != Match_MissingFeature
3999                                    && Result != Match_PreferE32) ||
4000         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4001                                    && Result != Match_MissingFeature
4002                                    && Result != Match_PreferE32)) {
4003       Result = R;
4004       ErrorInfo = EI;
4005     }
4006     if (R == Match_Success)
4007       break;
4008   }
4009 
4010   if (Result == Match_Success) {
4011     if (!validateInstruction(Inst, IDLoc, Operands)) {
4012       return true;
4013     }
4014     Inst.setLoc(IDLoc);
4015     Out.emitInstruction(Inst, getSTI());
4016     return false;
4017   }
4018 
4019   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4020   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4021     return true;
4022   }
4023 
4024   switch (Result) {
4025   default: break;
4026   case Match_MissingFeature:
4027     // It has been verified that the specified instruction
4028     // mnemonic is valid. A match was found but it requires
4029     // features which are not supported on this GPU.
4030     return Error(IDLoc, "operands are not valid for this GPU or mode");
4031 
4032   case Match_InvalidOperand: {
4033     SMLoc ErrorLoc = IDLoc;
4034     if (ErrorInfo != ~0ULL) {
4035       if (ErrorInfo >= Operands.size()) {
4036         return Error(IDLoc, "too few operands for instruction");
4037       }
4038       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4039       if (ErrorLoc == SMLoc())
4040         ErrorLoc = IDLoc;
4041     }
4042     return Error(ErrorLoc, "invalid operand for instruction");
4043   }
4044 
4045   case Match_PreferE32:
4046     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4047                         "should be encoded as e32");
4048   case Match_MnemonicFail:
4049     llvm_unreachable("Invalid instructions should have been handled already");
4050   }
4051   llvm_unreachable("Implement any new match types added!");
4052 }
4053 
4054 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4055   int64_t Tmp = -1;
4056   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
4057     return true;
4058   }
4059   if (getParser().parseAbsoluteExpression(Tmp)) {
4060     return true;
4061   }
4062   Ret = static_cast<uint32_t>(Tmp);
4063   return false;
4064 }
4065 
4066 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4067                                                uint32_t &Minor) {
4068   if (ParseAsAbsoluteExpression(Major))
4069     return TokError("invalid major version");
4070 
4071   if (getLexer().isNot(AsmToken::Comma))
4072     return TokError("minor version number required, comma expected");
4073   Lex();
4074 
4075   if (ParseAsAbsoluteExpression(Minor))
4076     return TokError("invalid minor version");
4077 
4078   return false;
4079 }
4080 
4081 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4082   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4083     return TokError("directive only supported for amdgcn architecture");
4084 
4085   std::string Target;
4086 
4087   SMLoc TargetStart = getTok().getLoc();
4088   if (getParser().parseEscapedString(Target))
4089     return true;
4090   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4091 
4092   std::string ExpectedTarget;
4093   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4094   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4095 
4096   if (Target != ExpectedTargetOS.str())
4097     return getParser().Error(TargetRange.Start, "target must match options",
4098                              TargetRange);
4099 
4100   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4101   return false;
4102 }
4103 
4104 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4105   return getParser().Error(Range.Start, "value out of range", Range);
4106 }
4107 
4108 bool AMDGPUAsmParser::calculateGPRBlocks(
4109     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4110     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4111     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4112     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4113   // TODO(scott.linder): These calculations are duplicated from
4114   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4115   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4116 
4117   unsigned NumVGPRs = NextFreeVGPR;
4118   unsigned NumSGPRs = NextFreeSGPR;
4119 
4120   if (Version.Major >= 10)
4121     NumSGPRs = 0;
4122   else {
4123     unsigned MaxAddressableNumSGPRs =
4124         IsaInfo::getAddressableNumSGPRs(&getSTI());
4125 
4126     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4127         NumSGPRs > MaxAddressableNumSGPRs)
4128       return OutOfRangeError(SGPRRange);
4129 
4130     NumSGPRs +=
4131         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4132 
4133     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4134         NumSGPRs > MaxAddressableNumSGPRs)
4135       return OutOfRangeError(SGPRRange);
4136 
4137     if (Features.test(FeatureSGPRInitBug))
4138       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4139   }
4140 
4141   VGPRBlocks =
4142       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4143   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4144 
4145   return false;
4146 }
4147 
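// A minimal, illustrative use of this directive (field values are arbitrary
// examples):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; all other
// fields default to the values from getDefaultAmdhsaKernelDescriptor().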
4148 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4149   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4150     return TokError("directive only supported for amdgcn architecture");
4151 
4152   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4153     return TokError("directive only supported for amdhsa OS");
4154 
4155   StringRef KernelName;
4156   if (getParser().parseIdentifier(KernelName))
4157     return true;
4158 
4159   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4160 
4161   StringSet<> Seen;
4162 
4163   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4164 
4165   SMRange VGPRRange;
4166   uint64_t NextFreeVGPR = 0;
4167   SMRange SGPRRange;
4168   uint64_t NextFreeSGPR = 0;
4169   unsigned UserSGPRCount = 0;
4170   bool ReserveVCC = true;
4171   bool ReserveFlatScr = true;
4172   bool ReserveXNACK = hasXNACK();
4173   Optional<bool> EnableWavefrontSize32;
4174 
4175   while (true) {
4176     while (getLexer().is(AsmToken::EndOfStatement))
4177       Lex();
4178 
4179     if (getLexer().isNot(AsmToken::Identifier))
4180       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
4181 
4182     StringRef ID = getTok().getIdentifier();
4183     SMRange IDRange = getTok().getLocRange();
4184     Lex();
4185 
4186     if (ID == ".end_amdhsa_kernel")
4187       break;
4188 
4189     if (Seen.find(ID) != Seen.end())
4190       return TokError(".amdhsa_ directives cannot be repeated");
4191     Seen.insert(ID);
4192 
4193     SMLoc ValStart = getTok().getLoc();
4194     int64_t IVal;
4195     if (getParser().parseAbsoluteExpression(IVal))
4196       return true;
4197     SMLoc ValEnd = getTok().getLoc();
4198     SMRange ValRange = SMRange(ValStart, ValEnd);
4199 
4200     if (IVal < 0)
4201       return OutOfRangeError(ValRange);
4202 
4203     uint64_t Val = IVal;
4204 
4205 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4206   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4207     return OutOfRangeError(RANGE);                                             \
4208   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
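// Illustrative expansion: PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
// COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange) rejects any
// Val that does not fit in the field's declared bit width and otherwise packs
// Val into that bit range of compute_pgm_rsrc2.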
4209 
4210     if (ID == ".amdhsa_group_segment_fixed_size") {
4211       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4212         return OutOfRangeError(ValRange);
4213       KD.group_segment_fixed_size = Val;
4214     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4215       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4216         return OutOfRangeError(ValRange);
4217       KD.private_segment_fixed_size = Val;
4218     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4219       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4220                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4221                        Val, ValRange);
4222       if (Val)
4223         UserSGPRCount += 4;
4224     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4225       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4226                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4227                        ValRange);
4228       if (Val)
4229         UserSGPRCount += 2;
4230     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4231       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4232                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4233                        ValRange);
4234       if (Val)
4235         UserSGPRCount += 2;
4236     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4237       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4238                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4239                        Val, ValRange);
4240       if (Val)
4241         UserSGPRCount += 2;
4242     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4243       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4244                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4245                        ValRange);
4246       if (Val)
4247         UserSGPRCount += 2;
4248     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4249       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4250                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4251                        ValRange);
4252       if (Val)
4253         UserSGPRCount += 2;
4254     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4255       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4256                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4257                        Val, ValRange);
4258       if (Val)
4259         UserSGPRCount += 1;
4260     } else if (ID == ".amdhsa_wavefront_size32") {
4261       if (IVersion.Major < 10)
4262         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4263                                  IDRange);
4264       EnableWavefrontSize32 = Val;
4265       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4266                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4267                        Val, ValRange);
4268     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4269       PARSE_BITS_ENTRY(
4270           KD.compute_pgm_rsrc2,
4271           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
4272           ValRange);
4273     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4274       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4275                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4276                        ValRange);
4277     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4278       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4279                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4280                        ValRange);
4281     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4282       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4283                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4284                        ValRange);
4285     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4286       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4287                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4288                        ValRange);
4289     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4290       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4291                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4292                        ValRange);
4293     } else if (ID == ".amdhsa_next_free_vgpr") {
4294       VGPRRange = ValRange;
4295       NextFreeVGPR = Val;
4296     } else if (ID == ".amdhsa_next_free_sgpr") {
4297       SGPRRange = ValRange;
4298       NextFreeSGPR = Val;
4299     } else if (ID == ".amdhsa_reserve_vcc") {
4300       if (!isUInt<1>(Val))
4301         return OutOfRangeError(ValRange);
4302       ReserveVCC = Val;
4303     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4304       if (IVersion.Major < 7)
4305         return getParser().Error(IDRange.Start, "directive requires gfx7+",
4306                                  IDRange);
4307       if (!isUInt<1>(Val))
4308         return OutOfRangeError(ValRange);
4309       ReserveFlatScr = Val;
4310     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4311       if (IVersion.Major < 8)
4312         return getParser().Error(IDRange.Start, "directive requires gfx8+",
4313                                  IDRange);
4314       if (!isUInt<1>(Val))
4315         return OutOfRangeError(ValRange);
4316       ReserveXNACK = Val;
4317     } else if (ID == ".amdhsa_float_round_mode_32") {
4318       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4319                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4320     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4321       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4322                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4323     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4324       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4325                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4326     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4327       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4328                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4329                        ValRange);
4330     } else if (ID == ".amdhsa_dx10_clamp") {
4331       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4332                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4333     } else if (ID == ".amdhsa_ieee_mode") {
4334       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4335                        Val, ValRange);
4336     } else if (ID == ".amdhsa_fp16_overflow") {
4337       if (IVersion.Major < 9)
4338         return getParser().Error(IDRange.Start, "directive requires gfx9+",
4339                                  IDRange);
4340       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4341                        ValRange);
4342     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4343       if (IVersion.Major < 10)
4344         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4345                                  IDRange);
4346       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4347                        ValRange);
4348     } else if (ID == ".amdhsa_memory_ordered") {
4349       if (IVersion.Major < 10)
4350         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4351                                  IDRange);
4352       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4353                        ValRange);
4354     } else if (ID == ".amdhsa_forward_progress") {
4355       if (IVersion.Major < 10)
4356         return getParser().Error(IDRange.Start, "directive requires gfx10+",
4357                                  IDRange);
4358       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4359                        ValRange);
4360     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4361       PARSE_BITS_ENTRY(
4362           KD.compute_pgm_rsrc2,
4363           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4364           ValRange);
4365     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4366       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4367                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4368                        Val, ValRange);
4369     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4370       PARSE_BITS_ENTRY(
4371           KD.compute_pgm_rsrc2,
4372           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4373           ValRange);
4374     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4375       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4376                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4377                        Val, ValRange);
4378     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4379       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4380                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4381                        Val, ValRange);
4382     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4383       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4384                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4385                        Val, ValRange);
4386     } else if (ID == ".amdhsa_exception_int_div_zero") {
4387       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4388                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4389                        Val, ValRange);
4390     } else {
4391       return getParser().Error(IDRange.Start,
4392                                "unknown .amdhsa_kernel directive", IDRange);
4393     }
4394 
4395 #undef PARSE_BITS_ENTRY
4396   }
4397 
4398   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4399     return TokError(".amdhsa_next_free_vgpr directive is required");
4400 
4401   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4402     return TokError(".amdhsa_next_free_sgpr directive is required");
4403 
4404   unsigned VGPRBlocks;
4405   unsigned SGPRBlocks;
4406   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4407                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4408                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4409                          SGPRBlocks))
4410     return true;
4411 
4412   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4413           VGPRBlocks))
4414     return OutOfRangeError(VGPRRange);
4415   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4416                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4417 
4418   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4419           SGPRBlocks))
4420     return OutOfRangeError(SGPRRange);
4421   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4422                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4423                   SGPRBlocks);
4424 
4425   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4426     return TokError("too many user SGPRs enabled");
4427   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4428                   UserSGPRCount);
4429 
4430   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4431       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4432       ReserveFlatScr, ReserveXNACK);
4433   return false;
4434 }
4435 
4436 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4437   uint32_t Major;
4438   uint32_t Minor;
4439 
4440   if (ParseDirectiveMajorMinor(Major, Minor))
4441     return true;
4442 
4443   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4444   return false;
4445 }
4446 
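// Illustrative (assumed directive syntax):
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
// With no arguments, the ISA version of the targeted GPU is emitted together
// with the default vendor "AMD" and arch "AMDGPU".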
4447 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4448   uint32_t Major;
4449   uint32_t Minor;
4450   uint32_t Stepping;
4451   StringRef VendorName;
4452   StringRef ArchName;
4453 
4454   // If this directive has no arguments, then use the ISA version for the
4455   // targeted GPU.
4456   if (getLexer().is(AsmToken::EndOfStatement)) {
4457     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4458     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4459                                                       ISA.Stepping,
4460                                                       "AMD", "AMDGPU");
4461     return false;
4462   }
4463 
4464   if (ParseDirectiveMajorMinor(Major, Minor))
4465     return true;
4466 
4467   if (getLexer().isNot(AsmToken::Comma))
4468     return TokError("stepping version number required, comma expected");
4469   Lex();
4470 
4471   if (ParseAsAbsoluteExpression(Stepping))
4472     return TokError("invalid stepping version");
4473 
4474   if (getLexer().isNot(AsmToken::Comma))
4475     return TokError("vendor name required, comma expected");
4476   Lex();
4477 
4478   if (getLexer().isNot(AsmToken::String))
4479     return TokError("invalid vendor name");
4480 
4481   VendorName = getLexer().getTok().getStringContents();
4482   Lex();
4483 
4484   if (getLexer().isNot(AsmToken::Comma))
4485     return TokError("arch name required, comma expected");
4486   Lex();
4487 
4488   if (getLexer().isNot(AsmToken::String))
4489     return TokError("invalid arch name");
4490 
4491   ArchName = getLexer().getTok().getStringContents();
4492   Lex();
4493 
4494   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4495                                                     VendorName, ArchName);
4496   return false;
4497 }
4498 
4499 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4500                                                amd_kernel_code_t &Header) {
4501   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4502   // assembly for backwards compatibility.
4503   if (ID == "max_scratch_backing_memory_byte_size") {
4504     Parser.eatToEndOfStatement();
4505     return false;
4506   }
4507 
4508   SmallString<40> ErrStr;
4509   raw_svector_ostream Err(ErrStr);
4510   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4511     return TokError(Err.str());
4512   }
4513   Lex();
4514 
4515   if (ID == "enable_wavefront_size32") {
4516     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4517       if (!isGFX10Plus())
4518         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4519       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4520         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4521     } else {
4522       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4523         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4524     }
4525   }
4526 
4527   if (ID == "wavefront_size") {
4528     if (Header.wavefront_size == 5) {
4529       if (!isGFX10Plus())
4530         return TokError("wavefront_size=5 is only allowed on GFX10+");
4531       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4532         return TokError("wavefront_size=5 requires +WavefrontSize32");
4533     } else if (Header.wavefront_size == 6) {
4534       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4535         return TokError("wavefront_size=6 requires +WavefrontSize64");
4536     }
4537   }
4538 
4539   if (ID == "enable_wgp_mode") {
4540     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4541         !isGFX10Plus())
4542       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4543   }
4544 
4545   if (ID == "enable_mem_ordered") {
4546     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4547         !isGFX10Plus())
4548       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4549   }
4550 
4551   if (ID == "enable_fwd_progress") {
4552     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4553         !isGFX10Plus())
4554       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4555   }
4556 
4557   return false;
4558 }
4559 
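/// ParseDirectiveAMDKernelCodeT
///  ::= .amd_kernel_code_t <field-assignments> .end_amd_kernel_code_t
///
/// A minimal sketch of the expected form (field names are illustrative and
/// are validated against amd_kernel_code_t by parseAmdKernelCodeField):
///   .amd_kernel_code_t
///     wavefront_size = 6
///     enable_wgp_mode = 0
///   .end_amd_kernel_code_t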
4560 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4561   amd_kernel_code_t Header;
4562   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4563 
4564   while (true) {
4565     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4566     // will set the current token to EndOfStatement.
4567     while(getLexer().is(AsmToken::EndOfStatement))
4568       Lex();
4569 
4570     if (getLexer().isNot(AsmToken::Identifier))
4571       return TokError("expected value identifier or .end_amd_kernel_code_t");
4572 
4573     StringRef ID = getLexer().getTok().getIdentifier();
4574     Lex();
4575 
4576     if (ID == ".end_amd_kernel_code_t")
4577       break;
4578 
4579     if (ParseAMDKernelCodeTValue(ID, Header))
4580       return true;
4581   }
4582 
4583   getTargetStreamer().EmitAMDKernelCodeT(Header);
4584 
4585   return false;
4586 }
4587 
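/// ParseDirectiveAMDGPUHsaKernel
///  ::= .amdgpu_hsa_kernel <symbol>
///
/// e.g. (illustrative): .amdgpu_hsa_kernel my_kernel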
4588 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4589   if (getLexer().isNot(AsmToken::Identifier))
4590     return TokError("expected symbol name");
4591 
4592   StringRef KernelName = Parser.getTok().getString();
4593 
4594   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4595                                            ELF::STT_AMDGPU_HSA_KERNEL);
4596   Lex();
4597 
4598   KernelScope.initialize(getContext());
4599   return false;
4600 }
4601 
4602 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4603   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4604     return Error(getParser().getTok().getLoc(),
4605                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4606                  "architectures");
4607   }
4608 
4609   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4610 
4611   std::string ISAVersionStringFromSTI;
4612   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4613   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4614 
4615   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4616     return Error(getParser().getTok().getLoc(),
4617                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4618                  "arguments specified through the command line");
4619   }
4620 
4621   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4622   Lex();
4623 
4624   return false;
4625 }
4626 
4627 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4628   const char *AssemblerDirectiveBegin;
4629   const char *AssemblerDirectiveEnd;
4630   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4631       isHsaAbiVersion3(&getSTI())
4632           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4633                             HSAMD::V3::AssemblerDirectiveEnd)
4634           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4635                             HSAMD::AssemblerDirectiveEnd);
4636 
4637   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4638     return Error(getParser().getTok().getLoc(),
4639                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4640                  "not available on non-amdhsa OSes")).str());
4641   }
4642 
4643   std::string HSAMetadataString;
4644   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4645                           HSAMetadataString))
4646     return true;
4647 
4648   if (isHsaAbiVersion3(&getSTI())) {
4649     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4650       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4651   } else {
4652     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4653       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4654   }
4655 
4656   return false;
4657 }
4658 
4659 /// Common code to parse out a block of text (typically YAML) between start and
4660 /// end directives.
4661 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4662                                           const char *AssemblerDirectiveEnd,
4663                                           std::string &CollectString) {
4664 
4665   raw_string_ostream CollectStream(CollectString);
4666 
4667   getLexer().setSkipSpace(false);
4668 
4669   bool FoundEnd = false;
4670   while (!getLexer().is(AsmToken::Eof)) {
4671     while (getLexer().is(AsmToken::Space)) {
4672       CollectStream << getLexer().getTok().getString();
4673       Lex();
4674     }
4675 
4676     if (getLexer().is(AsmToken::Identifier)) {
4677       StringRef ID = getLexer().getTok().getIdentifier();
4678       if (ID == AssemblerDirectiveEnd) {
4679         Lex();
4680         FoundEnd = true;
4681         break;
4682       }
4683     }
4684 
4685     CollectStream << Parser.parseStringToEndOfStatement()
4686                   << getContext().getAsmInfo()->getSeparatorString();
4687 
4688     Parser.eatToEndOfStatement();
4689   }
4690 
4691   getLexer().setSkipSpace(true);
4692 
4693   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4694     return TokError(Twine("expected directive ") +
4695                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4696   }
4697 
4698   CollectStream.flush();
4699   return false;
4700 }
4701 
4702 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4703 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4704   std::string String;
4705   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4706                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4707     return true;
4708 
4709   auto PALMetadata = getTargetStreamer().getPALMetadata();
4710   if (!PALMetadata->setFromString(String))
4711     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4712   return false;
4713 }
4714 
4715 /// Parse the assembler directive for old linear-format PAL metadata.
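/// The operand list is an even-length, comma-separated sequence of
/// register/value pairs, e.g. (illustrative values): 0x2c0a, 0x0, 0x2c0b, 0x1000.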
4716 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4717   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4718     return Error(getParser().getTok().getLoc(),
4719                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4720                  "not available on non-amdpal OSes")).str());
4721   }
4722 
4723   auto PALMetadata = getTargetStreamer().getPALMetadata();
4724   PALMetadata->setLegacy();
4725   for (;;) {
4726     uint32_t Key, Value;
4727     if (ParseAsAbsoluteExpression(Key)) {
4728       return TokError(Twine("invalid value in ") +
4729                       Twine(PALMD::AssemblerDirective));
4730     }
4731     if (getLexer().isNot(AsmToken::Comma)) {
4732       return TokError(Twine("expected an even number of values in ") +
4733                       Twine(PALMD::AssemblerDirective));
4734     }
4735     Lex();
4736     if (ParseAsAbsoluteExpression(Value)) {
4737       return TokError(Twine("invalid value in ") +
4738                       Twine(PALMD::AssemblerDirective));
4739     }
4740     PALMetadata->setRegister(Key, Value);
4741     if (getLexer().isNot(AsmToken::Comma))
4742       break;
4743     Lex();
4744   }
4745   return false;
4746 }
4747 
4748 /// ParseDirectiveAMDGPULDS
4749 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
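///  e.g. (illustrative): .amdgpu_lds lds_buffer, 4096, 16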
4750 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4751   if (getParser().checkForValidSection())
4752     return true;
4753 
4754   StringRef Name;
4755   SMLoc NameLoc = getLexer().getLoc();
4756   if (getParser().parseIdentifier(Name))
4757     return TokError("expected identifier in directive");
4758 
4759   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4760   if (parseToken(AsmToken::Comma, "expected ','"))
4761     return true;
4762 
4763   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4764 
4765   int64_t Size;
4766   SMLoc SizeLoc = getLexer().getLoc();
4767   if (getParser().parseAbsoluteExpression(Size))
4768     return true;
4769   if (Size < 0)
4770     return Error(SizeLoc, "size must be non-negative");
4771   if (Size > LocalMemorySize)
4772     return Error(SizeLoc, "size is too large");
4773 
4774   int64_t Alignment = 4;
4775   if (getLexer().is(AsmToken::Comma)) {
4776     Lex();
4777     SMLoc AlignLoc = getLexer().getLoc();
4778     if (getParser().parseAbsoluteExpression(Alignment))
4779       return true;
4780     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4781       return Error(AlignLoc, "alignment must be a power of two");
4782 
4783     // Alignment larger than the size of LDS is possible in theory, as long
4784     // as the linker manages to place the symbol at address 0, but we do want
4785     // to make sure the alignment fits nicely into a 32-bit integer.
4786     if (Alignment >= 1u << 31)
4787       return Error(AlignLoc, "alignment is too large");
4788   }
4789 
4790   if (parseToken(AsmToken::EndOfStatement,
4791                  "unexpected token in '.amdgpu_lds' directive"))
4792     return true;
4793 
4794   Symbol->redefineIfPossible();
4795   if (!Symbol->isUndefined())
4796     return Error(NameLoc, "invalid symbol redefinition");
4797 
4798   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4799   return false;
4800 }
4801 
4802 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4803   StringRef IDVal = DirectiveID.getString();
4804 
4805   if (isHsaAbiVersion3(&getSTI())) {
4806     if (IDVal == ".amdgcn_target")
4807       return ParseDirectiveAMDGCNTarget();
4808 
4809     if (IDVal == ".amdhsa_kernel")
4810       return ParseDirectiveAMDHSAKernel();
4811 
4812     // TODO: Restructure/combine with PAL metadata directive.
4813     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4814       return ParseDirectiveHSAMetadata();
4815   } else {
4816     if (IDVal == ".hsa_code_object_version")
4817       return ParseDirectiveHSACodeObjectVersion();
4818 
4819     if (IDVal == ".hsa_code_object_isa")
4820       return ParseDirectiveHSACodeObjectISA();
4821 
4822     if (IDVal == ".amd_kernel_code_t")
4823       return ParseDirectiveAMDKernelCodeT();
4824 
4825     if (IDVal == ".amdgpu_hsa_kernel")
4826       return ParseDirectiveAMDGPUHsaKernel();
4827 
4828     if (IDVal == ".amd_amdgpu_isa")
4829       return ParseDirectiveISAVersion();
4830 
4831     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4832       return ParseDirectiveHSAMetadata();
4833   }
4834 
4835   if (IDVal == ".amdgpu_lds")
4836     return ParseDirectiveAMDGPULDS();
4837 
4838   if (IDVal == PALMD::AssemblerDirectiveBegin)
4839     return ParseDirectivePALMetadataBegin();
4840 
4841   if (IDVal == PALMD::AssemblerDirective)
4842     return ParseDirectivePALMetadata();
4843 
4844   return true;
4845 }
4846 
4847 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4848                                            unsigned RegNo) const {
4849 
4850   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4851        R.isValid(); ++R) {
4852     if (*R == RegNo)
4853       return isGFX9Plus();
4854   }
4855 
4856   // GFX10 has 2 more SGPRs: 104 and 105.
4857   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4858        R.isValid(); ++R) {
4859     if (*R == RegNo)
4860       return hasSGPR104_SGPR105();
4861   }
4862 
4863   switch (RegNo) {
4864   case AMDGPU::SRC_SHARED_BASE:
4865   case AMDGPU::SRC_SHARED_LIMIT:
4866   case AMDGPU::SRC_PRIVATE_BASE:
4867   case AMDGPU::SRC_PRIVATE_LIMIT:
4868   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4869     return isGFX9Plus();
4870   case AMDGPU::TBA:
4871   case AMDGPU::TBA_LO:
4872   case AMDGPU::TBA_HI:
4873   case AMDGPU::TMA:
4874   case AMDGPU::TMA_LO:
4875   case AMDGPU::TMA_HI:
4876     return !isGFX9Plus();
4877   case AMDGPU::XNACK_MASK:
4878   case AMDGPU::XNACK_MASK_LO:
4879   case AMDGPU::XNACK_MASK_HI:
4880     return (isVI() || isGFX9()) && hasXNACK();
4881   case AMDGPU::SGPR_NULL:
4882     return isGFX10Plus();
4883   default:
4884     break;
4885   }
4886 
4887   if (isCI())
4888     return true;
4889 
4890   if (isSI() || isGFX10Plus()) {
4891     // No flat_scr on SI.
4892     // On GFX10 flat scratch is not a valid register operand and can only be
4893     // accessed with s_setreg/s_getreg.
4894     switch (RegNo) {
4895     case AMDGPU::FLAT_SCR:
4896     case AMDGPU::FLAT_SCR_LO:
4897     case AMDGPU::FLAT_SCR_HI:
4898       return false;
4899     default:
4900       return true;
4901     }
4902   }
4903 
4904   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4905   // SI/CI have.
4906   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4907        R.isValid(); ++R) {
4908     if (*R == RegNo)
4909       return hasSGPR102_SGPR103();
4910   }
4911 
4912   return true;
4913 }
4914 
4915 OperandMatchResultTy
4916 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4917                               OperandMode Mode) {
4918   // Try to parse with a custom parser
4919   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4920 
4921   // If we successfully parsed the operand or if there was an error parsing,
4922   // we are done.
4923   //
4924   // If we are parsing after we reach EndOfStatement then this means we
4925   // are appending default values to the Operands list.  This is only done
4926   // by custom parsers, so we shouldn't continue on to the generic parsing.
4927   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4928       getLexer().is(AsmToken::EndOfStatement))
4929     return ResTy;
4930 
4931   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4932     unsigned Prefix = Operands.size();
4933     SMLoc LBraceLoc = getTok().getLoc();
4934     Parser.Lex(); // eat the '['
4935 
4936     for (;;) {
4937       ResTy = parseReg(Operands);
4938       if (ResTy != MatchOperand_Success)
4939         return ResTy;
4940 
4941       if (getLexer().is(AsmToken::RBrac))
4942         break;
4943 
4944       if (getLexer().isNot(AsmToken::Comma))
4945         return MatchOperand_ParseFail;
4946       Parser.Lex();
4947     }
4948 
4949     if (Operands.size() - Prefix > 1) {
4950       Operands.insert(Operands.begin() + Prefix,
4951                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4952       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4953                                                     getTok().getLoc()));
4954     }
4955 
4956     Parser.Lex(); // eat the ']'
4957     return MatchOperand_Success;
4958   }
4959 
4960   return parseRegOrImm(Operands);
4961 }
4962 
4963 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4964   // Clear any forced encodings from the previous instruction.
4965   setForcedEncodingSize(0);
4966   setForcedDPP(false);
4967   setForcedSDWA(false);
4968 
4969   if (Name.endswith("_e64")) {
4970     setForcedEncodingSize(64);
4971     return Name.substr(0, Name.size() - 4);
4972   } else if (Name.endswith("_e32")) {
4973     setForcedEncodingSize(32);
4974     return Name.substr(0, Name.size() - 4);
4975   } else if (Name.endswith("_dpp")) {
4976     setForcedDPP(true);
4977     return Name.substr(0, Name.size() - 4);
4978   } else if (Name.endswith("_sdwa")) {
4979     setForcedSDWA(true);
4980     return Name.substr(0, Name.size() - 5);
4981   }
4982   return Name;
4983 }
4984 
4985 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4986                                        StringRef Name,
4987                                        SMLoc NameLoc, OperandVector &Operands) {
4988   // Add the instruction mnemonic
4989   Name = parseMnemonicSuffix(Name);
4990   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4991 
4992   bool IsMIMG = Name.startswith("image_");
4993 
4994   while (!getLexer().is(AsmToken::EndOfStatement)) {
4995     OperandMode Mode = OperandMode_Default;
4996     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
4997       Mode = OperandMode_NSA;
4998     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4999 
5000     // Eat the comma if there is one.
5001     if (getLexer().is(AsmToken::Comma))
5002       Parser.Lex();
5003 
5004     if (Res != MatchOperand_Success) {
5005       checkUnsupportedInstruction(Name, NameLoc);
5006       if (!Parser.hasPendingError()) {
5007         // FIXME: use real operand location rather than the current location.
5008         StringRef Msg =
5009           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5010                                             "not a valid operand.";
5011         Error(getLexer().getLoc(), Msg);
5012       }
5013       while (!getLexer().is(AsmToken::EndOfStatement)) {
5014         Parser.Lex();
5015       }
5016       return true;
5017     }
5018   }
5019 
5020   return false;
5021 }
5022 
5023 //===----------------------------------------------------------------------===//
5024 // Utility functions
5025 //===----------------------------------------------------------------------===//
5026 
5027 OperandMatchResultTy
5028 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5029 
5030   if (!trySkipId(Prefix, AsmToken::Colon))
5031     return MatchOperand_NoMatch;
5032 
5033   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5034 }
5035 
5036 OperandMatchResultTy
5037 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5038                                     AMDGPUOperand::ImmTy ImmTy,
5039                                     bool (*ConvertResult)(int64_t&)) {
5040   SMLoc S = getLoc();
5041   int64_t Value = 0;
5042 
5043   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5044   if (Res != MatchOperand_Success)
5045     return Res;
5046 
5047   if (ConvertResult && !ConvertResult(Value)) {
5048     Error(S, "invalid " + StringRef(Prefix) + " value.");
5049   }
5050 
5051   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5052   return MatchOperand_Success;
5053 }
5054 
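// Parses a prefixed bit array, e.g. (illustrative) "op_sel:[0,1]" or
// "neg_lo:[1,0,0,1]". Each element must be 0 or 1, at most 4 elements are
// accepted, and the bits are packed into a single immediate operand.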
5055 OperandMatchResultTy
5056 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5057                                              OperandVector &Operands,
5058                                              AMDGPUOperand::ImmTy ImmTy,
5059                                              bool (*ConvertResult)(int64_t&)) {
5060   SMLoc S = getLoc();
5061   if (!trySkipId(Prefix, AsmToken::Colon))
5062     return MatchOperand_NoMatch;
5063 
5064   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5065     return MatchOperand_ParseFail;
5066 
5067   unsigned Val = 0;
5068   const unsigned MaxSize = 4;
5069 
5070   // FIXME: How to verify the number of elements matches the number of src
5071   // operands?
5072   for (int I = 0; ; ++I) {
5073     int64_t Op;
5074     SMLoc Loc = getLoc();
5075     if (!parseExpr(Op))
5076       return MatchOperand_ParseFail;
5077 
5078     if (Op != 0 && Op != 1) {
5079       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5080       return MatchOperand_ParseFail;
5081     }
5082 
5083     Val |= (Op << I);
5084 
5085     if (trySkipToken(AsmToken::RBrac))
5086       break;
5087 
5088     if (I + 1 == MaxSize) {
5089       Error(getLoc(), "expected a closing square bracket");
5090       return MatchOperand_ParseFail;
5091     }
5092 
5093     if (!skipToken(AsmToken::Comma, "expected a comma"))
5094       return MatchOperand_ParseFail;
5095   }
5096 
5097   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5098   return MatchOperand_Success;
5099 }
5100 
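// Parses a named single-bit modifier, e.g. (illustrative) "glc" / "noglc":
// the plain name sets the bit, the "no"-prefixed form clears it, and a
// missing operand at the end of the statement yields the default of 0.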
5101 OperandMatchResultTy
5102 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5103                                AMDGPUOperand::ImmTy ImmTy) {
5104   int64_t Bit = 0;
5105   SMLoc S = Parser.getTok().getLoc();
5106 
5107   // If we are at the end of the statement, this is a default argument, so
5108   // use the default value.
5109   if (getLexer().isNot(AsmToken::EndOfStatement)) {
5110     switch(getLexer().getKind()) {
5111       case AsmToken::Identifier: {
5112         StringRef Tok = Parser.getTok().getString();
5113         if (Tok == Name) {
5114           if (Tok == "r128" && !hasMIMG_R128())
5115             Error(S, "r128 modifier is not supported on this GPU");
5116           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5117             Error(S, "a16 modifier is not supported on this GPU");
5118           Bit = 1;
5119           Parser.Lex();
5120         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5121           Bit = 0;
5122           Parser.Lex();
5123         } else {
5124           return MatchOperand_NoMatch;
5125         }
5126         break;
5127       }
5128       default:
5129         return MatchOperand_NoMatch;
5130     }
5131   }
5132 
5133   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
5134     return MatchOperand_ParseFail;
5135 
5136   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5137     ImmTy = AMDGPUOperand::ImmTyR128A16;
5138 
5139   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5140   return MatchOperand_Success;
5141 }
5142 
5143 static void addOptionalImmOperand(
5144   MCInst& Inst, const OperandVector& Operands,
5145   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5146   AMDGPUOperand::ImmTy ImmT,
5147   int64_t Default = 0) {
5148   auto i = OptionalIdx.find(ImmT);
5149   if (i != OptionalIdx.end()) {
5150     unsigned Idx = i->second;
5151     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5152   } else {
5153     Inst.addOperand(MCOperand::createImm(Default));
5154   }
5155 }
5156 
5157 OperandMatchResultTy
5158 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5159   if (getLexer().isNot(AsmToken::Identifier)) {
5160     return MatchOperand_NoMatch;
5161   }
5162   StringRef Tok = Parser.getTok().getString();
5163   if (Tok != Prefix) {
5164     return MatchOperand_NoMatch;
5165   }
5166 
5167   Parser.Lex();
5168   if (getLexer().isNot(AsmToken::Colon)) {
5169     return MatchOperand_ParseFail;
5170   }
5171 
5172   Parser.Lex();
5173   if (getLexer().isNot(AsmToken::Identifier)) {
5174     return MatchOperand_ParseFail;
5175   }
5176 
5177   Value = Parser.getTok().getString();
5178   return MatchOperand_Success;
5179 }
5180 
5181 //===----------------------------------------------------------------------===//
5182 // MTBUF format
5183 //===----------------------------------------------------------------------===//
5184 
5185 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5186                                   int64_t MaxVal,
5187                                   int64_t &Fmt) {
5188   int64_t Val;
5189   SMLoc Loc = getLoc();
5190 
5191   auto Res = parseIntWithPrefix(Pref, Val);
5192   if (Res == MatchOperand_ParseFail)
5193     return false;
5194   if (Res == MatchOperand_NoMatch)
5195     return true;
5196 
5197   if (Val < 0 || Val > MaxVal) {
5198     Error(Loc, Twine("out of range ", StringRef(Pref)));
5199     return false;
5200   }
5201 
5202   Fmt = Val;
5203   return true;
5204 }
5205 
5206 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5207 // values to live in a joint format operand in the MCInst encoding.
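// A sketch of the accepted forms (both fields are optional and may appear
// in either order, optionally separated by a comma):
//   dfmt:5 nfmt:2
//   nfmt:2, dfmt:5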
5208 OperandMatchResultTy
5209 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5210   using namespace llvm::AMDGPU::MTBUFFormat;
5211 
5212   int64_t Dfmt = DFMT_UNDEF;
5213   int64_t Nfmt = NFMT_UNDEF;
5214 
5215   // dfmt and nfmt can appear in either order, and each is optional.
5216   for (int I = 0; I < 2; ++I) {
5217     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5218       return MatchOperand_ParseFail;
5219 
5220     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5221       return MatchOperand_ParseFail;
5222     }
5223     // Skip the optional comma between dfmt and nfmt, but guard against
5224     // two commas following each other.
5225     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5226         !peekToken().is(AsmToken::Comma)) {
5227       trySkipToken(AsmToken::Comma);
5228     }
5229   }
5230 
5231   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5232     return MatchOperand_NoMatch;
5233 
5234   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5235   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5236 
5237   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5238   return MatchOperand_Success;
5239 }
5240 
5241 OperandMatchResultTy
5242 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5243   using namespace llvm::AMDGPU::MTBUFFormat;
5244 
5245   int64_t Fmt = UFMT_UNDEF;
5246 
5247   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5248     return MatchOperand_ParseFail;
5249 
5250   if (Fmt == UFMT_UNDEF)
5251     return MatchOperand_NoMatch;
5252 
5253   Format = Fmt;
5254   return MatchOperand_Success;
5255 }
5256 
5257 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5258                                     int64_t &Nfmt,
5259                                     StringRef FormatStr,
5260                                     SMLoc Loc) {
5261   using namespace llvm::AMDGPU::MTBUFFormat;
5262   int64_t Format;
5263 
5264   Format = getDfmt(FormatStr);
5265   if (Format != DFMT_UNDEF) {
5266     Dfmt = Format;
5267     return true;
5268   }
5269 
5270   Format = getNfmt(FormatStr, getSTI());
5271   if (Format != NFMT_UNDEF) {
5272     Nfmt = Format;
5273     return true;
5274   }
5275 
5276   Error(Loc, "unsupported format");
5277   return false;
5278 }
5279 
5280 OperandMatchResultTy
5281 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5282                                           SMLoc FormatLoc,
5283                                           int64_t &Format) {
5284   using namespace llvm::AMDGPU::MTBUFFormat;
5285 
5286   int64_t Dfmt = DFMT_UNDEF;
5287   int64_t Nfmt = NFMT_UNDEF;
5288   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5289     return MatchOperand_ParseFail;
5290 
5291   if (trySkipToken(AsmToken::Comma)) {
5292     StringRef Str;
5293     SMLoc Loc = getLoc();
5294     if (!parseId(Str, "expected a format string") ||
5295         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5296       return MatchOperand_ParseFail;
5297     }
5298     if (Dfmt == DFMT_UNDEF) {
5299       Error(Loc, "duplicate numeric format");
5300       return MatchOperand_ParseFail;
5301     } else if (Nfmt == NFMT_UNDEF) {
5302       Error(Loc, "duplicate data format");
5303       return MatchOperand_ParseFail;
5304     }
5305   }
5306 
5307   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5308   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5309 
5310   if (isGFX10Plus()) {
5311     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5312     if (Ufmt == UFMT_UNDEF) {
5313       Error(FormatLoc, "unsupported format");
5314       return MatchOperand_ParseFail;
5315     }
5316     Format = Ufmt;
5317   } else {
5318     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5319   }
5320 
5321   return MatchOperand_Success;
5322 }
5323 
5324 OperandMatchResultTy
5325 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5326                                             SMLoc Loc,
5327                                             int64_t &Format) {
5328   using namespace llvm::AMDGPU::MTBUFFormat;
5329 
5330   auto Id = getUnifiedFormat(FormatStr);
5331   if (Id == UFMT_UNDEF)
5332     return MatchOperand_NoMatch;
5333 
5334   if (!isGFX10Plus()) {
5335     Error(Loc, "unified format is not supported on this GPU");
5336     return MatchOperand_ParseFail;
5337   }
5338 
5339   Format = Id;
5340   return MatchOperand_Success;
5341 }
5342 
5343 OperandMatchResultTy
5344 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5345   using namespace llvm::AMDGPU::MTBUFFormat;
5346   SMLoc Loc = getLoc();
5347 
5348   if (!parseExpr(Format))
5349     return MatchOperand_ParseFail;
5350   if (!isValidFormatEncoding(Format, getSTI())) {
5351     Error(Loc, "out of range format");
5352     return MatchOperand_ParseFail;
5353   }
5354 
5355   return MatchOperand_Success;
5356 }
5357 
5358 OperandMatchResultTy
5359 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5360   using namespace llvm::AMDGPU::MTBUFFormat;
5361 
5362   if (!trySkipId("format", AsmToken::Colon))
5363     return MatchOperand_NoMatch;
5364 
5365   if (trySkipToken(AsmToken::LBrac)) {
5366     StringRef FormatStr;
5367     SMLoc Loc = getLoc();
5368     if (!parseId(FormatStr, "expected a format string"))
5369       return MatchOperand_ParseFail;
5370 
5371     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5372     if (Res == MatchOperand_NoMatch)
5373       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5374     if (Res != MatchOperand_Success)
5375       return Res;
5376 
5377     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5378       return MatchOperand_ParseFail;
5379 
5380     return MatchOperand_Success;
5381   }
5382 
5383   return parseNumericFormat(Format);
5384 }
5385 
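// Parses the MTBUF format operand, which may appear either before or after
// the soffset operand. Illustrative forms: a raw number such as format:5, or
// a bracketed symbolic form such as format:[<dfmt name>, <nfmt name>]
// (a single unified format name on GFX10+).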
5386 OperandMatchResultTy
5387 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5388   using namespace llvm::AMDGPU::MTBUFFormat;
5389 
5390   int64_t Format = getDefaultFormatEncoding(getSTI());
5391   OperandMatchResultTy Res;
5392   SMLoc Loc = getLoc();
5393 
5394   // Parse legacy format syntax.
5395   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5396   if (Res == MatchOperand_ParseFail)
5397     return Res;
5398 
5399   bool FormatFound = (Res == MatchOperand_Success);
5400 
5401   Operands.push_back(
5402     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5403 
5404   if (FormatFound)
5405     trySkipToken(AsmToken::Comma);
5406 
5407   if (isToken(AsmToken::EndOfStatement)) {
5408     // We are expecting an soffset operand,
5409     // but let the matcher handle the error.
5410     return MatchOperand_Success;
5411   }
5412 
5413   // Parse soffset.
5414   Res = parseRegOrImm(Operands);
5415   if (Res != MatchOperand_Success)
5416     return Res;
5417 
5418   trySkipToken(AsmToken::Comma);
5419 
5420   if (!FormatFound) {
5421     Res = parseSymbolicOrNumericFormat(Format);
5422     if (Res == MatchOperand_ParseFail)
5423       return Res;
5424     if (Res == MatchOperand_Success) {
5425       auto Size = Operands.size();
5426       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5427       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5428       Op.setImm(Format);
5429     }
5430     return MatchOperand_Success;
5431   }
5432 
5433   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5434     Error(getLoc(), "duplicate format");
5435     return MatchOperand_ParseFail;
5436   }
5437   return MatchOperand_Success;
5438 }
5439 
5440 //===----------------------------------------------------------------------===//
5441 // ds
5442 //===----------------------------------------------------------------------===//
5443 
5444 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5445                                     const OperandVector &Operands) {
5446   OptionalImmIndexMap OptionalIdx;
5447 
5448   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5449     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5450 
5451     // Add the register arguments
5452     if (Op.isReg()) {
5453       Op.addRegOperands(Inst, 1);
5454       continue;
5455     }
5456 
5457     // Handle optional arguments
5458     OptionalIdx[Op.getImmTy()] = i;
5459   }
5460 
5461   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5462   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5463   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5464 
5465   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5466 }
5467 
5468 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5469                                 bool IsGdsHardcoded) {
5470   OptionalImmIndexMap OptionalIdx;
5471 
5472   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5473     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5474 
5475     // Add the register arguments
5476     if (Op.isReg()) {
5477       Op.addRegOperands(Inst, 1);
5478       continue;
5479     }
5480 
5481     if (Op.isToken() && Op.getToken() == "gds") {
5482       IsGdsHardcoded = true;
5483       continue;
5484     }
5485 
5486     // Handle optional arguments
5487     OptionalIdx[Op.getImmTy()] = i;
5488   }
5489 
5490   AMDGPUOperand::ImmTy OffsetType =
5491     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5492      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5493      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5494                                                       AMDGPUOperand::ImmTyOffset;
5495 
5496   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5497 
5498   if (!IsGdsHardcoded) {
5499     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5500   }
5501   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5502 }
5503 
5504 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5505   OptionalImmIndexMap OptionalIdx;
5506 
5507   unsigned OperandIdx[4];
5508   unsigned EnMask = 0;
5509   int SrcIdx = 0;
5510 
5511   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5512     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5513 
5514     // Add the register arguments
5515     if (Op.isReg()) {
5516       assert(SrcIdx < 4);
5517       OperandIdx[SrcIdx] = Inst.size();
5518       Op.addRegOperands(Inst, 1);
5519       ++SrcIdx;
5520       continue;
5521     }
5522 
5523     if (Op.isOff()) {
5524       assert(SrcIdx < 4);
5525       OperandIdx[SrcIdx] = Inst.size();
5526       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5527       ++SrcIdx;
5528       continue;
5529     }
5530 
5531     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5532       Op.addImmOperands(Inst, 1);
5533       continue;
5534     }
5535 
5536     if (Op.isToken() && Op.getToken() == "done")
5537       continue;
5538 
5539     // Handle optional arguments
5540     OptionalIdx[Op.getImmTy()] = i;
5541   }
5542 
5543   assert(SrcIdx == 4);
5544 
5545   bool Compr = false;
5546   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5547     Compr = true;
5548     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5549     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5550     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5551   }
5552 
5553   for (auto i = 0; i < SrcIdx; ++i) {
5554     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5555       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5556     }
5557   }
5558 
5559   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5560   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5561 
5562   Inst.addOperand(MCOperand::createImm(EnMask));
5563 }
5564 
5565 //===----------------------------------------------------------------------===//
5566 // s_waitcnt
5567 //===----------------------------------------------------------------------===//
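// A sketch of the s_waitcnt operand forms handled below (illustrative):
//   vmcnt(0) expcnt(0) lgkmcnt(0)
//   vmcnt(1) & lgkmcnt(2)
//   0x0
// Counters left unspecified keep their all-ones "no wait" default bits from
// getWaitcntBitMask().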
5568 
5569 static bool
5570 encodeCnt(
5571   const AMDGPU::IsaVersion ISA,
5572   int64_t &IntVal,
5573   int64_t CntVal,
5574   bool Saturate,
5575   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5576   unsigned (*decode)(const IsaVersion &Version, unsigned))
5577 {
5578   bool Failed = false;
5579 
5580   IntVal = encode(ISA, IntVal, CntVal);
5581   if (CntVal != decode(ISA, IntVal)) {
5582     if (Saturate) {
5583       IntVal = encode(ISA, IntVal, -1);
5584     } else {
5585       Failed = true;
5586     }
5587   }
5588   return Failed;
5589 }
5590 
5591 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5592 
5593   SMLoc CntLoc = getLoc();
5594   StringRef CntName = getTokenStr();
5595 
5596   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5597       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5598     return false;
5599 
5600   int64_t CntVal;
5601   SMLoc ValLoc = getLoc();
5602   if (!parseExpr(CntVal))
5603     return false;
5604 
5605   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5606 
5607   bool Failed = true;
5608   bool Sat = CntName.endswith("_sat");
5609 
5610   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5611     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5612   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5613     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5614   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5615     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5616   } else {
5617     Error(CntLoc, "invalid counter name " + CntName);
5618     return false;
5619   }
5620 
5621   if (Failed) {
5622     Error(ValLoc, "too large value for " + CntName);
5623     return false;
5624   }
5625 
5626   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5627     return false;
5628 
5629   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5630     if (isToken(AsmToken::EndOfStatement)) {
5631       Error(getLoc(), "expected a counter name");
5632       return false;
5633     }
5634   }
5635 
5636   return true;
5637 }
5638 
5639 OperandMatchResultTy
5640 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5641   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5642   int64_t Waitcnt = getWaitcntBitMask(ISA);
5643   SMLoc S = getLoc();
5644 
5645   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5646     while (!isToken(AsmToken::EndOfStatement)) {
5647       if (!parseCnt(Waitcnt))
5648         return MatchOperand_ParseFail;
5649     }
5650   } else {
5651     if (!parseExpr(Waitcnt))
5652       return MatchOperand_ParseFail;
5653   }
5654 
5655   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5656   return MatchOperand_Success;
5657 }
5658 
5659 bool
5660 AMDGPUOperand::isSWaitCnt() const {
5661   return isImm();
5662 }
5663 
5664 //===----------------------------------------------------------------------===//
5665 // hwreg
5666 //===----------------------------------------------------------------------===//
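// Illustrative operand forms (register names are resolved via getHwregId;
// numeric codes are accepted as well):
//   hwreg(<name>)             - whole register, default offset/width
//   hwreg(<name>, 7, 3)       - bitfield at offset 7 of width 3
//   hwreg(6)                  - numeric register code
//   51                        - raw 16-bit immediate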
5667 
5668 bool
5669 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5670                                 OperandInfoTy &Offset,
5671                                 OperandInfoTy &Width) {
5672   using namespace llvm::AMDGPU::Hwreg;
5673 
5674   // The register may be specified by name or using a numeric code
5675   HwReg.Loc = getLoc();
5676   if (isToken(AsmToken::Identifier) &&
5677       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5678     HwReg.IsSymbolic = true;
5679     lex(); // skip register name
5680   } else if (!parseExpr(HwReg.Id)) {
5681     return false;
5682   }
5683 
5684   if (trySkipToken(AsmToken::RParen))
5685     return true;
5686 
5687   // parse optional params
5688   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5689     return false;
5690 
5691   Offset.Loc = getLoc();
5692   if (!parseExpr(Offset.Id))
5693     return false;
5694 
5695   if (!skipToken(AsmToken::Comma, "expected a comma"))
5696     return false;
5697 
5698   Width.Loc = getLoc();
5699   return parseExpr(Width.Id) &&
5700          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5701 }
5702 
5703 bool
5704 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5705                                const OperandInfoTy &Offset,
5706                                const OperandInfoTy &Width) {
5707 
5708   using namespace llvm::AMDGPU::Hwreg;
5709 
5710   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5711     Error(HwReg.Loc,
5712           "specified hardware register is not supported on this GPU");
5713     return false;
5714   }
5715   if (!isValidHwreg(HwReg.Id)) {
5716     Error(HwReg.Loc,
5717           "invalid code of hardware register: only 6-bit values are legal");
5718     return false;
5719   }
5720   if (!isValidHwregOffset(Offset.Id)) {
5721     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5722     return false;
5723   }
5724   if (!isValidHwregWidth(Width.Id)) {
5725     Error(Width.Loc,
5726           "invalid bitfield width: only values from 1 to 32 are legal");
5727     return false;
5728   }
5729   return true;
5730 }
5731 
5732 OperandMatchResultTy
5733 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5734   using namespace llvm::AMDGPU::Hwreg;
5735 
5736   int64_t ImmVal = 0;
5737   SMLoc Loc = getLoc();
5738 
5739   if (trySkipId("hwreg", AsmToken::LParen)) {
5740     OperandInfoTy HwReg(ID_UNKNOWN_);
5741     OperandInfoTy Offset(OFFSET_DEFAULT_);
5742     OperandInfoTy Width(WIDTH_DEFAULT_);
5743     if (parseHwregBody(HwReg, Offset, Width) &&
5744         validateHwreg(HwReg, Offset, Width)) {
5745       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5746     } else {
5747       return MatchOperand_ParseFail;
5748     }
5749   } else if (parseExpr(ImmVal)) {
5750     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5751       Error(Loc, "invalid immediate: only 16-bit values are legal");
5752       return MatchOperand_ParseFail;
5753     }
5754   } else {
5755     return MatchOperand_ParseFail;
5756   }
5757 
5758   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5759   return MatchOperand_Success;
5760 }
5761 
5762 bool AMDGPUOperand::isHwreg() const {
5763   return isImmTy(ImmTyHwreg);
5764 }
5765 
5766 //===----------------------------------------------------------------------===//
5767 // sendmsg
5768 //===----------------------------------------------------------------------===//
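// Illustrative operand forms (message and operation names are resolved via
// getMsgId / getMsgOpId; numeric ids are accepted as well):
//   sendmsg(<msg>)
//   sendmsg(<msg>, <op>)
//   sendmsg(<msg>, <op>, <stream>)
//   3                          - raw 16-bit immediate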
5769 
5770 bool
5771 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5772                                   OperandInfoTy &Op,
5773                                   OperandInfoTy &Stream) {
5774   using namespace llvm::AMDGPU::SendMsg;
5775 
5776   Msg.Loc = getLoc();
5777   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5778     Msg.IsSymbolic = true;
5779     lex(); // skip message name
5780   } else if (!parseExpr(Msg.Id)) {
5781     return false;
5782   }
5783 
5784   if (trySkipToken(AsmToken::Comma)) {
5785     Op.IsDefined = true;
5786     Op.Loc = getLoc();
5787     if (isToken(AsmToken::Identifier) &&
5788         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5789       lex(); // skip operation name
5790     } else if (!parseExpr(Op.Id)) {
5791       return false;
5792     }
5793 
5794     if (trySkipToken(AsmToken::Comma)) {
5795       Stream.IsDefined = true;
5796       Stream.Loc = getLoc();
5797       if (!parseExpr(Stream.Id))
5798         return false;
5799     }
5800   }
5801 
5802   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5803 }
5804 
5805 bool
5806 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5807                                  const OperandInfoTy &Op,
5808                                  const OperandInfoTy &Stream) {
5809   using namespace llvm::AMDGPU::SendMsg;
5810 
5811   // Validation strictness depends on whether the message is specified
5812   // in a symbolic or in a numeric form. In the latter case,
5813   // only the encoding possibility is checked.
5814   bool Strict = Msg.IsSymbolic;
5815 
5816   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5817     Error(Msg.Loc, "invalid message id");
5818     return false;
5819   }
5820   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5821     if (Op.IsDefined) {
5822       Error(Op.Loc, "message does not support operations");
5823     } else {
5824       Error(Msg.Loc, "missing message operation");
5825     }
5826     return false;
5827   }
5828   if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5829     Error(Op.Loc, "invalid operation id");
5830     return false;
5831   }
5832   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5833     Error(Stream.Loc, "message operation does not support streams");
5834     return false;
5835   }
5836   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5837     Error(Stream.Loc, "invalid message stream id");
5838     return false;
5839   }
5840   return true;
5841 }
5842 
5843 OperandMatchResultTy
5844 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5845   using namespace llvm::AMDGPU::SendMsg;
5846 
5847   int64_t ImmVal = 0;
5848   SMLoc Loc = getLoc();
5849 
5850   if (trySkipId("sendmsg", AsmToken::LParen)) {
5851     OperandInfoTy Msg(ID_UNKNOWN_);
5852     OperandInfoTy Op(OP_NONE_);
5853     OperandInfoTy Stream(STREAM_ID_NONE_);
5854     if (parseSendMsgBody(Msg, Op, Stream) &&
5855         validateSendMsg(Msg, Op, Stream)) {
5856       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5857     } else {
5858       return MatchOperand_ParseFail;
5859     }
5860   } else if (parseExpr(ImmVal)) {
5861     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5862       Error(Loc, "invalid immediate: only 16-bit values are legal");
5863       return MatchOperand_ParseFail;
5864     }
5865   } else {
5866     return MatchOperand_ParseFail;
5867   }
5868 
5869   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5870   return MatchOperand_Success;
5871 }
5872 
5873 bool AMDGPUOperand::isSendMsg() const {
5874   return isImmTy(ImmTySendMsg);
5875 }
5876 
5877 //===----------------------------------------------------------------------===//
5878 // v_interp
5879 //===----------------------------------------------------------------------===//
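// Illustrative operands (see parseInterpSlot and parseInterpAttr):
//   p10, p20, p0               - interpolation slots
//   attr0.x ... attr63.w       - attribute number and channel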
5880 
5881 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5882   if (getLexer().getKind() != AsmToken::Identifier)
5883     return MatchOperand_NoMatch;
5884 
5885   StringRef Str = Parser.getTok().getString();
5886   int Slot = StringSwitch<int>(Str)
5887     .Case("p10", 0)
5888     .Case("p20", 1)
5889     .Case("p0", 2)
5890     .Default(-1);
5891 
5892   SMLoc S = Parser.getTok().getLoc();
5893   if (Slot == -1)
5894     return MatchOperand_ParseFail;
5895 
5896   Parser.Lex();
5897   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5898                                               AMDGPUOperand::ImmTyInterpSlot));
5899   return MatchOperand_Success;
5900 }
5901 
5902 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5903   if (getLexer().getKind() != AsmToken::Identifier)
5904     return MatchOperand_NoMatch;
5905 
5906   StringRef Str = Parser.getTok().getString();
5907   if (!Str.startswith("attr"))
5908     return MatchOperand_NoMatch;
5909 
5910   StringRef Chan = Str.take_back(2);
5911   int AttrChan = StringSwitch<int>(Chan)
5912     .Case(".x", 0)
5913     .Case(".y", 1)
5914     .Case(".z", 2)
5915     .Case(".w", 3)
5916     .Default(-1);
5917   if (AttrChan == -1)
5918     return MatchOperand_ParseFail;
5919 
5920   Str = Str.drop_back(2).drop_front(4);
5921 
5922   uint8_t Attr;
5923   if (Str.getAsInteger(10, Attr))
5924     return MatchOperand_ParseFail;
5925 
5926   SMLoc S = Parser.getTok().getLoc();
5927   Parser.Lex();
5928   if (Attr > 63) {
5929     Error(S, "out of bounds attr");
5930     return MatchOperand_ParseFail;
5931   }
5932 
5933   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5934 
5935   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5936                                               AMDGPUOperand::ImmTyInterpAttr));
5937   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5938                                               AMDGPUOperand::ImmTyAttrChan));
5939   return MatchOperand_Success;
5940 }
5941 
5942 //===----------------------------------------------------------------------===//
5943 // exp
5944 //===----------------------------------------------------------------------===//
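// Illustrative export targets accepted by parseExpTgtImpl:
//   mrt0..mrt7, mrtz, null, pos0..pos3 (pos4 on GFX10+), prim (GFX10+),
//   param0..param31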
5945 
5946 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5947                                                       uint8_t &Val) {
5948   if (Str == "null") {
5949     Val = Exp::ET_NULL;
5950     return MatchOperand_Success;
5951   }
5952 
5953   if (Str.startswith("mrt")) {
5954     Str = Str.drop_front(3);
5955     if (Str == "z") { // == mrtz
5956       Val = Exp::ET_MRTZ;
5957       return MatchOperand_Success;
5958     }
5959 
5960     if (Str.getAsInteger(10, Val))
5961       return MatchOperand_ParseFail;
5962 
5963     if (Val > Exp::ET_MRT7)
5964       return MatchOperand_ParseFail;
5965 
5966     return MatchOperand_Success;
5967   }
5968 
5969   if (Str.startswith("pos")) {
5970     Str = Str.drop_front(3);
5971     if (Str.getAsInteger(10, Val))
5972       return MatchOperand_ParseFail;
5973 
5974     if (Val > (isGFX10Plus() ? 4 : 3))
5975       return MatchOperand_ParseFail;
5976 
5977     Val += Exp::ET_POS0;
5978     return MatchOperand_Success;
5979   }
5980 
5981   if (isGFX10Plus() && Str == "prim") {
5982     Val = Exp::ET_PRIM;
5983     return MatchOperand_Success;
5984   }
5985 
5986   if (Str.startswith("param")) {
5987     Str = Str.drop_front(5);
5988     if (Str.getAsInteger(10, Val))
5989       return MatchOperand_ParseFail;
5990 
5991     if (Val >= 32)
5992       return MatchOperand_ParseFail;
5993 
5994     Val += Exp::ET_PARAM0;
5995     return MatchOperand_Success;
5996   }
5997 
5998   return MatchOperand_ParseFail;
5999 }
6000 
6001 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6002   if (!isToken(AsmToken::Identifier))
6003     return MatchOperand_NoMatch;
6004 
6005   SMLoc S = getLoc();
6006 
6007   uint8_t Val;
6008   auto Res = parseExpTgtImpl(getTokenStr(), Val);
6009   if (Res != MatchOperand_Success) {
6010     Error(S, "invalid exp target");
6011     return Res;
6012   }
6013 
6014   Parser.Lex();
6015   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
6016                                               AMDGPUOperand::ImmTyExpTgt));
6017   return MatchOperand_Success;
6018 }
6019 
6020 //===----------------------------------------------------------------------===//
6021 // parser helpers
6022 //===----------------------------------------------------------------------===//
6023 
6024 bool
6025 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6026   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6027 }
6028 
6029 bool
6030 AMDGPUAsmParser::isId(const StringRef Id) const {
6031   return isId(getToken(), Id);
6032 }
6033 
6034 bool
6035 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6036   return getTokenKind() == Kind;
6037 }
6038 
6039 bool
6040 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6041   if (isId(Id)) {
6042     lex();
6043     return true;
6044   }
6045   return false;
6046 }
6047 
6048 bool
6049 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6050   if (isId(Id) && peekToken().is(Kind)) {
6051     lex();
6052     lex();
6053     return true;
6054   }
6055   return false;
6056 }
6057 
6058 bool
6059 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6060   if (isToken(Kind)) {
6061     lex();
6062     return true;
6063   }
6064   return false;
6065 }
6066 
6067 bool
6068 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6069                            const StringRef ErrMsg) {
6070   if (!trySkipToken(Kind)) {
6071     Error(getLoc(), ErrMsg);
6072     return false;
6073   }
6074   return true;
6075 }
6076 
6077 bool
6078 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
6079   return !getParser().parseAbsoluteExpression(Imm);
6080 }
6081 
6082 bool
6083 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6084   SMLoc S = getLoc();
6085 
6086   const MCExpr *Expr;
6087   if (Parser.parseExpression(Expr))
6088     return false;
6089 
6090   int64_t IntVal;
6091   if (Expr->evaluateAsAbsolute(IntVal)) {
6092     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6093   } else {
6094     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6095   }
6096   return true;
6097 }
6098 
6099 bool
6100 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6101   if (isToken(AsmToken::String)) {
6102     Val = getToken().getStringContents();
6103     lex();
6104     return true;
6105   } else {
6106     Error(getLoc(), ErrMsg);
6107     return false;
6108   }
6109 }
6110 
6111 bool
6112 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6113   if (isToken(AsmToken::Identifier)) {
6114     Val = getTokenStr();
6115     lex();
6116     return true;
6117   } else {
6118     Error(getLoc(), ErrMsg);
6119     return false;
6120   }
6121 }
6122 
6123 AsmToken
6124 AMDGPUAsmParser::getToken() const {
6125   return Parser.getTok();
6126 }
6127 
6128 AsmToken
6129 AMDGPUAsmParser::peekToken() {
6130   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6131 }
6132 
6133 void
6134 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6135   auto TokCount = getLexer().peekTokens(Tokens);
6136 
6137   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6138     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6139 }
6140 
6141 AsmToken::TokenKind
6142 AMDGPUAsmParser::getTokenKind() const {
6143   return getLexer().getKind();
6144 }
6145 
6146 SMLoc
6147 AMDGPUAsmParser::getLoc() const {
6148   return getToken().getLoc();
6149 }
6150 
6151 StringRef
6152 AMDGPUAsmParser::getTokenStr() const {
6153   return getToken().getString();
6154 }
6155 
6156 void
6157 AMDGPUAsmParser::lex() {
6158   Parser.Lex();
6159 }
6160 
6161 SMLoc
6162 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6163                                const OperandVector &Operands) const {
6164   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6165     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6166     if (Test(Op))
6167       return Op.getStartLoc();
6168   }
6169   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6170 }
6171 
6172 SMLoc
6173 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6174                            const OperandVector &Operands) const {
6175   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6176   return getOperandLoc(Test, Operands);
6177 }
6178 
6179 SMLoc
6180 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6181                            const OperandVector &Operands) const {
6182   auto Test = [=](const AMDGPUOperand& Op) {
6183     return Op.isRegKind() && Op.getReg() == Reg;
6184   };
6185   return getOperandLoc(Test, Operands);
6186 }
6187 
6188 SMLoc
6189 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6190   auto Test = [](const AMDGPUOperand& Op) {
6191     return Op.IsImmKindLiteral() || Op.isExpr();
6192   };
6193   return getOperandLoc(Test, Operands);
6194 }
6195 
6196 SMLoc
6197 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6198   auto Test = [](const AMDGPUOperand& Op) {
6199     return Op.isImmKindConst();
6200   };
6201   return getOperandLoc(Test, Operands);
6202 }
6203 
6204 //===----------------------------------------------------------------------===//
6205 // swizzle
6206 //===----------------------------------------------------------------------===//
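// The helpers below parse the fields of a swizzle(...) offset operand, e.g.
// (illustrative forms; the mode keyword itself is matched by the caller):
//   swizzle(QUAD_PERM, 0, 1, 2, 3)
//   swizzle(BROADCAST, 8, 0)
//   swizzle(SWAP, 4)
//   swizzle(REVERSE, 8)
//   swizzle(BITMASK_PERM, "01pip")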
6207 
6208 LLVM_READNONE
6209 static unsigned
6210 encodeBitmaskPerm(const unsigned AndMask,
6211                   const unsigned OrMask,
6212                   const unsigned XorMask) {
6213   using namespace llvm::AMDGPU::Swizzle;
6214 
6215   return BITMASK_PERM_ENC |
6216          (AndMask << BITMASK_AND_SHIFT) |
6217          (OrMask  << BITMASK_OR_SHIFT)  |
6218          (XorMask << BITMASK_XOR_SHIFT);
6219 }
6220 
6221 bool
6222 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6223                                      const unsigned MinVal,
6224                                      const unsigned MaxVal,
6225                                      const StringRef ErrMsg,
6226                                      SMLoc &Loc) {
6227   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6228     return false;
6229   }
6230   Loc = Parser.getTok().getLoc();
6231   if (!parseExpr(Op)) {
6232     return false;
6233   }
6234   if (Op < MinVal || Op > MaxVal) {
6235     Error(Loc, ErrMsg);
6236     return false;
6237   }
6238 
6239   return true;
6240 }
6241 
6242 bool
6243 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6244                                       const unsigned MinVal,
6245                                       const unsigned MaxVal,
6246                                       const StringRef ErrMsg) {
6247   SMLoc Loc;
6248   for (unsigned i = 0; i < OpNum; ++i) {
6249     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6250       return false;
6251   }
6252 
6253   return true;
6254 }
6255 
6256 bool
6257 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6258   using namespace llvm::AMDGPU::Swizzle;
6259 
6260   int64_t Lane[LANE_NUM];
6261   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6262                            "expected a 2-bit lane id")) {
6263     Imm = QUAD_PERM_ENC;
6264     for (unsigned I = 0; I < LANE_NUM; ++I) {
6265       Imm |= Lane[I] << (LANE_SHIFT * I);
6266     }
6267     return true;
6268   }
6269   return false;
6270 }
6271 
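// Broadcast is encoded as a BITMASK_PERM: the AndMask of
// BITMASK_MAX - GroupSize + 1 clears the low log2(GroupSize) bits of the lane
// id (the index within a group), and the OrMask substitutes LaneIdx, so every
// lane in a group reads from that group's LaneIdx-th lane.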
6272 bool
6273 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6274   using namespace llvm::AMDGPU::Swizzle;
6275 
6276   SMLoc Loc;
6277   int64_t GroupSize;
6278   int64_t LaneIdx;
6279 
6280   if (!parseSwizzleOperand(GroupSize,
6281                            2, 32,
6282                            "group size must be in the interval [2,32]",
6283                            Loc)) {
6284     return false;
6285   }
6286   if (!isPowerOf2_64(GroupSize)) {
6287     Error(Loc, "group size must be a power of two");
6288     return false;
6289   }
6290   if (parseSwizzleOperand(LaneIdx,
6291                           0, GroupSize - 1,
6292                           "lane id must be in the interval [0,group size - 1]",
6293                           Loc)) {
6294     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6295     return true;
6296   }
6297   return false;
6298 }
6299 
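// Reverse is encoded as a BITMASK_PERM with XorMask = GroupSize - 1, which
// inverts the low log2(GroupSize) bits of the lane id and thus reverses the
// lane order within each group.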
6300 bool
6301 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6302   using namespace llvm::AMDGPU::Swizzle;
6303 
6304   SMLoc Loc;
6305   int64_t GroupSize;
6306 
6307   if (!parseSwizzleOperand(GroupSize,
6308                            2, 32,
6309                            "group size must be in the interval [2,32]",
6310                            Loc)) {
6311     return false;
6312   }
6313   if (!isPowerOf2_64(GroupSize)) {
6314     Error(Loc, "group size must be a power of two");
6315     return false;
6316   }
6317 
6318   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6319   return true;
6320 }
6321 
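// Swap is encoded as a BITMASK_PERM with XorMask = GroupSize. Since GroupSize
// is a power of two, this flips the single bit that selects between two
// adjacent groups, swapping each pair of neighboring groups of GroupSize lanes.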
6322 bool
6323 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6324   using namespace llvm::AMDGPU::Swizzle;
6325 
6326   SMLoc Loc;
6327   int64_t GroupSize;
6328 
6329   if (!parseSwizzleOperand(GroupSize,
6330                            1, 16,
6331                            "group size must be in the interval [1,16]",
6332                            Loc)) {
6333     return false;
6334   }
6335   if (!isPowerOf2_64(GroupSize)) {
6336     Error(Loc, "group size must be a power of two");
6337     return false;
6338   }
6339 
6340   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6341   return true;
6342 }
6343 
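// The control string is a 5-character mask, one character per lane id bit,
// with the leftmost character controlling the most significant bit:
// '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it, and
// 'i' inverts it.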
6344 bool
6345 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6346   using namespace llvm::AMDGPU::Swizzle;
6347 
6348   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6349     return false;
6350   }
6351 
6352   StringRef Ctl;
6353   SMLoc StrLoc = Parser.getTok().getLoc();
6354   if (!parseString(Ctl)) {
6355     return false;
6356   }
6357   if (Ctl.size() != BITMASK_WIDTH) {
6358     Error(StrLoc, "expected a 5-character mask");
6359     return false;
6360   }
6361 
6362   unsigned AndMask = 0;
6363   unsigned OrMask = 0;
6364   unsigned XorMask = 0;
6365 
6366   for (size_t i = 0; i < Ctl.size(); ++i) {
6367     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6368     switch(Ctl[i]) {
6369     default:
6370       Error(StrLoc, "invalid mask");
6371       return false;
6372     case '0':
6373       break;
6374     case '1':
6375       OrMask |= Mask;
6376       break;
6377     case 'p':
6378       AndMask |= Mask;
6379       break;
6380     case 'i':
6381       AndMask |= Mask;
6382       XorMask |= Mask;
6383       break;
6384     }
6385   }
6386 
6387   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6388   return true;
6389 }
6390 
6391 bool
6392 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6393 
6394   SMLoc OffsetLoc = Parser.getTok().getLoc();
6395 
6396   if (!parseExpr(Imm)) {
6397     return false;
6398   }
6399   if (!isUInt<16>(Imm)) {
6400     Error(OffsetLoc, "expected a 16-bit offset");
6401     return false;
6402   }
6403   return true;
6404 }
6405 
6406 bool
6407 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6408   using namespace llvm::AMDGPU::Swizzle;
6409 
6410   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6411 
6412     SMLoc ModeLoc = Parser.getTok().getLoc();
6413     bool Ok = false;
6414 
6415     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6416       Ok = parseSwizzleQuadPerm(Imm);
6417     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6418       Ok = parseSwizzleBitmaskPerm(Imm);
6419     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6420       Ok = parseSwizzleBroadcast(Imm);
6421     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6422       Ok = parseSwizzleSwap(Imm);
6423     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6424       Ok = parseSwizzleReverse(Imm);
6425     } else {
6426       Error(ModeLoc, "expected a swizzle mode");
6427     }
6428 
6429     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6430   }
6431 
6432   return false;
6433 }
6434 
6435 OperandMatchResultTy
6436 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6437   SMLoc S = Parser.getTok().getLoc();
6438   int64_t Imm = 0;
6439 
6440   if (trySkipId("offset")) {
6441 
6442     bool Ok = false;
6443     if (skipToken(AsmToken::Colon, "expected a colon")) {
6444       if (trySkipId("swizzle")) {
6445         Ok = parseSwizzleMacro(Imm);
6446       } else {
6447         Ok = parseSwizzleOffset(Imm);
6448       }
6449     }
6450 
6451     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6452 
6453     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6454   } else {
6455     // Swizzle "offset" operand is optional.
6456     // If it is omitted, try parsing other optional operands.
6457     return parseOptionalOpr(Operands);
6458   }
6459 }
6460 
6461 bool
6462 AMDGPUOperand::isSwizzle() const {
6463   return isImmTy(ImmTySwizzle);
6464 }
6465 
6466 //===----------------------------------------------------------------------===//
6467 // VGPR Index Mode
6468 //===----------------------------------------------------------------------===//
6469 
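// Parse the body of a gpr_idx(...) macro: either an immediately closing
// parenthesis, meaning the index mode is OFF, or a comma-separated list of
// mode names from IdSymbolic. Each name sets the corresponding bit of the
// returned mode mask; duplicate modes are rejected.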
6470 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6471 
6472   using namespace llvm::AMDGPU::VGPRIndexMode;
6473 
6474   if (trySkipToken(AsmToken::RParen)) {
6475     return OFF;
6476   }
6477 
6478   int64_t Imm = 0;
6479 
6480   while (true) {
6481     unsigned Mode = 0;
6482     SMLoc S = Parser.getTok().getLoc();
6483 
6484     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6485       if (trySkipId(IdSymbolic[ModeId])) {
6486         Mode = 1 << ModeId;
6487         break;
6488       }
6489     }
6490 
6491     if (Mode == 0) {
6492       Error(S, (Imm == 0)?
6493                "expected a VGPR index mode or a closing parenthesis" :
6494                "expected a VGPR index mode");
6495       return UNDEF;
6496     }
6497 
6498     if (Imm & Mode) {
6499       Error(S, "duplicate VGPR index mode");
6500       return UNDEF;
6501     }
6502     Imm |= Mode;
6503 
6504     if (trySkipToken(AsmToken::RParen))
6505       break;
6506     if (!skipToken(AsmToken::Comma,
6507                    "expected a comma or a closing parenthesis"))
6508       return UNDEF;
6509   }
6510 
6511   return Imm;
6512 }
6513 
6514 OperandMatchResultTy
6515 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6516 
6517   using namespace llvm::AMDGPU::VGPRIndexMode;
6518 
6519   int64_t Imm = 0;
6520   SMLoc S = Parser.getTok().getLoc();
6521 
6522   if (getLexer().getKind() == AsmToken::Identifier &&
6523       Parser.getTok().getString() == "gpr_idx" &&
6524       getLexer().peekTok().is(AsmToken::LParen)) {
6525 
6526     Parser.Lex();
6527     Parser.Lex();
6528 
6529     Imm = parseGPRIdxMacro();
6530     if (Imm == UNDEF)
6531       return MatchOperand_ParseFail;
6532 
6533   } else {
6534     if (getParser().parseAbsoluteExpression(Imm))
6535       return MatchOperand_ParseFail;
6536     if (Imm < 0 || !isUInt<4>(Imm)) {
6537       Error(S, "invalid immediate: only 4-bit values are legal");
6538       return MatchOperand_ParseFail;
6539     }
6540   }
6541 
6542   Operands.push_back(
6543       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6544   return MatchOperand_Success;
6545 }
6546 
6547 bool AMDGPUOperand::isGPRIdxMode() const {
6548   return isImmTy(ImmTyGprIdxMode);
6549 }
6550 
6551 //===----------------------------------------------------------------------===//
6552 // sopp branch targets
6553 //===----------------------------------------------------------------------===//
6554 
6555 OperandMatchResultTy
6556 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6557 
6558   // Make sure we are not parsing something
6559   // that looks like a label or an expression but is not.
6560   // This will improve error messages.
6561   if (isRegister() || isModifier())
6562     return MatchOperand_NoMatch;
6563 
6564   if (!parseExpr(Operands))
6565     return MatchOperand_ParseFail;
6566 
6567   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6568   assert(Opr.isImm() || Opr.isExpr());
6569   SMLoc Loc = Opr.getStartLoc();
6570 
6571   // Currently we do not support arbitrary expressions as branch targets.
6572   // Only labels and absolute expressions are accepted.
6573   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6574     Error(Loc, "expected an absolute expression or a label");
6575   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6576     Error(Loc, "expected a 16-bit signed jump offset");
6577   }
6578 
6579   return MatchOperand_Success;
6580 }
6581 
6582 //===----------------------------------------------------------------------===//
6583 // Boolean holding registers
6584 //===----------------------------------------------------------------------===//
6585 
6586 OperandMatchResultTy
6587 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6588   return parseReg(Operands);
6589 }
6590 
6591 //===----------------------------------------------------------------------===//
6592 // mubuf
6593 //===----------------------------------------------------------------------===//
6594 
6595 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6596   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6597 }
6598 
6599 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6600   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6601 }
6602 
6603 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6604   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6605 }
6606 
6607 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6608   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6609 }
6610 
6611 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6612                                const OperandVector &Operands,
6613                                bool IsAtomic,
6614                                bool IsAtomicReturn,
6615                                bool IsLds) {
6616   bool IsLdsOpcode = IsLds;
6617   bool HasLdsModifier = false;
6618   OptionalImmIndexMap OptionalIdx;
6619   assert(IsAtomicReturn ? IsAtomic : true);
6620   unsigned FirstOperandIdx = 1;
6621 
6622   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6623     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6624 
6625     // Add the register arguments
6626     if (Op.isReg()) {
6627       Op.addRegOperands(Inst, 1);
6628       // Insert a tied src for the atomic return dst.
6629       // This cannot be postponed, as subsequent calls to
6630       // addImmOperands rely on the correct number of MC operands.
6631       if (IsAtomicReturn && i == FirstOperandIdx)
6632         Op.addRegOperands(Inst, 1);
6633       continue;
6634     }
6635 
6636     // Handle the case where soffset is an immediate
6637     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6638       Op.addImmOperands(Inst, 1);
6639       continue;
6640     }
6641 
6642     HasLdsModifier |= Op.isLDS();
6643 
6644     // Handle tokens like 'offen' which are sometimes hard-coded into the
6645     // asm string.  There are no MCInst operands for these.
6646     if (Op.isToken()) {
6647       continue;
6648     }
6649     assert(Op.isImm());
6650 
6651     // Handle optional arguments
6652     OptionalIdx[Op.getImmTy()] = i;
6653   }
6654 
6655   // This is a workaround for an llvm quirk which may result in an
6656   // incorrect instruction selection. Lds and non-lds versions of
6657   // MUBUF instructions are identical except that lds versions
6658   // have a mandatory 'lds' modifier. However, this modifier follows the
6659   // optional modifiers, and the llvm asm matcher regards this 'lds'
6660   // modifier as an optional one. As a result, an lds version
6661   // of an opcode may be selected even if it has no 'lds' modifier.
6662   if (IsLdsOpcode && !HasLdsModifier) {
6663     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6664     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6665       Inst.setOpcode(NoLdsOpcode);
6666       IsLdsOpcode = false;
6667     }
6668   }
6669 
6670   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6671   if (!IsAtomic || IsAtomicReturn) {
6672     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6673   }
6674   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6675 
6676   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6677     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6678   }
6679 
6680   if (isGFX10Plus())
6681     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6682 }
6683 
6684 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6685   OptionalImmIndexMap OptionalIdx;
6686 
6687   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6688     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6689 
6690     // Add the register arguments
6691     if (Op.isReg()) {
6692       Op.addRegOperands(Inst, 1);
6693       continue;
6694     }
6695 
6696     // Handle the case where soffset is an immediate
6697     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6698       Op.addImmOperands(Inst, 1);
6699       continue;
6700     }
6701 
6702     // Handle tokens like 'offen' which are sometimes hard-coded into the
6703     // asm string.  There are no MCInst operands for these.
6704     if (Op.isToken()) {
6705       continue;
6706     }
6707     assert(Op.isImm());
6708 
6709     // Handle optional arguments
6710     OptionalIdx[Op.getImmTy()] = i;
6711   }
6712 
6713   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6714                         AMDGPUOperand::ImmTyOffset);
6715   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6716   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6717   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6718   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6719 
6720   if (isGFX10Plus())
6721     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6722 }
6723 
6724 //===----------------------------------------------------------------------===//
6725 // mimg
6726 //===----------------------------------------------------------------------===//
6727 
6728 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6729                               bool IsAtomic) {
6730   unsigned I = 1;
6731   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6732   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6733     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6734   }
6735 
6736   if (IsAtomic) {
6737     // Add src, same as dst
6738     assert(Desc.getNumDefs() == 1);
6739     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6740   }
6741 
6742   OptionalImmIndexMap OptionalIdx;
6743 
6744   for (unsigned E = Operands.size(); I != E; ++I) {
6745     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6746 
6747     // Add the register arguments
6748     if (Op.isReg()) {
6749       Op.addRegOperands(Inst, 1);
6750     } else if (Op.isImmModifier()) {
6751       OptionalIdx[Op.getImmTy()] = I;
6752     } else if (!Op.isToken()) {
6753       llvm_unreachable("unexpected operand type");
6754     }
6755   }
6756 
6757   bool IsGFX10Plus = isGFX10Plus();
6758 
6759   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6760   if (IsGFX10Plus)
6761     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6762   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6763   if (IsGFX10Plus)
6764     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6765   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6766   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6767   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6768   if (IsGFX10Plus)
6769     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6770   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6771   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6772   if (!IsGFX10Plus)
6773     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6774   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6775 }
6776 
6777 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6778   cvtMIMG(Inst, Operands, true);
6779 }
6780 
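// Only register operands are added here; the trailing a16 operand is not
// parsed and is appended below with a hard-coded value of 1.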
6781 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6782                                       const OperandVector &Operands) {
6783   for (unsigned I = 1; I < Operands.size(); ++I) {
6784     auto &Operand = (AMDGPUOperand &)*Operands[I];
6785     if (Operand.isReg())
6786       Operand.addRegOperands(Inst, 1);
6787   }
6788 
6789   Inst.addOperand(MCOperand::createImm(1)); // a16
6790 }
6791 
6792 //===----------------------------------------------------------------------===//
6793 // smrd
6794 //===----------------------------------------------------------------------===//
6795 
6796 bool AMDGPUOperand::isSMRDOffset8() const {
6797   return isImm() && isUInt<8>(getImm());
6798 }
6799 
6800 bool AMDGPUOperand::isSMEMOffset() const {
6801   return isImm(); // Offset range is checked later by validator.
6802 }
6803 
6804 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6805   // 32-bit literals are only supported on CI, and we only want to use them
6806   // when the offset does not fit in 8 bits.
6807   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6808 }
6809 
6810 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6811   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6812 }
6813 
6814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6815   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6816 }
6817 
6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6819   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6820 }
6821 
6822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6823   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6824 }
6825 
6826 //===----------------------------------------------------------------------===//
6827 // vop3
6828 //===----------------------------------------------------------------------===//
6829 
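// Convert parsed omod values to their hardware encoding: the OMOD field uses
// 0 = none, 1 = multiply by 2, 2 = multiply by 4, 3 = divide by 2. Hence
// mul:1/2/4 map to 0/1/2 and div:1/2 map to 0/3.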
6830 static bool ConvertOmodMul(int64_t &Mul) {
6831   if (Mul != 1 && Mul != 2 && Mul != 4)
6832     return false;
6833 
6834   Mul >>= 1;
6835   return true;
6836 }
6837 
6838 static bool ConvertOmodDiv(int64_t &Div) {
6839   if (Div == 1) {
6840     Div = 0;
6841     return true;
6842   }
6843 
6844   if (Div == 2) {
6845     Div = 3;
6846     return true;
6847   }
6848 
6849   return false;
6850 }
6851 
6852 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6853   if (BoundCtrl == 0) {
6854     BoundCtrl = 1;
6855     return true;
6856   }
6857 
6858   if (BoundCtrl == -1) {
6859     BoundCtrl = 0;
6860     return true;
6861   }
6862 
6863   return false;
6864 }
6865 
6866 // Note: the order in this table matches the order of operands in AsmString.
6867 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6868   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6869   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6870   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6871   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6872   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6873   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6874   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6875   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6876   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6877   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6878   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6879   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6880   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6881   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6882   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6883   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6884   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6885   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6886   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6887   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6888   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6889   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6890   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6891   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6892   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6893   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6894   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6895   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6896   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6897   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6898   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6899   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6900   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6901   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6902   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6903   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6904   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6905   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6906   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6907   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6908   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6909   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6910   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6911 };
6912 
6913 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6914 
6915   OperandMatchResultTy res = parseOptionalOpr(Operands);
6916 
6917   // This is a hack to enable hardcoded mandatory operands which follow
6918   // optional operands.
6919   //
6920   // The current design assumes that all operands after the first optional one
6921   // are also optional. However, the implementation of some instructions violates
6922   // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
6923   //
6924   // To alleviate this problem, we have to (implicitly) parse extra operands
6925   // to make sure the autogenerated parser of custom operands never hits a
6926   // hardcoded mandatory operand.
6927 
6928   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6929     if (res != MatchOperand_Success ||
6930         isToken(AsmToken::EndOfStatement))
6931       break;
6932 
6933     trySkipToken(AsmToken::Comma);
6934     res = parseOptionalOpr(Operands);
6935   }
6936 
6937   return res;
6938 }
6939 
6940 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6941   OperandMatchResultTy res;
6942   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6943     // try to parse any optional operand here
6944     if (Op.IsBit) {
6945       res = parseNamedBit(Op.Name, Operands, Op.Type);
6946     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6947       res = parseOModOperand(Operands);
6948     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6949                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6950                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6951       res = parseSDWASel(Operands, Op.Name, Op.Type);
6952     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6953       res = parseSDWADstUnused(Operands);
6954     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6955                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6956                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6957                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6958       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6959                                         Op.ConvertResult);
6960     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6961       res = parseDim(Operands);
6962     } else {
6963       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6964     }
6965     if (res != MatchOperand_NoMatch) {
6966       return res;
6967     }
6968   }
6969   return MatchOperand_NoMatch;
6970 }
6971 
6972 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6973   StringRef Name = Parser.getTok().getString();
6974   if (Name == "mul") {
6975     return parseIntWithPrefix("mul", Operands,
6976                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6977   }
6978 
6979   if (Name == "div") {
6980     return parseIntWithPrefix("div", Operands,
6981                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6982   }
6983 
6984   return MatchOperand_NoMatch;
6985 }
6986 
6987 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6988   cvtVOP3P(Inst, Operands);
6989 
6990   int Opc = Inst.getOpcode();
6991 
6992   int SrcNum;
6993   const int Ops[] = { AMDGPU::OpName::src0,
6994                       AMDGPU::OpName::src1,
6995                       AMDGPU::OpName::src2 };
6996   for (SrcNum = 0;
6997        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6998        ++SrcNum);
6999   assert(SrcNum > 0);
7000 
7001   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7002   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7003 
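  // The op_sel bit immediately after the last source selects the destination
  // half. It has no field of its own, so fold it into src0_modifiers as
  // DST_OP_SEL.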
7004   if ((OpSel & (1 << SrcNum)) != 0) {
7005     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7006     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7007     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7008   }
7009 }
7010 
7011 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7012       // 1. This operand is an input modifiers operand
7013   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7014       // 2. This is not the last operand
7015       && Desc.NumOperands > (OpNum + 1)
7016       // 3. The next operand has a register class
7017       && Desc.OpInfo[OpNum + 1].RegClass != -1
7018       // 4. The next operand is not tied to any other operand
7019       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7020 }
7021 
7022 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7023 {
7024   OptionalImmIndexMap OptionalIdx;
7025   unsigned Opc = Inst.getOpcode();
7026 
7027   unsigned I = 1;
7028   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7029   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7030     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7031   }
7032 
7033   for (unsigned E = Operands.size(); I != E; ++I) {
7034     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7035     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7036       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7037     } else if (Op.isInterpSlot() ||
7038                Op.isInterpAttr() ||
7039                Op.isAttrChan()) {
7040       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7041     } else if (Op.isImmModifier()) {
7042       OptionalIdx[Op.getImmTy()] = I;
7043     } else {
7044       llvm_unreachable("unhandled operand type");
7045     }
7046   }
7047 
7048   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7049     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7050   }
7051 
7052   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7053     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7054   }
7055 
7056   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7057     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7058   }
7059 }
7060 
7061 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7062                               OptionalImmIndexMap &OptionalIdx) {
7063   unsigned Opc = Inst.getOpcode();
7064 
7065   unsigned I = 1;
7066   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7067   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7068     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7069   }
7070 
7071   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7072     // This instruction has src modifiers
7073     for (unsigned E = Operands.size(); I != E; ++I) {
7074       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7075       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7076         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7077       } else if (Op.isImmModifier()) {
7078         OptionalIdx[Op.getImmTy()] = I;
7079       } else if (Op.isRegOrImm()) {
7080         Op.addRegOrImmOperands(Inst, 1);
7081       } else {
7082         llvm_unreachable("unhandled operand type");
7083       }
7084     }
7085   } else {
7086     // No src modifiers
7087     for (unsigned E = Operands.size(); I != E; ++I) {
7088       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7089       if (Op.isMod()) {
7090         OptionalIdx[Op.getImmTy()] = I;
7091       } else {
7092         Op.addRegOrImmOperands(Inst, 1);
7093       }
7094     }
7095   }
7096 
7097   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7098     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7099   }
7100 
7101   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7102     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7103   }
7104 
7105   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7106   // these have a src2 register operand that is tied to the dst operand.
7107   // The assembler does not allow modifiers for this operand, so src2_modifiers
7108   // must be 0.
7109   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7110       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7111       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7112       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7113       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7114       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7115       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7116       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7117       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7118       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7119     auto it = Inst.begin();
7120     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7121     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7122     ++it;
7123     // Copy the operand to ensure it's not invalidated when Inst grows.
7124     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7125   }
7126 }
7127 
7128 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7129   OptionalImmIndexMap OptionalIdx;
7130   cvtVOP3(Inst, Operands, OptionalIdx);
7131 }
7132 
7133 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7134                                const OperandVector &Operands) {
7135   OptionalImmIndexMap OptIdx;
7136   const int Opc = Inst.getOpcode();
7137   const MCInstrDesc &Desc = MII.get(Opc);
7138 
7139   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7140 
7141   cvtVOP3(Inst, Operands, OptIdx);
7142 
7143   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7144     assert(!IsPacked);
7145     Inst.addOperand(Inst.getOperand(0));
7146   }
7147 
7148   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7149   // instruction, and then figure out where to actually put the modifiers.
7150 
7151   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7152 
7153   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7154   if (OpSelHiIdx != -1) {
7155     int DefaultVal = IsPacked ? -1 : 0;
7156     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7157                           DefaultVal);
7158   }
7159 
7160   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7161   if (NegLoIdx != -1) {
7162     assert(IsPacked);
7163     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7164     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7165   }
7166 
7167   const int Ops[] = { AMDGPU::OpName::src0,
7168                       AMDGPU::OpName::src1,
7169                       AMDGPU::OpName::src2 };
7170   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7171                          AMDGPU::OpName::src1_modifiers,
7172                          AMDGPU::OpName::src2_modifiers };
7173 
7174   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7175 
7176   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7177   unsigned OpSelHi = 0;
7178   unsigned NegLo = 0;
7179   unsigned NegHi = 0;
7180 
7181   if (OpSelHiIdx != -1) {
7182     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7183   }
7184 
7185   if (NegLoIdx != -1) {
7186     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7187     NegLo = Inst.getOperand(NegLoIdx).getImm();
7188     NegHi = Inst.getOperand(NegHiIdx).getImm();
7189   }
7190 
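  // Fold the instruction-level op_sel/op_sel_hi/neg_lo/neg_hi bits into the
  // per-source srcN_modifiers operands, one bit per source.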
7191   for (int J = 0; J < 3; ++J) {
7192     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7193     if (OpIdx == -1)
7194       break;
7195 
7196     uint32_t ModVal = 0;
7197 
7198     if ((OpSel & (1 << J)) != 0)
7199       ModVal |= SISrcMods::OP_SEL_0;
7200 
7201     if ((OpSelHi & (1 << J)) != 0)
7202       ModVal |= SISrcMods::OP_SEL_1;
7203 
7204     if ((NegLo & (1 << J)) != 0)
7205       ModVal |= SISrcMods::NEG;
7206 
7207     if ((NegHi & (1 << J)) != 0)
7208       ModVal |= SISrcMods::NEG_HI;
7209 
7210     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7211 
7212     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7213   }
7214 }
7215 
7216 //===----------------------------------------------------------------------===//
7217 // dpp
7218 //===----------------------------------------------------------------------===//
7219 
7220 bool AMDGPUOperand::isDPP8() const {
7221   return isImmTy(ImmTyDPP8);
7222 }
7223 
7224 bool AMDGPUOperand::isDPPCtrl() const {
7225   using namespace AMDGPU::DPP;
7226 
7227   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7228   if (result) {
7229     int64_t Imm = getImm();
7230     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7231            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7232            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7233            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7234            (Imm == DppCtrl::WAVE_SHL1) ||
7235            (Imm == DppCtrl::WAVE_ROL1) ||
7236            (Imm == DppCtrl::WAVE_SHR1) ||
7237            (Imm == DppCtrl::WAVE_ROR1) ||
7238            (Imm == DppCtrl::ROW_MIRROR) ||
7239            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7240            (Imm == DppCtrl::BCAST15) ||
7241            (Imm == DppCtrl::BCAST31) ||
7242            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7243            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7244   }
7245   return false;
7246 }
7247 
7248 //===----------------------------------------------------------------------===//
7249 // mAI
7250 //===----------------------------------------------------------------------===//
7251 
7252 bool AMDGPUOperand::isBLGP() const {
7253   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7254 }
7255 
7256 bool AMDGPUOperand::isCBSZ() const {
7257   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7258 }
7259 
7260 bool AMDGPUOperand::isABID() const {
7261   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7262 }
7263 
7264 bool AMDGPUOperand::isS16Imm() const {
7265   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7266 }
7267 
7268 bool AMDGPUOperand::isU16Imm() const {
7269   return isImm() && isUInt<16>(getImm());
7270 }
7271 
7272 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7273   if (!isGFX10Plus())
7274     return MatchOperand_NoMatch;
7275 
7276   SMLoc S = Parser.getTok().getLoc();
7277 
7278   if (getLexer().isNot(AsmToken::Identifier))
7279     return MatchOperand_NoMatch;
7280   if (getLexer().getTok().getString() != "dim")
7281     return MatchOperand_NoMatch;
7282 
7283   Parser.Lex();
7284   if (getLexer().isNot(AsmToken::Colon))
7285     return MatchOperand_ParseFail;
7286 
7287   Parser.Lex();
7288 
7289   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7290   // integer.
7291   std::string Token;
7292   if (getLexer().is(AsmToken::Integer)) {
7293     SMLoc Loc = getLexer().getTok().getEndLoc();
7294     Token = std::string(getLexer().getTok().getString());
7295     Parser.Lex();
7296     if (getLexer().getTok().getLoc() != Loc)
7297       return MatchOperand_ParseFail;
7298   }
7299   if (getLexer().isNot(AsmToken::Identifier))
7300     return MatchOperand_ParseFail;
7301   Token += getLexer().getTok().getString();
7302 
7303   StringRef DimId = Token;
7304   if (DimId.startswith("SQ_RSRC_IMG_"))
7305     DimId = DimId.substr(12);
7306 
7307   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7308   if (!DimInfo)
7309     return MatchOperand_ParseFail;
7310 
7311   Parser.Lex();
7312 
7313   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7314                                               AMDGPUOperand::ImmTyDim));
7315   return MatchOperand_Success;
7316 }
7317 
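// Parse a dpp8:[a,b,c,d,e,f,g,h] selector. Each of the eight entries is a lane
// select in the range [0, 7]; they are packed into a 24-bit immediate, three
// bits per lane, with the first entry in the least significant bits.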
7318 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7319   SMLoc S = Parser.getTok().getLoc();
7320   StringRef Prefix;
7321 
7322   if (getLexer().getKind() == AsmToken::Identifier) {
7323     Prefix = Parser.getTok().getString();
7324   } else {
7325     return MatchOperand_NoMatch;
7326   }
7327 
7328   if (Prefix != "dpp8")
7329     return parseDPPCtrl(Operands);
7330   if (!isGFX10Plus())
7331     return MatchOperand_NoMatch;
7332 
7333   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7334 
7335   int64_t Sels[8];
7336 
7337   Parser.Lex();
7338   if (getLexer().isNot(AsmToken::Colon))
7339     return MatchOperand_ParseFail;
7340 
7341   Parser.Lex();
7342   if (getLexer().isNot(AsmToken::LBrac))
7343     return MatchOperand_ParseFail;
7344 
7345   Parser.Lex();
7346   if (getParser().parseAbsoluteExpression(Sels[0]))
7347     return MatchOperand_ParseFail;
7348   if (0 > Sels[0] || 7 < Sels[0])
7349     return MatchOperand_ParseFail;
7350 
7351   for (size_t i = 1; i < 8; ++i) {
7352     if (getLexer().isNot(AsmToken::Comma))
7353       return MatchOperand_ParseFail;
7354 
7355     Parser.Lex();
7356     if (getParser().parseAbsoluteExpression(Sels[i]))
7357       return MatchOperand_ParseFail;
7358     if (0 > Sels[i] || 7 < Sels[i])
7359       return MatchOperand_ParseFail;
7360   }
7361 
7362   if (getLexer().isNot(AsmToken::RBrac))
7363     return MatchOperand_ParseFail;
7364   Parser.Lex();
7365 
7366   unsigned DPP8 = 0;
7367   for (size_t i = 0; i < 8; ++i)
7368     DPP8 |= (Sels[i] << (i * 3));
7369 
7370   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7371   return MatchOperand_Success;
7372 }
7373 
7374 OperandMatchResultTy
7375 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7376   using namespace AMDGPU::DPP;
7377 
7378   SMLoc S = Parser.getTok().getLoc();
7379   StringRef Prefix;
7380   int64_t Int;
7381 
7382   if (getLexer().getKind() == AsmToken::Identifier) {
7383     Prefix = Parser.getTok().getString();
7384   } else {
7385     return MatchOperand_NoMatch;
7386   }
7387 
7388   if (Prefix == "row_mirror") {
7389     Int = DppCtrl::ROW_MIRROR;
7390     Parser.Lex();
7391   } else if (Prefix == "row_half_mirror") {
7392     Int = DppCtrl::ROW_HALF_MIRROR;
7393     Parser.Lex();
7394   } else {
7395     // Check to prevent parseDPPCtrl from eating invalid tokens
7396     if (Prefix != "quad_perm"
7397         && Prefix != "row_shl"
7398         && Prefix != "row_shr"
7399         && Prefix != "row_ror"
7400         && Prefix != "wave_shl"
7401         && Prefix != "wave_rol"
7402         && Prefix != "wave_shr"
7403         && Prefix != "wave_ror"
7404         && Prefix != "row_bcast"
7405         && Prefix != "row_share"
7406         && Prefix != "row_xmask") {
7407       return MatchOperand_NoMatch;
7408     }
7409 
7410     if (!isGFX10Plus() && (Prefix == "row_share" || Prefix == "row_xmask"))
7411       return MatchOperand_NoMatch;
7412 
7413     if (!isVI() && !isGFX9() &&
7414         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
7415          Prefix == "wave_rol" || Prefix == "wave_ror" ||
7416          Prefix == "row_bcast"))
7417       return MatchOperand_NoMatch;
7418 
7419     Parser.Lex();
7420     if (getLexer().isNot(AsmToken::Colon))
7421       return MatchOperand_ParseFail;
7422 
7423     if (Prefix == "quad_perm") {
7424       // quad_perm:[%d,%d,%d,%d]
7425       Parser.Lex();
7426       if (getLexer().isNot(AsmToken::LBrac))
7427         return MatchOperand_ParseFail;
7428       Parser.Lex();
7429 
7430       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
7431         return MatchOperand_ParseFail;
7432 
7433       for (int i = 0; i < 3; ++i) {
7434         if (getLexer().isNot(AsmToken::Comma))
7435           return MatchOperand_ParseFail;
7436         Parser.Lex();
7437 
7438         int64_t Temp;
7439         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
7440           return MatchOperand_ParseFail;
7441         const int shift = i*2 + 2;
7442         Int += (Temp << shift);
7443       }
7444 
7445       if (getLexer().isNot(AsmToken::RBrac))
7446         return MatchOperand_ParseFail;
7447       Parser.Lex();
7448     } else {
7449       // sel:%d
7450       Parser.Lex();
7451       if (getParser().parseAbsoluteExpression(Int))
7452         return MatchOperand_ParseFail;
7453 
7454       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
7455         Int |= DppCtrl::ROW_SHL0;
7456       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
7457         Int |= DppCtrl::ROW_SHR0;
7458       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
7459         Int |= DppCtrl::ROW_ROR0;
7460       } else if (Prefix == "wave_shl" && 1 == Int) {
7461         Int = DppCtrl::WAVE_SHL1;
7462       } else if (Prefix == "wave_rol" && 1 == Int) {
7463         Int = DppCtrl::WAVE_ROL1;
7464       } else if (Prefix == "wave_shr" && 1 == Int) {
7465         Int = DppCtrl::WAVE_SHR1;
7466       } else if (Prefix == "wave_ror" && 1 == Int) {
7467         Int = DppCtrl::WAVE_ROR1;
7468       } else if (Prefix == "row_bcast") {
7469         if (Int == 15) {
7470           Int = DppCtrl::BCAST15;
7471         } else if (Int == 31) {
7472           Int = DppCtrl::BCAST31;
7473         } else {
7474           return MatchOperand_ParseFail;
7475         }
7476       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
7477         Int |= DppCtrl::ROW_SHARE_FIRST;
7478       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
7479         Int |= DppCtrl::ROW_XMASK_FIRST;
7480       } else {
7481         return MatchOperand_ParseFail;
7482       }
7483     }
7484   }
7485 
7486   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
7487   return MatchOperand_Success;
7488 }
7489 
7490 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7491   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7492 }
7493 
7494 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7495   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7496 }
7497 
7498 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7499   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7500 }
7501 
7502 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7503   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7504 }
7505 
7506 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7507   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7508 }
7509 
7510 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7511   OptionalImmIndexMap OptionalIdx;
7512 
7513   unsigned I = 1;
7514   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7515   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7516     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7517   }
7518 
7519   int Fi = 0;
7520   for (unsigned E = Operands.size(); I != E; ++I) {
7521     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7522                                             MCOI::TIED_TO);
7523     if (TiedTo != -1) {
7524       assert((unsigned)TiedTo < Inst.getNumOperands());
7525       // handle tied old or src2 for MAC instructions
7526       Inst.addOperand(Inst.getOperand(TiedTo));
7527     }
7528     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7529     // Add the register arguments
7530     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7531       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
7532       // Skip it.
7533       continue;
7534     }
7535 
7536     if (IsDPP8) {
7537       if (Op.isDPP8()) {
7538         Op.addImmOperands(Inst, 1);
7539       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7540         Op.addRegWithFPInputModsOperands(Inst, 2);
7541       } else if (Op.isFI()) {
7542         Fi = Op.getImm();
7543       } else if (Op.isReg()) {
7544         Op.addRegOperands(Inst, 1);
7545       } else {
7546         llvm_unreachable("Invalid operand type");
7547       }
7548     } else {
7549       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7550         Op.addRegWithFPInputModsOperands(Inst, 2);
7551       } else if (Op.isDPPCtrl()) {
7552         Op.addImmOperands(Inst, 1);
7553       } else if (Op.isImm()) {
7554         // Handle optional arguments
7555         OptionalIdx[Op.getImmTy()] = I;
7556       } else {
7557         llvm_unreachable("Invalid operand type");
7558       }
7559     }
7560   }
7561 
7562   if (IsDPP8) {
7563     using namespace llvm::AMDGPU::DPP;
7564     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7565   } else {
7566     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7567     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7568     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7569     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7570       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7571     }
7572   }
7573 }
7574 
7575 //===----------------------------------------------------------------------===//
7576 // sdwa
7577 //===----------------------------------------------------------------------===//
7578 
7579 OperandMatchResultTy
7580 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7581                               AMDGPUOperand::ImmTy Type) {
7582   using namespace llvm::AMDGPU::SDWA;
7583 
7584   SMLoc S = Parser.getTok().getLoc();
7585   StringRef Value;
7586   OperandMatchResultTy res;
7587 
7588   res = parseStringWithPrefix(Prefix, Value);
7589   if (res != MatchOperand_Success) {
7590     return res;
7591   }
7592 
7593   int64_t Int;
7594   Int = StringSwitch<int64_t>(Value)
7595         .Case("BYTE_0", SdwaSel::BYTE_0)
7596         .Case("BYTE_1", SdwaSel::BYTE_1)
7597         .Case("BYTE_2", SdwaSel::BYTE_2)
7598         .Case("BYTE_3", SdwaSel::BYTE_3)
7599         .Case("WORD_0", SdwaSel::WORD_0)
7600         .Case("WORD_1", SdwaSel::WORD_1)
7601         .Case("DWORD", SdwaSel::DWORD)
7602         .Default(0xffffffff);
7603   Parser.Lex(); // eat last token
7604 
7605   if (Int == 0xffffffff) {
7606     return MatchOperand_ParseFail;
7607   }
7608 
7609   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7610   return MatchOperand_Success;
7611 }
7612 
7613 OperandMatchResultTy
7614 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7615   using namespace llvm::AMDGPU::SDWA;
7616 
7617   SMLoc S = Parser.getTok().getLoc();
7618   StringRef Value;
7619   OperandMatchResultTy res;
7620 
7621   res = parseStringWithPrefix("dst_unused", Value);
7622   if (res != MatchOperand_Success) {
7623     return res;
7624   }
7625 
7626   int64_t Int;
7627   Int = StringSwitch<int64_t>(Value)
7628         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7629         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7630         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7631         .Default(0xffffffff);
7632   Parser.Lex(); // eat last token
7633 
7634   if (Int == 0xffffffff) {
7635     return MatchOperand_ParseFail;
7636   }
7637 
7638   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7639   return MatchOperand_Success;
7640 }
7641 
7642 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7643   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7644 }
7645 
7646 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7647   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7648 }
7649 
7650 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7651   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7652 }
7653 
7654 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7655   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7656 }
7657 
7658 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7659   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7660 }
7661 
7662 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7663                               uint64_t BasicInstType,
7664                               bool SkipDstVcc,
7665                               bool SkipSrcVcc) {
7666   using namespace llvm::AMDGPU::SDWA;
7667 
7668   OptionalImmIndexMap OptionalIdx;
7669   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7670   bool SkippedVcc = false;
7671 
7672   unsigned I = 1;
7673   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7674   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7675     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7676   }
7677 
7678   for (unsigned E = Operands.size(); I != E; ++I) {
7679     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7680     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7681         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7682       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
7683       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7684       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7685       // Skip VCC only if we didn't skip it on the previous iteration.
7686       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7687       if (BasicInstType == SIInstrFlags::VOP2 &&
7688           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7689            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7690         SkippedVcc = true;
7691         continue;
7692       } else if (BasicInstType == SIInstrFlags::VOPC &&
7693                  Inst.getNumOperands() == 0) {
7694         SkippedVcc = true;
7695         continue;
7696       }
7697     }
7698     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7699       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7700     } else if (Op.isImm()) {
7701       // Handle optional arguments
7702       OptionalIdx[Op.getImmTy()] = I;
7703     } else {
7704       llvm_unreachable("Invalid operand type");
7705     }
7706     SkippedVcc = false;
7707   }
7708 
7709   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7710       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7711       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7712     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
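  // For example, given "v_mac_f32_sdwa v1, v2, v3" (an illustrative asm form),
  // the code below copies the dst register v1 into the src2 slot, since MAC
  // accumulates into its destination.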
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

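// The MAI (matrix/MFMA) modifiers blgp, cbsz and abid are optional in the
// assembly; when one is omitted, the corresponding factory below supplies a
// default immediate of 0.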
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the corresponding token was expected.
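  // For example, a trailing "glc" on a buffer instruction is parsed as an
  // ImmTyGLC immediate operand, so the MCK_glc case below has to accept that
  // immediate form.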
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and if the name of the
    // expression is not a valid token the match fails, so we need to handle
    // it here.
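    // For example, an operand written as a symbol name (say, "sym") reaches
    // this point as an expression, not a token, and must still match SSrcB32.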
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable
    // as a 64-bit operand. The following code enables it for SReg_64
    // operands used as source and destination. Remaining source operands
    // are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

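// s_endpgm takes an optional immediate operand: both "s_endpgm" and, for
// example, "s_endpgm 3" are accepted, and a missing operand defaults to 0.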
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }