1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
/// Register category tag. KernelScopeInfo::usesRegister() switches on it to
/// decide which register-usage counter a reference updates.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
  /// Discriminator for the payload union declared further down: each Create*
  /// factory sets Kind and fills exactly one of Tok / Imm / Reg / Expr.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  /// Source locations of the operand; both are assigned by the Create*
  /// factories.
  SMLoc StartLoc, EndLoc;
  /// Parser handed to the constructor; stored as given.
  const AMDGPUAsmParser *AsmParser;

public:
  /// Builds an operand of kind \p Kind_. The payload union and the source
  /// locations are left for the caller to fill in.
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  /// Owning handle type returned by the Create* factories.
  using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
  /// Tag stored in every immediate operand (ImmOp::Type). ImmTyNone marks a
  /// plain immediate; isImmModifier() reports every other value as a
  /// modifier. printImmTy() has one case per enumerator, so keep the two in
  /// sync when adding entries.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };
188 
189 private:
  // Payload for Kind == Token: a non-owning view of the token text
  // (CreateToken stores the StringRef's data pointer and size).
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate: the value, its ImmTy tag, the IsFPImm flag
  // passed to CreateImm, and any input modifiers.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Payload for Kind == Register: the register number and any input modifiers.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Exactly one member is meaningful, selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };
213 
214 public:
215   bool isToken() const override {
216     if (Kind == Token)
217       return true;
218 
219     if (Kind != Expression || !Expr)
220       return false;
221 
222     // When parsing operands, we can't always tell if something was meant to be
223     // a token, like 'gds', or an expression that references a global variable.
224     // In this case, we assume the string is an expression, and if we need to
225     // interpret is a token, then we treat the symbol name as the token.
226     return isa<MCSymbolRefExpr>(Expr);
227   }
228 
  /// True for any operand created as an immediate, whatever its ImmTy.
  bool isImm() const override {
    return Kind == Immediate;
  }

  // Declared here; the definitions are not in this part of the file.
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  /// True for any register operand, with or without modifiers.
  bool isRegKind() const {
    return Kind == Register;
  }

  /// True only for a register operand that carries no modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }
243 
  /// True when the operand is a register of class \p RCID, or an immediate
  /// that isInlinableImm() or isLiteralImm() accepts for \p type. Unlike
  /// isRegOrInlineNoMods(), modifiers are not rejected here.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  // Fixed (register class, value type) instantiations of the check above.
  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }
271 
272   bool isVReg() const {
273     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
274            isRegClass(AMDGPU::VReg_64RegClassID) ||
275            isRegClass(AMDGPU::VReg_96RegClassID) ||
276            isRegClass(AMDGPU::VReg_128RegClassID) ||
277            isRegClass(AMDGPU::VReg_256RegClassID) ||
278            isRegClass(AMDGPU::VReg_512RegClassID);
279   }
280 
281   bool isVReg32() const {
282     return isRegClass(AMDGPU::VGPR_32RegClassID);
283   }
284 
285   bool isVReg32OrOff() const {
286     return isOff() || isVReg32();
287   }
288 
  // SDWA operand predicates; declared here, definitions are not in this part
  // of the file.
  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  /// True when the operand is an immediate tagged with exactly \p ImmT.
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  /// True when the operand is an immediate with any tag other than ImmTyNone.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
302 
  // Per-ImmTy predicates: each is true when the operand is an immediate
  // carrying the corresponding tag. isOffset, isOffset0, isOffset1 and
  // isFORMAT additionally require the value to fit the stated unsigned width.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  // Accepts either offset tag; no range check is applied here.
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
347 
  /// True for a ClampSI or OModSI immediate.
  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  /// True for a modifier-free register or for any immediate.
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  // Declared here; the definitions are not in this part of the file.
  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  /// True when the operand is a register of class \p RCID or an immediate
  /// that isInlinableImm() accepts for \p type, and it carries no modifiers.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: isRegOrInlineNoMods() against the SReg_32 / SReg_64 classes for
  // the value type named in the suffix. The V2 variants defer to the scalar
  // 16-bit check.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  // Declared here; the definition is not in this part of the file.
  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }
397 
  // SSrc*: the matching SCSrc check, extended to also accept a literal
  // immediate of the stated type. The 32-bit forms accept expressions too.
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // NOTE(review): the return below can never execute because
  // llvm_unreachable() precedes it; presumably kept so the function has a
  // return statement -- confirm before removing.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  // NOTE(review): the FP variants below reuse the integer SCSrcB* checks for
  // the register / inline-constant part and differ only in the literal type
  // -- confirm this is intended.
  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  // NOTE(review): as in isSSrcV2B16(), the return after llvm_unreachable()
  // is dead code.
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  /// Same shape as isSSrcB32(), but checked against the SRegOrLds_32 class.
  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
438 
  // VCSrc*: isRegOrInlineNoMods() against the VS_32 / VS_64 classes for the
  // value type named in the suffix. The V2 variants defer to the scalar
  // 16-bit check.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: a VCSrc check extended to also accept a literal immediate of the
  // stated type; the 32-bit forms accept expressions too.
  // NOTE(review): the integer (B*) variants use the FP VCSrcF* checks for the
  // register / inline-constant part -- confirm this is intended.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
502 
  // VISrc*: isRegOrInlineNoMods() against the VGPR_32 class for the value
  // type named in the suffix. V2B16 defers to B16; V2F16 accepts either the
  // F16 or the B32 check.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  // AISrc*: the same pattern as VISrc*, checked against the AGPR_32 class.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  // AISrc_128*: the same pattern, checked against the AReg_128 class.
  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  // AISrc_512*: the same pattern, checked against the AReg_512 class.
  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  // AISrc_1024*: the same pattern, checked against the AReg_1024 class.
  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
622 
  /// True when isLiteralImm() accepts the operand as an f32 literal.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  /// True when isLiteralImm() accepts the operand as an f16 literal.
  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  /// Always false: this operand class never reports itself as a memory
  /// operand.
  bool isMem() const override {
    return false;
  }

  /// True for operands created as expressions.
  bool isExpr() const {
    return Kind == Expression;
  }

  /// True for an expression or for any immediate.
  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // Declared here; the definitions are not in this part of the file.
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
659 
660   StringRef getExpressionAsToken() const {
661     assert(isExpr());
662     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
663     return S->getSymbol().getName();
664   }
665 
666   StringRef getToken() const {
667     assert(isToken());
668 
669     if (Kind == Expression)
670       return getExpressionAsToken();
671 
672     return StringRef(Tok.Data, Tok.Length);
673   }
674 
  /// Value of an immediate operand; asserts the operand is an immediate.
  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  /// ImmTy tag of an immediate operand; asserts the operand is an immediate.
  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  /// Register number; asserts the operand is a register (modifiers allowed).
  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  /// Start and end locations packaged as a single range.
  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }
701 
702   Modifiers getModifiers() const {
703     assert(isRegKind() || isImmTy(ImmTyNone));
704     return isRegKind() ? Reg.Mods : Imm.Mods;
705   }
706 
707   void setModifiers(Modifiers Mods) {
708     assert(isRegKind() || isImmTy(ImmTyNone));
709     if (isRegKind())
710       Reg.Mods = Mods;
711     else
712       Imm.Mods = Mods;
713   }
714 
715   bool hasModifiers() const {
716     return getModifiers().hasModifiers();
717   }
718 
719   bool hasFPModifiers() const {
720     return getModifiers().hasFPModifiers();
721   }
722 
723   bool hasIntModifiers() const {
724     return getModifiers().hasIntModifiers();
725   }
726 
  // Declared here; the definitions are not in this part of the file.
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  // Fixed-width entry points for the template above.
  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  /// Forwards to addRegOperands() unchanged.
  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }
749 
750   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
751     if (isRegKind())
752       addRegOperands(Inst, N);
753     else if (isExpr())
754       Inst.addOperand(MCOperand::createExpr(Expr));
755     else
756       addImmOperands(Inst, N);
757   }
758 
759   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
760     Modifiers Mods = getModifiers();
761     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
762     if (isRegKind()) {
763       addRegOperands(Inst, N);
764     } else {
765       addImmOperands(Inst, N, false);
766     }
767   }
768 
769   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
770     assert(!hasIntModifiers());
771     addRegOrImmWithInputModsOperands(Inst, N);
772   }
773 
774   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
775     assert(!hasFPModifiers());
776     addRegOrImmWithInputModsOperands(Inst, N);
777   }
778 
779   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
780     Modifiers Mods = getModifiers();
781     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
782     assert(isRegKind());
783     addRegOperands(Inst, N);
784   }
785 
786   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
787     assert(!hasIntModifiers());
788     addRegWithInputModsOperands(Inst, N);
789   }
790 
791   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
792     assert(!hasFPModifiers());
793     addRegWithInputModsOperands(Inst, N);
794   }
795 
796   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
797     if (isImm())
798       addImmOperands(Inst, N);
799     else {
800       assert(isExpr());
801       Inst.addOperand(MCOperand::createExpr(Expr));
802     }
803   }
804 
805   static void printImmTy(raw_ostream& OS, ImmTy Type) {
806     switch (Type) {
807     case ImmTyNone: OS << "None"; break;
808     case ImmTyGDS: OS << "GDS"; break;
809     case ImmTyLDS: OS << "LDS"; break;
810     case ImmTyOffen: OS << "Offen"; break;
811     case ImmTyIdxen: OS << "Idxen"; break;
812     case ImmTyAddr64: OS << "Addr64"; break;
813     case ImmTyOffset: OS << "Offset"; break;
814     case ImmTyInstOffset: OS << "InstOffset"; break;
815     case ImmTyOffset0: OS << "Offset0"; break;
816     case ImmTyOffset1: OS << "Offset1"; break;
817     case ImmTyDLC: OS << "DLC"; break;
818     case ImmTyGLC: OS << "GLC"; break;
819     case ImmTySLC: OS << "SLC"; break;
820     case ImmTyTFE: OS << "TFE"; break;
821     case ImmTyD16: OS << "D16"; break;
822     case ImmTyFORMAT: OS << "FORMAT"; break;
823     case ImmTyClampSI: OS << "ClampSI"; break;
824     case ImmTyOModSI: OS << "OModSI"; break;
825     case ImmTyDPP8: OS << "DPP8"; break;
826     case ImmTyDppCtrl: OS << "DppCtrl"; break;
827     case ImmTyDppRowMask: OS << "DppRowMask"; break;
828     case ImmTyDppBankMask: OS << "DppBankMask"; break;
829     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
830     case ImmTyDppFi: OS << "FI"; break;
831     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
832     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
833     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
834     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
835     case ImmTyDMask: OS << "DMask"; break;
836     case ImmTyDim: OS << "Dim"; break;
837     case ImmTyUNorm: OS << "UNorm"; break;
838     case ImmTyDA: OS << "DA"; break;
839     case ImmTyR128A16: OS << "R128A16"; break;
840     case ImmTyLWE: OS << "LWE"; break;
841     case ImmTyOff: OS << "Off"; break;
842     case ImmTyExpTgt: OS << "ExpTgt"; break;
843     case ImmTyExpCompr: OS << "ExpCompr"; break;
844     case ImmTyExpVM: OS << "ExpVM"; break;
845     case ImmTyHwreg: OS << "Hwreg"; break;
846     case ImmTySendMsg: OS << "SendMsg"; break;
847     case ImmTyInterpSlot: OS << "InterpSlot"; break;
848     case ImmTyInterpAttr: OS << "InterpAttr"; break;
849     case ImmTyAttrChan: OS << "AttrChan"; break;
850     case ImmTyOpSel: OS << "OpSel"; break;
851     case ImmTyOpSelHi: OS << "OpSelHi"; break;
852     case ImmTyNegLo: OS << "NegLo"; break;
853     case ImmTyNegHi: OS << "NegHi"; break;
854     case ImmTySwizzle: OS << "Swizzle"; break;
855     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
856     case ImmTyHigh: OS << "High"; break;
857     case ImmTyBLGP: OS << "BLGP"; break;
858     case ImmTyCBSZ: OS << "CBSZ"; break;
859     case ImmTyABID: OS << "ABID"; break;
860     case ImmTyEndpgm: OS << "Endpgm"; break;
861     }
862   }
863 
864   void print(raw_ostream &OS) const override {
865     switch (Kind) {
866     case Register:
867       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
868       break;
869     case Immediate:
870       OS << '<' << getImm();
871       if (getImmTy() != ImmTyNone) {
872         OS << " type: "; printImmTy(OS, getImmTy());
873       }
874       OS << " mods: " << Imm.Mods << '>';
875       break;
876     case Token:
877       OS << '\'' << getToken() << '\'';
878       break;
879     case Expression:
880       OS << "<expr " << *Expr << '>';
881       break;
882     }
883   }
884 
885   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
886                                       int64_t Val, SMLoc Loc,
887                                       ImmTy Type = ImmTyNone,
888                                       bool IsFPImm = false) {
889     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
890     Op->Imm.Val = Val;
891     Op->Imm.IsFPImm = IsFPImm;
892     Op->Imm.Type = Type;
893     Op->Imm.Mods = Modifiers();
894     Op->StartLoc = Loc;
895     Op->EndLoc = Loc;
896     return Op;
897   }
898 
899   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
900                                         StringRef Str, SMLoc Loc,
901                                         bool HasExplicitEncodingSize = true) {
902     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
903     Res->Tok.Data = Str.data();
904     Res->Tok.Length = Str.size();
905     Res->StartLoc = Loc;
906     Res->EndLoc = Loc;
907     return Res;
908   }
909 
910   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
911                                       unsigned RegNo, SMLoc S,
912                                       SMLoc E) {
913     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
914     Op->Reg.RegNo = RegNo;
915     Op->Reg.Mods = Modifiers();
916     Op->StartLoc = S;
917     Op->EndLoc = E;
918     return Op;
919   }
920 
921   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
922                                        const class MCExpr *Expr, SMLoc S) {
923     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
924     Op->Expr = Expr;
925     Op->StartLoc = S;
926     Op->EndLoc = S;
927     return Op;
928   }
929 };
930 
931 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
932   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
933   return OS;
934 }
935 
936 //===----------------------------------------------------------------------===//
937 // AsmParser
938 //===----------------------------------------------------------------------===//
939 
940 // Holds info related to the current kernel, e.g. count of SGPRs used.
941 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
942 // .amdgpu_hsa_kernel or at EOF.
943 class KernelScopeInfo {
944   int SgprIndexUnusedMin = -1;
945   int VgprIndexUnusedMin = -1;
946   MCContext *Ctx = nullptr;
947 
948   void usesSgprAt(int i) {
949     if (i >= SgprIndexUnusedMin) {
950       SgprIndexUnusedMin = ++i;
951       if (Ctx) {
952         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
953         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
954       }
955     }
956   }
957 
958   void usesVgprAt(int i) {
959     if (i >= VgprIndexUnusedMin) {
960       VgprIndexUnusedMin = ++i;
961       if (Ctx) {
962         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
963         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
964       }
965     }
966   }
967 
968 public:
969   KernelScopeInfo() = default;
970 
971   void initialize(MCContext &Context) {
972     Ctx = &Context;
973     usesSgprAt(SgprIndexUnusedMin = -1);
974     usesVgprAt(VgprIndexUnusedMin = -1);
975   }
976 
977   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
978     switch (RegKind) {
979       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
980       case IS_AGPR: // fall through
981       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
982       default: break;
983     }
984   }
985 };
986 
/// Target assembly parser for AMDGPU. It derives from MCTargetAsmParser and
/// overrides the register, instruction, directive and match/emit entry points
/// declared below; the remaining members are operand parsers, MCInst
/// converters (cvt*), default-operand factories (default*) and instruction
/// validators (validate*) whose definitions live outside this class body.
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // Encoding constraints; written and read through the setForced*/isForced*
  // accessors further down.
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  // Register-usage tracker. The constructor only initializes it when the
  // code-object-v3 GPR count symbols are not used.
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  // Handlers for individual assembler directives.
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register parsing helpers and GPR count symbol bookkeeping.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  // Shared implementations behind the public cvtMubuf*/cvtDS* wrappers.
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  // Target-specific match result codes, numbered from
  // FIRST_TARGET_MATCH_RESULT_TY.
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  // Mode selector accepted by parseOperand().
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  /// Sets up the parser: initializes it as an extension of \p _Parser, toggles
  /// the "southern-islands" feature when the subtarget has no feature bits
  /// set, computes the available features, pre-defines the ISA version
  /// assembler symbols and prepares register-count tracking.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      // The ISA version is exposed under ".amdgcn.gfx_generation_*" names when
      // code object v3 is available, and under ".option.machine_version_*"
      // names otherwise.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // Same condition as above: either set up the GPR count symbols or fall
      // back to the KernelScope tracker.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget queries; each forwards to the AMDGPU:: helper of the same name
  // or reads a single feature bit.
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // True on every subtarget except VI and GFX9.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  // True on GFX10 only.
  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  /// Returns the streamer's target streamer viewed as an AMDGPUTargetStreamer.
  /// The downcast is unchecked (static_cast).
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // A forced encoding size of 64 is what is reported as "forced VOP3".
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  // MCTargetAsmParser overrides and the top-level operand/instruction parsers.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Parsers for "prefix"-style and named-bit operands.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Token classification predicates and register/immediate operand parsers.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  // cvtDS and cvtDSGds differ only in the IsGdsHardcoded flag they pass to
  // cvtDSImpl.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Small record passed to the sendmsg/hwreg parsing and validation helpers
  // below. Only Id is set by the constructor; both flags start out false.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  // Per-instruction validation checks.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level token stream helpers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  // Swizzle operand parsing.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // The cvtMubuf* wrappers differ only in the IsAtomic / IsAtomicReturn /
  // IsLds flags they pass to cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  // cvtDPP8 is cvtDPP with IsDPP8 set.
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
1413 
// Plain aggregate describing one optional operand.
// NOTE(review): the field meanings below are inferred from the names and from
// the parseIntWithPrefix/parseNamedBit signatures; confirm against the table
// that instantiates this struct.
struct OptionalOperand {
  const char *Name;                 // presumably the operand's assembly name
  AMDGPUOperand::ImmTy Type;        // immediate kind attached to the operand
  bool IsBit;                       // presumably: named bit rather than prefixed value
  bool (*ConvertResult)(int64_t&);  // optional conversion hook for the parsed value
};
1420 
1421 } // end anonymous namespace
1422 
1423 // May be called with integer type with equivalent bitwidth.
1424 static const fltSemantics *getFltSemantics(unsigned Size) {
1425   switch (Size) {
1426   case 4:
1427     return &APFloat::IEEEsingle();
1428   case 8:
1429     return &APFloat::IEEEdouble();
1430   case 2:
1431     return &APFloat::IEEEhalf();
1432   default:
1433     llvm_unreachable("unsupported fp type");
1434   }
1435 }
1436 
1437 static const fltSemantics *getFltSemantics(MVT VT) {
1438   return getFltSemantics(VT.getSizeInBits() / 8);
1439 }
1440 
1441 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1442   switch (OperandType) {
1443   case AMDGPU::OPERAND_REG_IMM_INT32:
1444   case AMDGPU::OPERAND_REG_IMM_FP32:
1445   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1446   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1447   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1448   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1449     return &APFloat::IEEEsingle();
1450   case AMDGPU::OPERAND_REG_IMM_INT64:
1451   case AMDGPU::OPERAND_REG_IMM_FP64:
1452   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1453   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1454     return &APFloat::IEEEdouble();
1455   case AMDGPU::OPERAND_REG_IMM_INT16:
1456   case AMDGPU::OPERAND_REG_IMM_FP16:
1457   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1458   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1459   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1460   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1461   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1462   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1463   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1464   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1465   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1466   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1467     return &APFloat::IEEEhalf();
1468   default:
1469     llvm_unreachable("unsupported fp type");
1470   }
1471 }
1472 
1473 //===----------------------------------------------------------------------===//
1474 // Operand
1475 //===----------------------------------------------------------------------===//
1476 
1477 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1478   bool Lost;
1479 
1480   // Convert literal to single precision
1481   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1482                                                APFloat::rmNearestTiesToEven,
1483                                                &Lost);
1484   // We allow precision lost but not overflow or underflow
1485   if (Status != APFloat::opOK &&
1486       Lost &&
1487       ((Status & APFloat::opOverflow)  != 0 ||
1488        (Status & APFloat::opUnderflow) != 0)) {
1489     return false;
1490   }
1491 
1492   return true;
1493 }
1494 
1495 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1496   return isUIntN(Size, Val) || isIntN(Size, Val);
1497 }
1498 
1499 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1500 
1501   // This is a hack to enable named inline values like
1502   // shared_base with both 32-bit and 64-bit operands.
1503   // Note that these values are defined as
1504   // 32-bit operands only.
1505   if (isInlineValue()) {
1506     return true;
1507   }
1508 
1509   if (!isImmTy(ImmTyNone)) {
1510     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1511     return false;
1512   }
1513   // TODO: We should avoid using host float here. It would be better to
1514   // check the float bit values which is what a few other places do.
1515   // We've had bot failures before due to weird NaN support on mips hosts.
1516 
1517   APInt Literal(64, Imm.Val);
1518 
1519   if (Imm.IsFPImm) { // We got fp literal token
1520     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1521       return AMDGPU::isInlinableLiteral64(Imm.Val,
1522                                           AsmParser->hasInv2PiInlineImm());
1523     }
1524 
1525     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1526     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1527       return false;
1528 
1529     if (type.getScalarSizeInBits() == 16) {
1530       return AMDGPU::isInlinableLiteral16(
1531         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1532         AsmParser->hasInv2PiInlineImm());
1533     }
1534 
1535     // Check if single precision literal is inlinable
1536     return AMDGPU::isInlinableLiteral32(
1537       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1538       AsmParser->hasInv2PiInlineImm());
1539   }
1540 
1541   // We got int literal token.
1542   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1543     return AMDGPU::isInlinableLiteral64(Imm.Val,
1544                                         AsmParser->hasInv2PiInlineImm());
1545   }
1546 
1547   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1548     return false;
1549   }
1550 
1551   if (type.getScalarSizeInBits() == 16) {
1552     return AMDGPU::isInlinableLiteral16(
1553       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1554       AsmParser->hasInv2PiInlineImm());
1555   }
1556 
1557   return AMDGPU::isInlinableLiteral32(
1558     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1559     AsmParser->hasInv2PiInlineImm());
1560 }
1561 
1562 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1563   // Check that this immediate can be added as literal
1564   if (!isImmTy(ImmTyNone)) {
1565     return false;
1566   }
1567 
1568   if (!Imm.IsFPImm) {
1569     // We got int literal token.
1570 
1571     if (type == MVT::f64 && hasFPModifiers()) {
1572       // Cannot apply fp modifiers to int literals preserving the same semantics
1573       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1574       // disable these cases.
1575       return false;
1576     }
1577 
1578     unsigned Size = type.getSizeInBits();
1579     if (Size == 64)
1580       Size = 32;
1581 
1582     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1583     // types.
1584     return isSafeTruncation(Imm.Val, Size);
1585   }
1586 
1587   // We got fp literal token
1588   if (type == MVT::f64) { // Expected 64-bit fp operand
1589     // We would set low 64-bits of literal to zeroes but we accept this literals
1590     return true;
1591   }
1592 
1593   if (type == MVT::i64) { // Expected 64-bit int operand
1594     // We don't allow fp literals in 64-bit integer instructions. It is
1595     // unclear how we should encode them.
1596     return false;
1597   }
1598 
1599   // We allow fp literals with f16x2 operands assuming that the specified
1600   // literal goes into the lower half and the upper half is zero. We also
1601   // require that the literal may be losslesly converted to f16.
1602   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1603                      (type == MVT::v2i16)? MVT::i16 : type;
1604 
1605   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1606   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1607 }
1608 
1609 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1610   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1611 }
1612 
1613 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1614   if (AsmParser->isVI())
1615     return isVReg32();
1616   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1617     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1618   else
1619     return false;
1620 }
1621 
// SDWA operand check specialized for the f16 type; forwards to isSDWAOperand().
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}
1625 
// SDWA operand check specialized for the f32 type; forwards to isSDWAOperand().
bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}
1629 
// SDWA operand check specialized for the i16 type; forwards to isSDWAOperand().
bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}
1633 
// SDWA operand check specialized for the i32 type; forwards to isSDWAOperand().
bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1637 
1638 bool AMDGPUOperand::isBoolReg() const {
1639   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1640     isSCSrcB64() : isSCSrcB32();
1641 }
1642 
1643 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1644 {
1645   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1646   assert(Size == 2 || Size == 4 || Size == 8);
1647 
1648   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1649 
1650   if (Imm.Mods.Abs) {
1651     Val &= ~FpSignMask;
1652   }
1653   if (Imm.Mods.Neg) {
1654     Val ^= FpSignMask;
1655   }
1656 
1657   return Val;
1658 }
1659 
1660 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1661   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1662                              Inst.getNumOperands())) {
1663     addLiteralImmOperand(Inst, Imm.Val,
1664                          ApplyModifiers &
1665                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1666   } else {
1667     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1668     Inst.addOperand(MCOperand::createImm(Imm.Val));
1669   }
1670 }
1671 
1672 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1673   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1674   auto OpNum = Inst.getNumOperands();
1675   // Check that this operand accepts literals
1676   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1677 
1678   if (ApplyModifiers) {
1679     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1680     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1681     Val = applyInputFPModifiers(Val, Size);
1682   }
1683 
1684   APInt Literal(64, Val);
1685   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1686 
1687   if (Imm.IsFPImm) { // We got fp literal token
1688     switch (OpTy) {
1689     case AMDGPU::OPERAND_REG_IMM_INT64:
1690     case AMDGPU::OPERAND_REG_IMM_FP64:
1691     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1692     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1693       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1694                                        AsmParser->hasInv2PiInlineImm())) {
1695         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1696         return;
1697       }
1698 
1699       // Non-inlineable
1700       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1701         // For fp operands we check if low 32 bits are zeros
1702         if (Literal.getLoBits(32) != 0) {
1703           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1704           "Can't encode literal as exact 64-bit floating-point operand. "
1705           "Low 32-bits will be set to zero");
1706         }
1707 
1708         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1709         return;
1710       }
1711 
1712       // We don't allow fp literals in 64-bit integer instructions. It is
1713       // unclear how we should encode them. This case should be checked earlier
1714       // in predicate methods (isLiteralImm())
1715       llvm_unreachable("fp literal in 64-bit integer instruction.");
1716 
1717     case AMDGPU::OPERAND_REG_IMM_INT32:
1718     case AMDGPU::OPERAND_REG_IMM_FP32:
1719     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1720     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1721     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1722     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1723     case AMDGPU::OPERAND_REG_IMM_INT16:
1724     case AMDGPU::OPERAND_REG_IMM_FP16:
1725     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1726     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1727     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1728     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1729     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1730     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1731     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1732     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1733     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1734     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1735       bool lost;
1736       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1737       // Convert literal to single precision
1738       FPLiteral.convert(*getOpFltSemantics(OpTy),
1739                         APFloat::rmNearestTiesToEven, &lost);
1740       // We allow precision lost but not overflow or underflow. This should be
1741       // checked earlier in isLiteralImm()
1742 
1743       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1744       Inst.addOperand(MCOperand::createImm(ImmVal));
1745       return;
1746     }
1747     default:
1748       llvm_unreachable("invalid operand size");
1749     }
1750 
1751     return;
1752   }
1753 
1754   // We got int literal token.
1755   // Only sign extend inline immediates.
1756   switch (OpTy) {
1757   case AMDGPU::OPERAND_REG_IMM_INT32:
1758   case AMDGPU::OPERAND_REG_IMM_FP32:
1759   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1760   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1761   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1762   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1763   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1764   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1765     if (isSafeTruncation(Val, 32) &&
1766         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1767                                      AsmParser->hasInv2PiInlineImm())) {
1768       Inst.addOperand(MCOperand::createImm(Val));
1769       return;
1770     }
1771 
1772     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1773     return;
1774 
1775   case AMDGPU::OPERAND_REG_IMM_INT64:
1776   case AMDGPU::OPERAND_REG_IMM_FP64:
1777   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1778   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1779     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1780       Inst.addOperand(MCOperand::createImm(Val));
1781       return;
1782     }
1783 
1784     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1785     return;
1786 
1787   case AMDGPU::OPERAND_REG_IMM_INT16:
1788   case AMDGPU::OPERAND_REG_IMM_FP16:
1789   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1790   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1791   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1792   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1793     if (isSafeTruncation(Val, 16) &&
1794         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1795                                      AsmParser->hasInv2PiInlineImm())) {
1796       Inst.addOperand(MCOperand::createImm(Val));
1797       return;
1798     }
1799 
1800     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1801     return;
1802 
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1804   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1807     assert(isSafeTruncation(Val, 16));
1808     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1809                                         AsmParser->hasInv2PiInlineImm()));
1810 
1811     Inst.addOperand(MCOperand::createImm(Val));
1812     return;
1813   }
1814   default:
1815     llvm_unreachable("invalid operand size");
1816   }
1817 }
1818 
1819 template <unsigned Bitwidth>
1820 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1821   APInt Literal(64, Imm.Val);
1822 
1823   if (!Imm.IsFPImm) {
1824     // We got int literal token.
1825     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1826     return;
1827   }
1828 
1829   bool Lost;
1830   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1831   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1832                     APFloat::rmNearestTiesToEven, &Lost);
1833   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1834 }
1835 
1836 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1837   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1838 }
1839 
1840 static bool isInlineValue(unsigned Reg) {
1841   switch (Reg) {
1842   case AMDGPU::SRC_SHARED_BASE:
1843   case AMDGPU::SRC_SHARED_LIMIT:
1844   case AMDGPU::SRC_PRIVATE_BASE:
1845   case AMDGPU::SRC_PRIVATE_LIMIT:
1846   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1847     return true;
1848   case AMDGPU::SRC_VCCZ:
1849   case AMDGPU::SRC_EXECZ:
1850   case AMDGPU::SRC_SCC:
1851     return true;
1852   default:
1853     return false;
1854   }
1855 }
1856 
1857 bool AMDGPUOperand::isInlineValue() const {
1858   return isRegKind() && ::isInlineValue(getReg());
1859 }
1860 
1861 //===----------------------------------------------------------------------===//
1862 // AsmParser
1863 //===----------------------------------------------------------------------===//
1864 
1865 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1866   if (Is == IS_VGPR) {
1867     switch (RegWidth) {
1868       default: return -1;
1869       case 1: return AMDGPU::VGPR_32RegClassID;
1870       case 2: return AMDGPU::VReg_64RegClassID;
1871       case 3: return AMDGPU::VReg_96RegClassID;
1872       case 4: return AMDGPU::VReg_128RegClassID;
1873       case 8: return AMDGPU::VReg_256RegClassID;
1874       case 16: return AMDGPU::VReg_512RegClassID;
1875     }
1876   } else if (Is == IS_TTMP) {
1877     switch (RegWidth) {
1878       default: return -1;
1879       case 1: return AMDGPU::TTMP_32RegClassID;
1880       case 2: return AMDGPU::TTMP_64RegClassID;
1881       case 4: return AMDGPU::TTMP_128RegClassID;
1882       case 8: return AMDGPU::TTMP_256RegClassID;
1883       case 16: return AMDGPU::TTMP_512RegClassID;
1884     }
1885   } else if (Is == IS_SGPR) {
1886     switch (RegWidth) {
1887       default: return -1;
1888       case 1: return AMDGPU::SGPR_32RegClassID;
1889       case 2: return AMDGPU::SGPR_64RegClassID;
1890       case 4: return AMDGPU::SGPR_128RegClassID;
1891       case 8: return AMDGPU::SGPR_256RegClassID;
1892       case 16: return AMDGPU::SGPR_512RegClassID;
1893     }
1894   } else if (Is == IS_AGPR) {
1895     switch (RegWidth) {
1896       default: return -1;
1897       case 1: return AMDGPU::AGPR_32RegClassID;
1898       case 2: return AMDGPU::AReg_64RegClassID;
1899       case 4: return AMDGPU::AReg_128RegClassID;
1900       case 16: return AMDGPU::AReg_512RegClassID;
1901       case 32: return AMDGPU::AReg_1024RegClassID;
1902     }
1903   }
1904   return -1;
1905 }
1906 
1907 static unsigned getSpecialRegForName(StringRef RegName) {
1908   return StringSwitch<unsigned>(RegName)
1909     .Case("exec", AMDGPU::EXEC)
1910     .Case("vcc", AMDGPU::VCC)
1911     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1912     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1913     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1914     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1915     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1916     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1917     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1918     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1919     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1920     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1921     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1922     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1923     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1924     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1925     .Case("m0", AMDGPU::M0)
1926     .Case("vccz", AMDGPU::SRC_VCCZ)
1927     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1928     .Case("execz", AMDGPU::SRC_EXECZ)
1929     .Case("src_execz", AMDGPU::SRC_EXECZ)
1930     .Case("scc", AMDGPU::SRC_SCC)
1931     .Case("src_scc", AMDGPU::SRC_SCC)
1932     .Case("tba", AMDGPU::TBA)
1933     .Case("tma", AMDGPU::TMA)
1934     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1935     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1936     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1937     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1938     .Case("vcc_lo", AMDGPU::VCC_LO)
1939     .Case("vcc_hi", AMDGPU::VCC_HI)
1940     .Case("exec_lo", AMDGPU::EXEC_LO)
1941     .Case("exec_hi", AMDGPU::EXEC_HI)
1942     .Case("tma_lo", AMDGPU::TMA_LO)
1943     .Case("tma_hi", AMDGPU::TMA_HI)
1944     .Case("tba_lo", AMDGPU::TBA_LO)
1945     .Case("tba_hi", AMDGPU::TBA_HI)
1946     .Case("null", AMDGPU::SGPR_NULL)
1947     .Default(0);
1948 }
1949 
1950 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1951                                     SMLoc &EndLoc) {
1952   auto R = parseRegister();
1953   if (!R) return true;
1954   assert(R->isReg());
1955   RegNo = R->getReg();
1956   StartLoc = R->getStartLoc();
1957   EndLoc = R->getEndLoc();
1958   return false;
1959 }
1960 
1961 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1962                                             RegisterKind RegKind, unsigned Reg1,
1963                                             unsigned RegNum) {
1964   switch (RegKind) {
1965   case IS_SPECIAL:
1966     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1967       Reg = AMDGPU::EXEC;
1968       RegWidth = 2;
1969       return true;
1970     }
1971     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1972       Reg = AMDGPU::FLAT_SCR;
1973       RegWidth = 2;
1974       return true;
1975     }
1976     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1977       Reg = AMDGPU::XNACK_MASK;
1978       RegWidth = 2;
1979       return true;
1980     }
1981     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1982       Reg = AMDGPU::VCC;
1983       RegWidth = 2;
1984       return true;
1985     }
1986     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1987       Reg = AMDGPU::TBA;
1988       RegWidth = 2;
1989       return true;
1990     }
1991     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1992       Reg = AMDGPU::TMA;
1993       RegWidth = 2;
1994       return true;
1995     }
1996     return false;
1997   case IS_VGPR:
1998   case IS_SGPR:
1999   case IS_AGPR:
2000   case IS_TTMP:
2001     if (Reg1 != Reg + RegWidth) {
2002       return false;
2003     }
2004     RegWidth++;
2005     return true;
2006   default:
2007     llvm_unreachable("unexpected register kind");
2008   }
2009 }
2010 
// Name prefixes of the numbered register files. isRegister() treats an
// identifier as a register when it is one of these prefixes followed by a
// decimal index, or the bare prefix followed by '['.
static const StringRef Registers[] = {
  { "v" },
  { "s" },
  { "ttmp" },
  { "acc" },
  { "a" },
};
2018 
2019 bool
2020 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2021                             const AsmToken &NextToken) const {
2022 
2023   // A list of consecutive registers: [s0,s1,s2,s3]
2024   if (Token.is(AsmToken::LBrac))
2025     return true;
2026 
2027   if (!Token.is(AsmToken::Identifier))
2028     return false;
2029 
2030   // A single register like s0 or a range of registers like s[0:1]
2031 
2032   StringRef RegName = Token.getString();
2033 
2034   for (StringRef Reg : Registers) {
2035     if (RegName.startswith(Reg)) {
2036       if (Reg.size() < RegName.size()) {
2037         unsigned RegNum;
2038         // A single register with an index: rXX
2039         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2040           return true;
2041       } else {
2042         // A range of registers: r[XX:YY].
2043         if (NextToken.is(AsmToken::LBrac))
2044           return true;
2045       }
2046     }
2047   }
2048 
2049   return getSpecialRegForName(RegName);
2050 }
2051 
2052 bool
2053 AMDGPUAsmParser::isRegister()
2054 {
2055   return isRegister(getToken(), peekToken());
2056 }
2057 
// Parse a register reference starting at the current token and describe it
// through the output parameters. Three syntaxes are handled:
//   - a special register name accepted by getSpecialRegForName();
//   - a v/s/a/acc/ttmp register written with an index suffix (v5) or as a
//     bracketed range (v[4:7]; the ":YY" part may be omitted);
//   - a bracketed list of single registers of one kind: [s0,s1,s2,s3].
//
// On success returns true with RegKind, Reg and RegWidth set; for numbered
// registers RegNum ends up as the index within the selected register class.
// When DwordRegIndex is non-null it is reset to 0 on entry and, for
// VGPR/SGPR/AGPR/TTMP registers, receives the index of the first 32-bit
// register. Returns false when the tokens do not form a register, when an
// SGPR/TTMP range is misaligned, when no register class exists for the width,
// when the index is out of range for the class, or when the subtarget does not
// have the register. Tokens consumed before a failure is detected are not
// restored.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      // Special register: the name alone identifies it.
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Numbered register: determine the kind from the name prefix and note
      // where the index digits (if any) start.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName[0] == 'a') {
        RegNumIndex = RegName.startswith("acc") ? 3 : 1;
        RegKind = IS_AGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // After the low bound only ']' (single-element range) or ':' is valid.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        // No explicit bounds check here: an inverted or negative range yields
        // values that are rejected by the class lookup and range check below.
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    // The first element fixes the kind and starting register of the list.
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Every further element must be a single register of the same kind
        // that AddNextRegisterToList() accepts as the next one in sequence.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Resolve the (kind, index, width) triple to a concrete register.
  switch (RegKind) {
  case IS_SPECIAL:
    // Reg was already set above; report a fixed index and width.
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    // NOTE(review): RC is declared by value; a const reference would
    // presumably suffice - confirm against getRegClass()'s return type.
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
2190 
2191 Optional<StringRef>
2192 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2193   switch (RegKind) {
2194   case IS_VGPR:
2195     return StringRef(".amdgcn.next_free_vgpr");
2196   case IS_SGPR:
2197     return StringRef(".amdgcn.next_free_sgpr");
2198   default:
2199     return None;
2200   }
2201 }
2202 
2203 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2204   auto SymbolName = getGprCountSymbolName(RegKind);
2205   assert(SymbolName && "initializing invalid register kind");
2206   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2207   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2208 }
2209 
2210 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2211                                             unsigned DwordRegIndex,
2212                                             unsigned RegWidth) {
2213   // Symbols are only defined for GCN targets
2214   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2215     return true;
2216 
2217   auto SymbolName = getGprCountSymbolName(RegKind);
2218   if (!SymbolName)
2219     return true;
2220   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2221 
2222   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2223   int64_t OldCount;
2224 
2225   if (!Sym->isVariable())
2226     return !Error(getParser().getTok().getLoc(),
2227                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2228   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2229     return !Error(
2230         getParser().getTok().getLoc(),
2231         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2232 
2233   if (OldCount <= NewMax)
2234     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2235 
2236   return true;
2237 }
2238 
2239 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2240   const auto &Tok = Parser.getTok();
2241   SMLoc StartLoc = Tok.getLoc();
2242   SMLoc EndLoc = Tok.getEndLoc();
2243   RegisterKind RegKind;
2244   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2245 
2246   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2247     //FIXME: improve error messages (bug 41303).
2248     Error(StartLoc, "not a valid operand.");
2249     return nullptr;
2250   }
2251   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2252     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2253       return nullptr;
2254   } else
2255     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2256   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2257 }
2258 
2259 OperandMatchResultTy
2260 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2261   // TODO: add syntactic sugar for 1/(2*PI)
2262 
2263   assert(!isRegister());
2264   assert(!isModifier());
2265 
2266   const auto& Tok = getToken();
2267   const auto& NextTok = peekToken();
2268   bool IsReal = Tok.is(AsmToken::Real);
2269   SMLoc S = getLoc();
2270   bool Negate = false;
2271 
2272   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2273     lex();
2274     IsReal = true;
2275     Negate = true;
2276   }
2277 
2278   if (IsReal) {
2279     // Floating-point expressions are not supported.
2280     // Can only allow floating-point literals with an
2281     // optional sign.
2282 
2283     StringRef Num = getTokenStr();
2284     lex();
2285 
2286     APFloat RealVal(APFloat::IEEEdouble());
2287     auto roundMode = APFloat::rmNearestTiesToEven;
2288     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2289       return MatchOperand_ParseFail;
2290     }
2291     if (Negate)
2292       RealVal.changeSign();
2293 
2294     Operands.push_back(
2295       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2296                                AMDGPUOperand::ImmTyNone, true));
2297 
2298     return MatchOperand_Success;
2299 
2300   } else {
2301     int64_t IntVal;
2302     const MCExpr *Expr;
2303     SMLoc S = getLoc();
2304 
2305     if (HasSP3AbsModifier) {
2306       // This is a workaround for handling expressions
2307       // as arguments of SP3 'abs' modifier, for example:
2308       //     |1.0|
2309       //     |-1|
2310       //     |1+x|
2311       // This syntax is not compatible with syntax of standard
2312       // MC expressions (due to the trailing '|').
2313       SMLoc EndLoc;
2314       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2315         return MatchOperand_ParseFail;
2316     } else {
2317       if (Parser.parseExpression(Expr))
2318         return MatchOperand_ParseFail;
2319     }
2320 
2321     if (Expr->evaluateAsAbsolute(IntVal)) {
2322       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2323     } else {
2324       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2325     }
2326 
2327     return MatchOperand_Success;
2328   }
2329 
2330   return MatchOperand_NoMatch;
2331 }
2332 
2333 OperandMatchResultTy
2334 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2335   if (!isRegister())
2336     return MatchOperand_NoMatch;
2337 
2338   if (auto R = parseRegister()) {
2339     assert(R->isReg());
2340     Operands.push_back(std::move(R));
2341     return MatchOperand_Success;
2342   }
2343   return MatchOperand_ParseFail;
2344 }
2345 
2346 OperandMatchResultTy
2347 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2348   auto res = parseReg(Operands);
2349   if (res != MatchOperand_NoMatch) {
2350     return res;
2351   } else if (isModifier()) {
2352     return MatchOperand_NoMatch;
2353   } else {
2354     return parseImm(Operands, HasSP3AbsMod);
2355   }
2356 }
2357 
2358 bool
2359 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2360   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2361     const auto &str = Token.getString();
2362     return str == "abs" || str == "neg" || str == "sext";
2363   }
2364   return false;
2365 }
2366 
2367 bool
2368 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2369   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2370 }
2371 
2372 bool
2373 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2374   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2375 }
2376 
2377 bool
2378 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2379   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2380 }
2381 
2382 // Check if this is an operand modifier or an opcode modifier
2383 // which may look like an expression but it is not. We should
2384 // avoid parsing these modifiers as expressions. Currently
2385 // recognized sequences are:
2386 //   |...|
2387 //   abs(...)
2388 //   neg(...)
2389 //   sext(...)
2390 //   -reg
2391 //   -|...|
2392 //   -abs(...)
2393 //   name:...
2394 // Note that simple opcode modifiers like 'gds' may be parsed as
2395 // expressions; this is a special case. See getExpressionAsToken.
2396 //
2397 bool
2398 AMDGPUAsmParser::isModifier() {
2399 
2400   AsmToken Tok = getToken();
2401   AsmToken NextToken[2];
2402   peekTokens(NextToken);
2403 
2404   return isOperandModifier(Tok, NextToken[0]) ||
2405          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2406          isOpcodeModifierWithVal(Tok, NextToken[0]);
2407 }
2408 
2409 // Check if the current token is an SP3 'neg' modifier.
2410 // Currently this modifier is allowed in the following context:
2411 //
2412 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2413 // 2. Before an 'abs' modifier: -abs(...)
2414 // 3. Before an SP3 'abs' modifier: -|...|
2415 //
2416 // In all other cases "-" is handled as a part
2417 // of an expression that follows the sign.
2418 //
2419 // Note: When "-" is followed by an integer literal,
2420 // this is interpreted as integer negation rather
2421 // than a floating-point NEG modifier applied to N.
2422 // Beside being contr-intuitive, such use of floating-point
2423 // NEG modifier would have resulted in different meaning
2424 // of integer literals used with VOP1/2/C and VOP3,
2425 // for example:
2426 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2427 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2428 // Negative fp literals with preceding "-" are
2429 // handled likewise for unifomtity
2430 //
2431 bool
2432 AMDGPUAsmParser::parseSP3NegModifier() {
2433 
2434   AsmToken NextToken[2];
2435   peekTokens(NextToken);
2436 
2437   if (isToken(AsmToken::Minus) &&
2438       (isRegister(NextToken[0], NextToken[1]) ||
2439        NextToken[0].is(AsmToken::Pipe) ||
2440        isId(NextToken[0], "abs"))) {
2441     lex();
2442     return true;
2443   }
2444 
2445   return false;
2446 }
2447 
2448 OperandMatchResultTy
2449 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2450                                               bool AllowImm) {
2451   bool Neg, SP3Neg;
2452   bool Abs, SP3Abs;
2453   SMLoc Loc;
2454 
2455   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2456   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2457     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2458     return MatchOperand_ParseFail;
2459   }
2460 
2461   SP3Neg = parseSP3NegModifier();
2462 
2463   Loc = getLoc();
2464   Neg = trySkipId("neg");
2465   if (Neg && SP3Neg) {
2466     Error(Loc, "expected register or immediate");
2467     return MatchOperand_ParseFail;
2468   }
2469   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2470     return MatchOperand_ParseFail;
2471 
2472   Abs = trySkipId("abs");
2473   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2474     return MatchOperand_ParseFail;
2475 
2476   Loc = getLoc();
2477   SP3Abs = trySkipToken(AsmToken::Pipe);
2478   if (Abs && SP3Abs) {
2479     Error(Loc, "expected register or immediate");
2480     return MatchOperand_ParseFail;
2481   }
2482 
2483   OperandMatchResultTy Res;
2484   if (AllowImm) {
2485     Res = parseRegOrImm(Operands, SP3Abs);
2486   } else {
2487     Res = parseReg(Operands);
2488   }
2489   if (Res != MatchOperand_Success) {
2490     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2491   }
2492 
2493   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2494     return MatchOperand_ParseFail;
2495   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2496     return MatchOperand_ParseFail;
2497   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2498     return MatchOperand_ParseFail;
2499 
2500   AMDGPUOperand::Modifiers Mods;
2501   Mods.Abs = Abs || SP3Abs;
2502   Mods.Neg = Neg || SP3Neg;
2503 
2504   if (Mods.hasFPModifiers()) {
2505     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2506     if (Op.isExpr()) {
2507       Error(Op.getStartLoc(), "expected an absolute expression");
2508       return MatchOperand_ParseFail;
2509     }
2510     Op.setModifiers(Mods);
2511   }
2512   return MatchOperand_Success;
2513 }
2514 
2515 OperandMatchResultTy
2516 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2517                                                bool AllowImm) {
2518   bool Sext = trySkipId("sext");
2519   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2520     return MatchOperand_ParseFail;
2521 
2522   OperandMatchResultTy Res;
2523   if (AllowImm) {
2524     Res = parseRegOrImm(Operands);
2525   } else {
2526     Res = parseReg(Operands);
2527   }
2528   if (Res != MatchOperand_Success) {
2529     return Sext? MatchOperand_ParseFail : Res;
2530   }
2531 
2532   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2533     return MatchOperand_ParseFail;
2534 
2535   AMDGPUOperand::Modifiers Mods;
2536   Mods.Sext = Sext;
2537 
2538   if (Mods.hasIntModifiers()) {
2539     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2540     if (Op.isExpr()) {
2541       Error(Op.getStartLoc(), "expected an absolute expression");
2542       return MatchOperand_ParseFail;
2543     }
2544     Op.setModifiers(Mods);
2545   }
2546 
2547   return MatchOperand_Success;
2548 }
2549 
2550 OperandMatchResultTy
2551 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2552   return parseRegOrImmWithFPInputMods(Operands, false);
2553 }
2554 
2555 OperandMatchResultTy
2556 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2557   return parseRegOrImmWithIntInputMods(Operands, false);
2558 }
2559 
2560 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2561   auto Loc = getLoc();
2562   if (trySkipId("off")) {
2563     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2564                                                 AMDGPUOperand::ImmTyOff, false));
2565     return MatchOperand_Success;
2566   }
2567 
2568   if (!isRegister())
2569     return MatchOperand_NoMatch;
2570 
2571   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2572   if (Reg) {
2573     Operands.push_back(std::move(Reg));
2574     return MatchOperand_Success;
2575   }
2576 
2577   return MatchOperand_ParseFail;
2578 
2579 }
2580 
2581 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2582   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2583 
2584   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2585       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2586       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2587       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2588     return Match_InvalidOperand;
2589 
2590   if ((TSFlags & SIInstrFlags::VOP3) &&
2591       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2592       getForcedEncodingSize() != 64)
2593     return Match_PreferE32;
2594 
2595   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2596       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2597     // v_mac_f32/16 allow only dst_sel == DWORD;
2598     auto OpNum =
2599         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2600     const auto &Op = Inst.getOperand(OpNum);
2601     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2602       return Match_InvalidOperand;
2603     }
2604   }
2605 
2606   return Match_Success;
2607 }
2608 
2609 // What asm variants we should check
2610 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2611   if (getForcedEncodingSize() == 32) {
2612     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2613     return makeArrayRef(Variants);
2614   }
2615 
2616   if (isForcedVOP3()) {
2617     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2618     return makeArrayRef(Variants);
2619   }
2620 
2621   if (isForcedSDWA()) {
2622     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2623                                         AMDGPUAsmVariants::SDWA9};
2624     return makeArrayRef(Variants);
2625   }
2626 
2627   if (isForcedDPP()) {
2628     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2629     return makeArrayRef(Variants);
2630   }
2631 
2632   static const unsigned Variants[] = {
2633     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2634     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2635   };
2636 
2637   return makeArrayRef(Variants);
2638 }
2639 
2640 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2641   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2642   const unsigned Num = Desc.getNumImplicitUses();
2643   for (unsigned i = 0; i < Num; ++i) {
2644     unsigned Reg = Desc.ImplicitUses[i];
2645     switch (Reg) {
2646     case AMDGPU::FLAT_SCR:
2647     case AMDGPU::VCC:
2648     case AMDGPU::VCC_LO:
2649     case AMDGPU::VCC_HI:
2650     case AMDGPU::M0:
2651     case AMDGPU::SGPR_NULL:
2652       return Reg;
2653     default:
2654       break;
2655     }
2656   }
2657   return AMDGPU::NoRegister;
2658 }
2659 
2660 // NB: This code is correct only when used to check constant
2661 // bus limitations because GFX7 support no f16 inline constants.
2662 // Note that there are no cases when a GFX7 opcode violates
2663 // constant bus limitations due to the use of an f16 constant.
2664 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2665                                        unsigned OpIdx) const {
2666   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2667 
2668   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2669     return false;
2670   }
2671 
2672   const MCOperand &MO = Inst.getOperand(OpIdx);
2673 
2674   int64_t Val = MO.getImm();
2675   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2676 
2677   switch (OpSize) { // expected operand size
2678   case 8:
2679     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2680   case 4:
2681     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2682   case 2: {
2683     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2684     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2685         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2686         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2687         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2688         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2689         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2690       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2691     } else {
2692       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2693     }
2694   }
2695   default:
2696     llvm_unreachable("invalid operand size");
2697   }
2698 }
2699 
2700 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2701   const MCOperand &MO = Inst.getOperand(OpIdx);
2702   if (MO.isImm()) {
2703     return !isInlineConstant(Inst, OpIdx);
2704   }
2705   return !MO.isReg() ||
2706          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2707 }
2708 
2709 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2710   const unsigned Opcode = Inst.getOpcode();
2711   const MCInstrDesc &Desc = MII.get(Opcode);
2712   unsigned ConstantBusUseCount = 0;
2713   unsigned NumLiterals = 0;
2714   unsigned LiteralSize;
2715 
2716   if (Desc.TSFlags &
2717       (SIInstrFlags::VOPC |
2718        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2719        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2720        SIInstrFlags::SDWA)) {
2721     // Check special imm operands (used by madmk, etc)
2722     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2723       ++ConstantBusUseCount;
2724     }
2725 
2726     SmallDenseSet<unsigned> SGPRsUsed;
2727     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2728     if (SGPRUsed != AMDGPU::NoRegister) {
2729       SGPRsUsed.insert(SGPRUsed);
2730       ++ConstantBusUseCount;
2731     }
2732 
2733     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2734     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2735     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2736 
2737     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2738 
2739     for (int OpIdx : OpIndices) {
2740       if (OpIdx == -1) break;
2741 
2742       const MCOperand &MO = Inst.getOperand(OpIdx);
2743       if (usesConstantBus(Inst, OpIdx)) {
2744         if (MO.isReg()) {
2745           const unsigned Reg = mc2PseudoReg(MO.getReg());
2746           // Pairs of registers with a partial intersections like these
2747           //   s0, s[0:1]
2748           //   flat_scratch_lo, flat_scratch
2749           //   flat_scratch_lo, flat_scratch_hi
2750           // are theoretically valid but they are disabled anyway.
2751           // Note that this code mimics SIInstrInfo::verifyInstruction
2752           if (!SGPRsUsed.count(Reg)) {
2753             SGPRsUsed.insert(Reg);
2754             ++ConstantBusUseCount;
2755           }
2756           SGPRUsed = Reg;
2757         } else { // Expression or a literal
2758 
2759           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2760             continue; // special operand like VINTERP attr_chan
2761 
2762           // An instruction may use only one literal.
2763           // This has been validated on the previous step.
2764           // See validateVOP3Literal.
2765           // This literal may be used as more than one operand.
2766           // If all these operands are of the same size,
2767           // this literal counts as one scalar value.
2768           // Otherwise it counts as 2 scalar values.
2769           // See "GFX10 Shader Programming", section 3.6.2.3.
2770 
2771           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2772           if (Size < 4) Size = 4;
2773 
2774           if (NumLiterals == 0) {
2775             NumLiterals = 1;
2776             LiteralSize = Size;
2777           } else if (LiteralSize != Size) {
2778             NumLiterals = 2;
2779           }
2780         }
2781       }
2782     }
2783   }
2784   ConstantBusUseCount += NumLiterals;
2785 
2786   if (isGFX10())
2787     return ConstantBusUseCount <= 2;
2788 
2789   return ConstantBusUseCount <= 1;
2790 }
2791 
2792 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2793   const unsigned Opcode = Inst.getOpcode();
2794   const MCInstrDesc &Desc = MII.get(Opcode);
2795 
2796   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2797   if (DstIdx == -1 ||
2798       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2799     return true;
2800   }
2801 
2802   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2803 
2804   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2805   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2806   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2807 
2808   assert(DstIdx != -1);
2809   const MCOperand &Dst = Inst.getOperand(DstIdx);
2810   assert(Dst.isReg());
2811   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2812 
2813   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2814 
2815   for (int SrcIdx : SrcIndices) {
2816     if (SrcIdx == -1) break;
2817     const MCOperand &Src = Inst.getOperand(SrcIdx);
2818     if (Src.isReg()) {
2819       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2820       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2821         return false;
2822       }
2823     }
2824   }
2825 
2826   return true;
2827 }
2828 
2829 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2830 
2831   const unsigned Opc = Inst.getOpcode();
2832   const MCInstrDesc &Desc = MII.get(Opc);
2833 
2834   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2835     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2836     assert(ClampIdx != -1);
2837     return Inst.getOperand(ClampIdx).getImm() == 0;
2838   }
2839 
2840   return true;
2841 }
2842 
2843 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2844 
2845   const unsigned Opc = Inst.getOpcode();
2846   const MCInstrDesc &Desc = MII.get(Opc);
2847 
2848   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2849     return true;
2850 
2851   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2852   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2853   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2854 
2855   assert(VDataIdx != -1);
2856   assert(DMaskIdx != -1);
2857   assert(TFEIdx != -1);
2858 
2859   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2860   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2861   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2862   if (DMask == 0)
2863     DMask = 1;
2864 
2865   unsigned DataSize =
2866     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2867   if (hasPackedD16()) {
2868     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2869     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2870       DataSize = (DataSize + 1) / 2;
2871   }
2872 
2873   return (VDataSize / 4) == DataSize + TFESize;
2874 }
2875 
2876 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2877   const unsigned Opc = Inst.getOpcode();
2878   const MCInstrDesc &Desc = MII.get(Opc);
2879 
2880   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2881     return true;
2882 
2883   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2884   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2885       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2886   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2887   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2888   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2889 
2890   assert(VAddr0Idx != -1);
2891   assert(SrsrcIdx != -1);
2892   assert(DimIdx != -1);
2893   assert(SrsrcIdx > VAddr0Idx);
2894 
2895   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2896   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2897   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2898   unsigned VAddrSize =
2899       IsNSA ? SrsrcIdx - VAddr0Idx
2900             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2901 
2902   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2903                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2904                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2905                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2906   if (!IsNSA) {
2907     if (AddrSize > 8)
2908       AddrSize = 16;
2909     else if (AddrSize > 4)
2910       AddrSize = 8;
2911   }
2912 
2913   return VAddrSize == AddrSize;
2914 }
2915 
2916 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2917 
2918   const unsigned Opc = Inst.getOpcode();
2919   const MCInstrDesc &Desc = MII.get(Opc);
2920 
2921   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2922     return true;
2923   if (!Desc.mayLoad() || !Desc.mayStore())
2924     return true; // Not atomic
2925 
2926   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2927   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2928 
2929   // This is an incomplete check because image_atomic_cmpswap
2930   // may only use 0x3 and 0xf while other atomic operations
2931   // may use 0x1 and 0x3. However these limitations are
2932   // verified when we check that dmask matches dst size.
2933   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2934 }
2935 
2936 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2937 
2938   const unsigned Opc = Inst.getOpcode();
2939   const MCInstrDesc &Desc = MII.get(Opc);
2940 
2941   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2942     return true;
2943 
2944   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2945   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2946 
2947   // GATHER4 instructions use dmask in a different fashion compared to
2948   // other MIMG instructions. The only useful DMASK values are
2949   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2950   // (red,red,red,red) etc.) The ISA document doesn't mention
2951   // this.
2952   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2953 }
2954 
2955 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2956 
2957   const unsigned Opc = Inst.getOpcode();
2958   const MCInstrDesc &Desc = MII.get(Opc);
2959 
2960   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2961     return true;
2962 
2963   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2964   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2965     if (isCI() || isSI())
2966       return false;
2967   }
2968 
2969   return true;
2970 }
2971 
2972 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2973   const unsigned Opc = Inst.getOpcode();
2974   const MCInstrDesc &Desc = MII.get(Opc);
2975 
2976   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2977     return true;
2978 
2979   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2980   if (DimIdx < 0)
2981     return true;
2982 
2983   long Imm = Inst.getOperand(DimIdx).getImm();
2984   if (Imm < 0 || Imm >= 8)
2985     return false;
2986 
2987   return true;
2988 }
2989 
2990 static bool IsRevOpcode(const unsigned Opcode)
2991 {
2992   switch (Opcode) {
2993   case AMDGPU::V_SUBREV_F32_e32:
2994   case AMDGPU::V_SUBREV_F32_e64:
2995   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2996   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2997   case AMDGPU::V_SUBREV_F32_e32_vi:
2998   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2999   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3000   case AMDGPU::V_SUBREV_F32_e64_vi:
3001 
3002   case AMDGPU::V_SUBREV_I32_e32:
3003   case AMDGPU::V_SUBREV_I32_e64:
3004   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3005   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3006 
3007   case AMDGPU::V_SUBBREV_U32_e32:
3008   case AMDGPU::V_SUBBREV_U32_e64:
3009   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3010   case AMDGPU::V_SUBBREV_U32_e32_vi:
3011   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3012   case AMDGPU::V_SUBBREV_U32_e64_vi:
3013 
3014   case AMDGPU::V_SUBREV_U32_e32:
3015   case AMDGPU::V_SUBREV_U32_e64:
3016   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3017   case AMDGPU::V_SUBREV_U32_e32_vi:
3018   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3019   case AMDGPU::V_SUBREV_U32_e64_vi:
3020 
3021   case AMDGPU::V_SUBREV_F16_e32:
3022   case AMDGPU::V_SUBREV_F16_e64:
3023   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3024   case AMDGPU::V_SUBREV_F16_e32_vi:
3025   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3026   case AMDGPU::V_SUBREV_F16_e64_vi:
3027 
3028   case AMDGPU::V_SUBREV_U16_e32:
3029   case AMDGPU::V_SUBREV_U16_e64:
3030   case AMDGPU::V_SUBREV_U16_e32_vi:
3031   case AMDGPU::V_SUBREV_U16_e64_vi:
3032 
3033   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3034   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3035   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3036 
3037   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3038   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3039 
3040   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3041   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3042 
3043   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3044   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3045 
3046   case AMDGPU::V_LSHRREV_B32_e32:
3047   case AMDGPU::V_LSHRREV_B32_e64:
3048   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3049   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3050   case AMDGPU::V_LSHRREV_B32_e32_vi:
3051   case AMDGPU::V_LSHRREV_B32_e64_vi:
3052   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3053   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3054 
3055   case AMDGPU::V_ASHRREV_I32_e32:
3056   case AMDGPU::V_ASHRREV_I32_e64:
3057   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3058   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3059   case AMDGPU::V_ASHRREV_I32_e32_vi:
3060   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3061   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3062   case AMDGPU::V_ASHRREV_I32_e64_vi:
3063 
3064   case AMDGPU::V_LSHLREV_B32_e32:
3065   case AMDGPU::V_LSHLREV_B32_e64:
3066   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3067   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3068   case AMDGPU::V_LSHLREV_B32_e32_vi:
3069   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3070   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3071   case AMDGPU::V_LSHLREV_B32_e64_vi:
3072 
3073   case AMDGPU::V_LSHLREV_B16_e32:
3074   case AMDGPU::V_LSHLREV_B16_e64:
3075   case AMDGPU::V_LSHLREV_B16_e32_vi:
3076   case AMDGPU::V_LSHLREV_B16_e64_vi:
3077   case AMDGPU::V_LSHLREV_B16_gfx10:
3078 
3079   case AMDGPU::V_LSHRREV_B16_e32:
3080   case AMDGPU::V_LSHRREV_B16_e64:
3081   case AMDGPU::V_LSHRREV_B16_e32_vi:
3082   case AMDGPU::V_LSHRREV_B16_e64_vi:
3083   case AMDGPU::V_LSHRREV_B16_gfx10:
3084 
3085   case AMDGPU::V_ASHRREV_I16_e32:
3086   case AMDGPU::V_ASHRREV_I16_e64:
3087   case AMDGPU::V_ASHRREV_I16_e32_vi:
3088   case AMDGPU::V_ASHRREV_I16_e64_vi:
3089   case AMDGPU::V_ASHRREV_I16_gfx10:
3090 
3091   case AMDGPU::V_LSHLREV_B64:
3092   case AMDGPU::V_LSHLREV_B64_gfx10:
3093   case AMDGPU::V_LSHLREV_B64_vi:
3094 
3095   case AMDGPU::V_LSHRREV_B64:
3096   case AMDGPU::V_LSHRREV_B64_gfx10:
3097   case AMDGPU::V_LSHRREV_B64_vi:
3098 
3099   case AMDGPU::V_ASHRREV_I64:
3100   case AMDGPU::V_ASHRREV_I64_gfx10:
3101   case AMDGPU::V_ASHRREV_I64_vi:
3102 
3103   case AMDGPU::V_PK_LSHLREV_B16:
3104   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3105   case AMDGPU::V_PK_LSHLREV_B16_vi:
3106 
3107   case AMDGPU::V_PK_LSHRREV_B16:
3108   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3109   case AMDGPU::V_PK_LSHRREV_B16_vi:
3110   case AMDGPU::V_PK_ASHRREV_I16:
3111   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3112   case AMDGPU::V_PK_ASHRREV_I16_vi:
3113     return true;
3114   default:
3115     return false;
3116   }
3117 }
3118 
3119 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3120 
3121   using namespace SIInstrFlags;
3122   const unsigned Opcode = Inst.getOpcode();
3123   const MCInstrDesc &Desc = MII.get(Opcode);
3124 
3125   // lds_direct register is defined so that it can be used
3126   // with 9-bit operands only. Ignore encodings which do not accept these.
3127   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3128     return true;
3129 
3130   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3131   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3132   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3133 
3134   const int SrcIndices[] = { Src1Idx, Src2Idx };
3135 
3136   // lds_direct cannot be specified as either src1 or src2.
3137   for (int SrcIdx : SrcIndices) {
3138     if (SrcIdx == -1) break;
3139     const MCOperand &Src = Inst.getOperand(SrcIdx);
3140     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3141       return false;
3142     }
3143   }
3144 
3145   if (Src0Idx == -1)
3146     return true;
3147 
3148   const MCOperand &Src = Inst.getOperand(Src0Idx);
3149   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3150     return true;
3151 
3152   // lds_direct is specified as src0. Check additional limitations.
3153   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3154 }
3155 
3156 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3157   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3158     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3159     if (Op.isFlatOffset())
3160       return Op.getStartLoc();
3161   }
3162   return getLoc();
3163 }
3164 
3165 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3166                                          const OperandVector &Operands) {
3167   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3168   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3169     return true;
3170 
3171   auto Opcode = Inst.getOpcode();
3172   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3173   assert(OpNum != -1);
3174 
3175   const auto &Op = Inst.getOperand(OpNum);
3176   if (!hasFlatOffsets() && Op.getImm() != 0) {
3177     Error(getFlatOffsetLoc(Operands),
3178           "flat offset modifier is not supported on this GPU");
3179     return false;
3180   }
3181 
3182   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3183   // For FLAT segment the offset must be positive;
3184   // MSB is ignored and forced to zero.
3185   unsigned OffsetSize = isGFX9() ? 13 : 12;
3186   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3187     if (!isIntN(OffsetSize, Op.getImm())) {
3188       Error(getFlatOffsetLoc(Operands),
3189             isGFX9() ? "expected a 13-bit signed offset" :
3190                        "expected a 12-bit signed offset");
3191       return false;
3192     }
3193   } else {
3194     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3195       Error(getFlatOffsetLoc(Operands),
3196             isGFX9() ? "expected a 12-bit unsigned offset" :
3197                        "expected an 11-bit unsigned offset");
3198       return false;
3199     }
3200   }
3201 
3202   return true;
3203 }
3204 
3205 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3206   unsigned Opcode = Inst.getOpcode();
3207   const MCInstrDesc &Desc = MII.get(Opcode);
3208   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3209     return true;
3210 
3211   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3212   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3213 
3214   const int OpIndices[] = { Src0Idx, Src1Idx };
3215 
3216   unsigned NumLiterals = 0;
3217   uint32_t LiteralValue;
3218 
3219   for (int OpIdx : OpIndices) {
3220     if (OpIdx == -1) break;
3221 
3222     const MCOperand &MO = Inst.getOperand(OpIdx);
3223     if (MO.isImm() &&
3224         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3225         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3226         !isInlineConstant(Inst, OpIdx)) {
3227       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3228       if (NumLiterals == 0 || LiteralValue != Value) {
3229         LiteralValue = Value;
3230         ++NumLiterals;
3231       }
3232     }
3233   }
3234 
3235   return NumLiterals <= 1;
3236 }
3237 
3238 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3239   const unsigned Opc = Inst.getOpcode();
3240   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3241       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3242     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3243     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3244 
3245     if (OpSel & ~3)
3246       return false;
3247   }
3248   return true;
3249 }
3250 
3251 // Check if VCC register matches wavefront size
3252 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3253   auto FB = getFeatureBits();
3254   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3255     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3256 }
3257 
3258 // VOP3 literal is only allowed in GFX10+ and only one can be used
3259 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3260   unsigned Opcode = Inst.getOpcode();
3261   const MCInstrDesc &Desc = MII.get(Opcode);
3262   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3263     return true;
3264 
3265   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3266   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3267   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3268 
3269   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3270 
3271   unsigned NumLiterals = 0;
3272   uint32_t LiteralValue;
3273 
3274   for (int OpIdx : OpIndices) {
3275     if (OpIdx == -1) break;
3276 
3277     const MCOperand &MO = Inst.getOperand(OpIdx);
3278     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3279       continue;
3280 
3281     if (!isInlineConstant(Inst, OpIdx)) {
3282       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3283       if (NumLiterals == 0 || LiteralValue != Value) {
3284         LiteralValue = Value;
3285         ++NumLiterals;
3286       }
3287     }
3288   }
3289 
3290   return !NumLiterals ||
3291          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3292 }
3293 
3294 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3295                                           const SMLoc &IDLoc,
3296                                           const OperandVector &Operands) {
3297   if (!validateLdsDirect(Inst)) {
3298     Error(IDLoc,
3299       "invalid use of lds_direct");
3300     return false;
3301   }
3302   if (!validateSOPLiteral(Inst)) {
3303     Error(IDLoc,
3304       "only one literal operand is allowed");
3305     return false;
3306   }
3307   if (!validateVOP3Literal(Inst)) {
3308     Error(IDLoc,
3309       "invalid literal operand");
3310     return false;
3311   }
3312   if (!validateConstantBusLimitations(Inst)) {
3313     Error(IDLoc,
3314       "invalid operand (violates constant bus restrictions)");
3315     return false;
3316   }
3317   if (!validateEarlyClobberLimitations(Inst)) {
3318     Error(IDLoc,
3319       "destination must be different than all sources");
3320     return false;
3321   }
3322   if (!validateIntClampSupported(Inst)) {
3323     Error(IDLoc,
3324       "integer clamping is not supported on this GPU");
3325     return false;
3326   }
3327   if (!validateOpSel(Inst)) {
3328     Error(IDLoc,
3329       "invalid op_sel operand");
3330     return false;
3331   }
3332   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3333   if (!validateMIMGD16(Inst)) {
3334     Error(IDLoc,
3335       "d16 modifier is not supported on this GPU");
3336     return false;
3337   }
3338   if (!validateMIMGDim(Inst)) {
3339     Error(IDLoc, "dim modifier is required on this GPU");
3340     return false;
3341   }
3342   if (!validateMIMGDataSize(Inst)) {
3343     Error(IDLoc,
3344       "image data size does not match dmask and tfe");
3345     return false;
3346   }
3347   if (!validateMIMGAddrSize(Inst)) {
3348     Error(IDLoc,
3349       "image address size does not match dim and a16");
3350     return false;
3351   }
3352   if (!validateMIMGAtomicDMask(Inst)) {
3353     Error(IDLoc,
3354       "invalid atomic image dmask");
3355     return false;
3356   }
3357   if (!validateMIMGGatherDMask(Inst)) {
3358     Error(IDLoc,
3359       "invalid image_gather dmask: only one bit must be set");
3360     return false;
3361   }
3362   if (!validateFlatOffset(Inst, Operands)) {
3363     return false;
3364   }
3365 
3366   return true;
3367 }
3368 
3369 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3370                                             const FeatureBitset &FBS,
3371                                             unsigned VariantID = 0);
3372 
3373 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3374                                               OperandVector &Operands,
3375                                               MCStreamer &Out,
3376                                               uint64_t &ErrorInfo,
3377                                               bool MatchingInlineAsm) {
3378   MCInst Inst;
3379   unsigned Result = Match_Success;
3380   for (auto Variant : getMatchedVariants()) {
3381     uint64_t EI;
3382     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3383                                   Variant);
3384     // We order match statuses from least to most specific. We use most specific
3385     // status as resulting
3386     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3387     if ((R == Match_Success) ||
3388         (R == Match_PreferE32) ||
3389         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3390         (R == Match_InvalidOperand && Result != Match_MissingFeature
3391                                    && Result != Match_PreferE32) ||
3392         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3393                                    && Result != Match_MissingFeature
3394                                    && Result != Match_PreferE32)) {
3395       Result = R;
3396       ErrorInfo = EI;
3397     }
3398     if (R == Match_Success)
3399       break;
3400   }
3401 
3402   switch (Result) {
3403   default: break;
3404   case Match_Success:
3405     if (!validateInstruction(Inst, IDLoc, Operands)) {
3406       return true;
3407     }
3408     Inst.setLoc(IDLoc);
3409     Out.EmitInstruction(Inst, getSTI());
3410     return false;
3411 
3412   case Match_MissingFeature:
3413     return Error(IDLoc, "instruction not supported on this GPU");
3414 
3415   case Match_MnemonicFail: {
3416     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3417     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3418         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3419     return Error(IDLoc, "invalid instruction" + Suggestion,
3420                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3421   }
3422 
3423   case Match_InvalidOperand: {
3424     SMLoc ErrorLoc = IDLoc;
3425     if (ErrorInfo != ~0ULL) {
3426       if (ErrorInfo >= Operands.size()) {
3427         return Error(IDLoc, "too few operands for instruction");
3428       }
3429       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3430       if (ErrorLoc == SMLoc())
3431         ErrorLoc = IDLoc;
3432     }
3433     return Error(ErrorLoc, "invalid operand for instruction");
3434   }
3435 
3436   case Match_PreferE32:
3437     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3438                         "should be encoded as e32");
3439   }
3440   llvm_unreachable("Implement any new match types added!");
3441 }
3442 
3443 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3444   int64_t Tmp = -1;
3445   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3446     return true;
3447   }
3448   if (getParser().parseAbsoluteExpression(Tmp)) {
3449     return true;
3450   }
3451   Ret = static_cast<uint32_t>(Tmp);
3452   return false;
3453 }
3454 
3455 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3456                                                uint32_t &Minor) {
3457   if (ParseAsAbsoluteExpression(Major))
3458     return TokError("invalid major version");
3459 
3460   if (getLexer().isNot(AsmToken::Comma))
3461     return TokError("minor version number required, comma expected");
3462   Lex();
3463 
3464   if (ParseAsAbsoluteExpression(Minor))
3465     return TokError("invalid minor version");
3466 
3467   return false;
3468 }
3469 
3470 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3471   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3472     return TokError("directive only supported for amdgcn architecture");
3473 
3474   std::string Target;
3475 
3476   SMLoc TargetStart = getTok().getLoc();
3477   if (getParser().parseEscapedString(Target))
3478     return true;
3479   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3480 
3481   std::string ExpectedTarget;
3482   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3483   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3484 
3485   if (Target != ExpectedTargetOS.str())
3486     return getParser().Error(TargetRange.Start, "target must match options",
3487                              TargetRange);
3488 
3489   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3490   return false;
3491 }
3492 
3493 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3494   return getParser().Error(Range.Start, "value out of range", Range);
3495 }
3496 
3497 bool AMDGPUAsmParser::calculateGPRBlocks(
3498     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3499     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3500     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3501     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3502   // TODO(scott.linder): These calculations are duplicated from
3503   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3504   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3505 
3506   unsigned NumVGPRs = NextFreeVGPR;
3507   unsigned NumSGPRs = NextFreeSGPR;
3508 
3509   if (Version.Major >= 10)
3510     NumSGPRs = 0;
3511   else {
3512     unsigned MaxAddressableNumSGPRs =
3513         IsaInfo::getAddressableNumSGPRs(&getSTI());
3514 
3515     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3516         NumSGPRs > MaxAddressableNumSGPRs)
3517       return OutOfRangeError(SGPRRange);
3518 
3519     NumSGPRs +=
3520         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3521 
3522     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3523         NumSGPRs > MaxAddressableNumSGPRs)
3524       return OutOfRangeError(SGPRRange);
3525 
3526     if (Features.test(FeatureSGPRInitBug))
3527       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3528   }
3529 
3530   VGPRBlocks =
3531       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3532   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3533 
3534   return false;
3535 }
3536 
3537 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3538   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3539     return TokError("directive only supported for amdgcn architecture");
3540 
3541   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3542     return TokError("directive only supported for amdhsa OS");
3543 
3544   StringRef KernelName;
3545   if (getParser().parseIdentifier(KernelName))
3546     return true;
3547 
3548   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3549 
3550   StringSet<> Seen;
3551 
3552   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3553 
3554   SMRange VGPRRange;
3555   uint64_t NextFreeVGPR = 0;
3556   SMRange SGPRRange;
3557   uint64_t NextFreeSGPR = 0;
3558   unsigned UserSGPRCount = 0;
3559   bool ReserveVCC = true;
3560   bool ReserveFlatScr = true;
3561   bool ReserveXNACK = hasXNACK();
3562   Optional<bool> EnableWavefrontSize32;
3563 
3564   while (true) {
3565     while (getLexer().is(AsmToken::EndOfStatement))
3566       Lex();
3567 
3568     if (getLexer().isNot(AsmToken::Identifier))
3569       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3570 
3571     StringRef ID = getTok().getIdentifier();
3572     SMRange IDRange = getTok().getLocRange();
3573     Lex();
3574 
3575     if (ID == ".end_amdhsa_kernel")
3576       break;
3577 
3578     if (Seen.find(ID) != Seen.end())
3579       return TokError(".amdhsa_ directives cannot be repeated");
3580     Seen.insert(ID);
3581 
3582     SMLoc ValStart = getTok().getLoc();
3583     int64_t IVal;
3584     if (getParser().parseAbsoluteExpression(IVal))
3585       return true;
3586     SMLoc ValEnd = getTok().getLoc();
3587     SMRange ValRange = SMRange(ValStart, ValEnd);
3588 
3589     if (IVal < 0)
3590       return OutOfRangeError(ValRange);
3591 
3592     uint64_t Val = IVal;
3593 
3594 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3595   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3596     return OutOfRangeError(RANGE);                                             \
3597   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3598 
3599     if (ID == ".amdhsa_group_segment_fixed_size") {
3600       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3601         return OutOfRangeError(ValRange);
3602       KD.group_segment_fixed_size = Val;
3603     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3604       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3605         return OutOfRangeError(ValRange);
3606       KD.private_segment_fixed_size = Val;
3607     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3608       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3609                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3610                        Val, ValRange);
3611       UserSGPRCount += 4;
3612     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3613       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3614                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3615                        ValRange);
3616       UserSGPRCount += 2;
3617     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3618       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3619                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3620                        ValRange);
3621       UserSGPRCount += 2;
3622     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3623       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3624                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3625                        Val, ValRange);
3626       UserSGPRCount += 2;
3627     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3628       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3629                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3630                        ValRange);
3631       UserSGPRCount += 2;
3632     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3633       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3634                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3635                        ValRange);
3636       UserSGPRCount += 2;
3637     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3638       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3639                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3640                        Val, ValRange);
3641       UserSGPRCount += 1;
3642     } else if (ID == ".amdhsa_wavefront_size32") {
3643       if (IVersion.Major < 10)
3644         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3645                                  IDRange);
3646       EnableWavefrontSize32 = Val;
3647       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3649                        Val, ValRange);
3650     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3651       PARSE_BITS_ENTRY(
3652           KD.compute_pgm_rsrc2,
3653           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3654           ValRange);
3655     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3656       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3657                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3658                        ValRange);
3659     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3660       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3661                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3662                        ValRange);
3663     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3664       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3665                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3666                        ValRange);
3667     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3668       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3669                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3670                        ValRange);
3671     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3672       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3673                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3674                        ValRange);
3675     } else if (ID == ".amdhsa_next_free_vgpr") {
3676       VGPRRange = ValRange;
3677       NextFreeVGPR = Val;
3678     } else if (ID == ".amdhsa_next_free_sgpr") {
3679       SGPRRange = ValRange;
3680       NextFreeSGPR = Val;
3681     } else if (ID == ".amdhsa_reserve_vcc") {
3682       if (!isUInt<1>(Val))
3683         return OutOfRangeError(ValRange);
3684       ReserveVCC = Val;
3685     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3686       if (IVersion.Major < 7)
3687         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3688                                  IDRange);
3689       if (!isUInt<1>(Val))
3690         return OutOfRangeError(ValRange);
3691       ReserveFlatScr = Val;
3692     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3693       if (IVersion.Major < 8)
3694         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3695                                  IDRange);
3696       if (!isUInt<1>(Val))
3697         return OutOfRangeError(ValRange);
3698       ReserveXNACK = Val;
3699     } else if (ID == ".amdhsa_float_round_mode_32") {
3700       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3701                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3702     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3703       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3704                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3705     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3706       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3707                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3708     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3709       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3710                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3711                        ValRange);
3712     } else if (ID == ".amdhsa_dx10_clamp") {
3713       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3714                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3715     } else if (ID == ".amdhsa_ieee_mode") {
3716       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3717                        Val, ValRange);
3718     } else if (ID == ".amdhsa_fp16_overflow") {
3719       if (IVersion.Major < 9)
3720         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3721                                  IDRange);
3722       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3723                        ValRange);
3724     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3725       if (IVersion.Major < 10)
3726         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3727                                  IDRange);
3728       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3729                        ValRange);
3730     } else if (ID == ".amdhsa_memory_ordered") {
3731       if (IVersion.Major < 10)
3732         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3733                                  IDRange);
3734       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3735                        ValRange);
3736     } else if (ID == ".amdhsa_forward_progress") {
3737       if (IVersion.Major < 10)
3738         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3739                                  IDRange);
3740       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3741                        ValRange);
3742     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3743       PARSE_BITS_ENTRY(
3744           KD.compute_pgm_rsrc2,
3745           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3746           ValRange);
3747     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3748       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3749                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3750                        Val, ValRange);
3751     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3752       PARSE_BITS_ENTRY(
3753           KD.compute_pgm_rsrc2,
3754           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3755           ValRange);
3756     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3757       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3758                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3759                        Val, ValRange);
3760     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3761       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3762                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3763                        Val, ValRange);
3764     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3765       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3766                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3767                        Val, ValRange);
3768     } else if (ID == ".amdhsa_exception_int_div_zero") {
3769       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3770                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3771                        Val, ValRange);
3772     } else {
3773       return getParser().Error(IDRange.Start,
3774                                "unknown .amdhsa_kernel directive", IDRange);
3775     }
3776 
3777 #undef PARSE_BITS_ENTRY
3778   }
3779 
3780   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3781     return TokError(".amdhsa_next_free_vgpr directive is required");
3782 
3783   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3784     return TokError(".amdhsa_next_free_sgpr directive is required");
3785 
3786   unsigned VGPRBlocks;
3787   unsigned SGPRBlocks;
3788   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3789                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3790                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3791                          SGPRBlocks))
3792     return true;
3793 
3794   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3795           VGPRBlocks))
3796     return OutOfRangeError(VGPRRange);
3797   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3798                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3799 
3800   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3801           SGPRBlocks))
3802     return OutOfRangeError(SGPRRange);
3803   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3804                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3805                   SGPRBlocks);
3806 
3807   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3808     return TokError("too many user SGPRs enabled");
3809   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3810                   UserSGPRCount);
3811 
3812   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3813       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3814       ReserveFlatScr, ReserveXNACK);
3815   return false;
3816 }
3817 
3818 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3819   uint32_t Major;
3820   uint32_t Minor;
3821 
3822   if (ParseDirectiveMajorMinor(Major, Minor))
3823     return true;
3824 
3825   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3826   return false;
3827 }
3828 
3829 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3830   uint32_t Major;
3831   uint32_t Minor;
3832   uint32_t Stepping;
3833   StringRef VendorName;
3834   StringRef ArchName;
3835 
3836   // If this directive has no arguments, then use the ISA version for the
3837   // targeted GPU.
3838   if (getLexer().is(AsmToken::EndOfStatement)) {
3839     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3840     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3841                                                       ISA.Stepping,
3842                                                       "AMD", "AMDGPU");
3843     return false;
3844   }
3845 
3846   if (ParseDirectiveMajorMinor(Major, Minor))
3847     return true;
3848 
3849   if (getLexer().isNot(AsmToken::Comma))
3850     return TokError("stepping version number required, comma expected");
3851   Lex();
3852 
3853   if (ParseAsAbsoluteExpression(Stepping))
3854     return TokError("invalid stepping version");
3855 
3856   if (getLexer().isNot(AsmToken::Comma))
3857     return TokError("vendor name required, comma expected");
3858   Lex();
3859 
3860   if (getLexer().isNot(AsmToken::String))
3861     return TokError("invalid vendor name");
3862 
3863   VendorName = getLexer().getTok().getStringContents();
3864   Lex();
3865 
3866   if (getLexer().isNot(AsmToken::Comma))
3867     return TokError("arch name required, comma expected");
3868   Lex();
3869 
3870   if (getLexer().isNot(AsmToken::String))
3871     return TokError("invalid arch name");
3872 
3873   ArchName = getLexer().getTok().getStringContents();
3874   Lex();
3875 
3876   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3877                                                     VendorName, ArchName);
3878   return false;
3879 }
3880 
3881 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3882                                                amd_kernel_code_t &Header) {
3883   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3884   // assembly for backwards compatibility.
3885   if (ID == "max_scratch_backing_memory_byte_size") {
3886     Parser.eatToEndOfStatement();
3887     return false;
3888   }
3889 
3890   SmallString<40> ErrStr;
3891   raw_svector_ostream Err(ErrStr);
3892   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3893     return TokError(Err.str());
3894   }
3895   Lex();
3896 
3897   if (ID == "enable_wavefront_size32") {
3898     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3899       if (!isGFX10())
3900         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3901       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3902         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3903     } else {
3904       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3905         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3906     }
3907   }
3908 
3909   if (ID == "wavefront_size") {
3910     if (Header.wavefront_size == 5) {
3911       if (!isGFX10())
3912         return TokError("wavefront_size=5 is only allowed on GFX10+");
3913       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3914         return TokError("wavefront_size=5 requires +WavefrontSize32");
3915     } else if (Header.wavefront_size == 6) {
3916       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3917         return TokError("wavefront_size=6 requires +WavefrontSize64");
3918     }
3919   }
3920 
3921   if (ID == "enable_wgp_mode") {
3922     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3923       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3924   }
3925 
3926   if (ID == "enable_mem_ordered") {
3927     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3928       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3929   }
3930 
3931   if (ID == "enable_fwd_progress") {
3932     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3933       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3934   }
3935 
3936   return false;
3937 }
3938 
3939 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3940   amd_kernel_code_t Header;
3941   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3942 
3943   while (true) {
3944     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3945     // will set the current token to EndOfStatement.
3946     while(getLexer().is(AsmToken::EndOfStatement))
3947       Lex();
3948 
3949     if (getLexer().isNot(AsmToken::Identifier))
3950       return TokError("expected value identifier or .end_amd_kernel_code_t");
3951 
3952     StringRef ID = getLexer().getTok().getIdentifier();
3953     Lex();
3954 
3955     if (ID == ".end_amd_kernel_code_t")
3956       break;
3957 
3958     if (ParseAMDKernelCodeTValue(ID, Header))
3959       return true;
3960   }
3961 
3962   getTargetStreamer().EmitAMDKernelCodeT(Header);
3963 
3964   return false;
3965 }
3966 
3967 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3968   if (getLexer().isNot(AsmToken::Identifier))
3969     return TokError("expected symbol name");
3970 
3971   StringRef KernelName = Parser.getTok().getString();
3972 
3973   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3974                                            ELF::STT_AMDGPU_HSA_KERNEL);
3975   Lex();
3976   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3977     KernelScope.initialize(getContext());
3978   return false;
3979 }
3980 
3981 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3982   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3983     return Error(getParser().getTok().getLoc(),
3984                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3985                  "architectures");
3986   }
3987 
3988   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3989 
3990   std::string ISAVersionStringFromSTI;
3991   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3992   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3993 
3994   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3995     return Error(getParser().getTok().getLoc(),
3996                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3997                  "arguments specified through the command line");
3998   }
3999 
4000   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4001   Lex();
4002 
4003   return false;
4004 }
4005 
4006 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4007   const char *AssemblerDirectiveBegin;
4008   const char *AssemblerDirectiveEnd;
4009   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4010       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4011           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4012                             HSAMD::V3::AssemblerDirectiveEnd)
4013           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4014                             HSAMD::AssemblerDirectiveEnd);
4015 
4016   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4017     return Error(getParser().getTok().getLoc(),
4018                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4019                  "not available on non-amdhsa OSes")).str());
4020   }
4021 
4022   std::string HSAMetadataString;
4023   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4024                           HSAMetadataString))
4025     return true;
4026 
4027   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4028     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4029       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4030   } else {
4031     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4032       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4033   }
4034 
4035   return false;
4036 }
4037 
4038 /// Common code to parse out a block of text (typically YAML) between start and
4039 /// end directives.
4040 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4041                                           const char *AssemblerDirectiveEnd,
4042                                           std::string &CollectString) {
4043 
4044   raw_string_ostream CollectStream(CollectString);
4045 
4046   getLexer().setSkipSpace(false);
4047 
4048   bool FoundEnd = false;
4049   while (!getLexer().is(AsmToken::Eof)) {
4050     while (getLexer().is(AsmToken::Space)) {
4051       CollectStream << getLexer().getTok().getString();
4052       Lex();
4053     }
4054 
4055     if (getLexer().is(AsmToken::Identifier)) {
4056       StringRef ID = getLexer().getTok().getIdentifier();
4057       if (ID == AssemblerDirectiveEnd) {
4058         Lex();
4059         FoundEnd = true;
4060         break;
4061       }
4062     }
4063 
4064     CollectStream << Parser.parseStringToEndOfStatement()
4065                   << getContext().getAsmInfo()->getSeparatorString();
4066 
4067     Parser.eatToEndOfStatement();
4068   }
4069 
4070   getLexer().setSkipSpace(true);
4071 
4072   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4073     return TokError(Twine("expected directive ") +
4074                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4075   }
4076 
4077   CollectStream.flush();
4078   return false;
4079 }
4080 
4081 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4082 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4083   std::string String;
4084   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4085                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4086     return true;
4087 
4088   auto PALMetadata = getTargetStreamer().getPALMetadata();
4089   if (!PALMetadata->setFromString(String))
4090     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4091   return false;
4092 }
4093 
4094 /// Parse the assembler directive for old linear-format PAL metadata.
4095 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4096   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4097     return Error(getParser().getTok().getLoc(),
4098                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4099                  "not available on non-amdpal OSes")).str());
4100   }
4101 
4102   auto PALMetadata = getTargetStreamer().getPALMetadata();
4103   PALMetadata->setLegacy();
4104   for (;;) {
4105     uint32_t Key, Value;
4106     if (ParseAsAbsoluteExpression(Key)) {
4107       return TokError(Twine("invalid value in ") +
4108                       Twine(PALMD::AssemblerDirective));
4109     }
4110     if (getLexer().isNot(AsmToken::Comma)) {
4111       return TokError(Twine("expected an even number of values in ") +
4112                       Twine(PALMD::AssemblerDirective));
4113     }
4114     Lex();
4115     if (ParseAsAbsoluteExpression(Value)) {
4116       return TokError(Twine("invalid value in ") +
4117                       Twine(PALMD::AssemblerDirective));
4118     }
4119     PALMetadata->setRegister(Key, Value);
4120     if (getLexer().isNot(AsmToken::Comma))
4121       break;
4122     Lex();
4123   }
4124   return false;
4125 }
4126 
4127 /// ParseDirectiveAMDGPULDS
4128 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4129 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4130   if (getParser().checkForValidSection())
4131     return true;
4132 
4133   StringRef Name;
4134   SMLoc NameLoc = getLexer().getLoc();
4135   if (getParser().parseIdentifier(Name))
4136     return TokError("expected identifier in directive");
4137 
4138   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4139   if (parseToken(AsmToken::Comma, "expected ','"))
4140     return true;
4141 
4142   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4143 
4144   int64_t Size;
4145   SMLoc SizeLoc = getLexer().getLoc();
4146   if (getParser().parseAbsoluteExpression(Size))
4147     return true;
4148   if (Size < 0)
4149     return Error(SizeLoc, "size must be non-negative");
4150   if (Size > LocalMemorySize)
4151     return Error(SizeLoc, "size is too large");
4152 
4153   int64_t Align = 4;
4154   if (getLexer().is(AsmToken::Comma)) {
4155     Lex();
4156     SMLoc AlignLoc = getLexer().getLoc();
4157     if (getParser().parseAbsoluteExpression(Align))
4158       return true;
4159     if (Align < 0 || !isPowerOf2_64(Align))
4160       return Error(AlignLoc, "alignment must be a power of two");
4161 
4162     // Alignment larger than the size of LDS is possible in theory, as long
4163     // as the linker manages to place to symbol at address 0, but we do want
4164     // to make sure the alignment fits nicely into a 32-bit integer.
4165     if (Align >= 1u << 31)
4166       return Error(AlignLoc, "alignment is too large");
4167   }
4168 
4169   if (parseToken(AsmToken::EndOfStatement,
4170                  "unexpected token in '.amdgpu_lds' directive"))
4171     return true;
4172 
4173   Symbol->redefineIfPossible();
4174   if (!Symbol->isUndefined())
4175     return Error(NameLoc, "invalid symbol redefinition");
4176 
4177   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4178   return false;
4179 }
4180 
4181 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4182   StringRef IDVal = DirectiveID.getString();
4183 
4184   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4185     if (IDVal == ".amdgcn_target")
4186       return ParseDirectiveAMDGCNTarget();
4187 
4188     if (IDVal == ".amdhsa_kernel")
4189       return ParseDirectiveAMDHSAKernel();
4190 
4191     // TODO: Restructure/combine with PAL metadata directive.
4192     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4193       return ParseDirectiveHSAMetadata();
4194   } else {
4195     if (IDVal == ".hsa_code_object_version")
4196       return ParseDirectiveHSACodeObjectVersion();
4197 
4198     if (IDVal == ".hsa_code_object_isa")
4199       return ParseDirectiveHSACodeObjectISA();
4200 
4201     if (IDVal == ".amd_kernel_code_t")
4202       return ParseDirectiveAMDKernelCodeT();
4203 
4204     if (IDVal == ".amdgpu_hsa_kernel")
4205       return ParseDirectiveAMDGPUHsaKernel();
4206 
4207     if (IDVal == ".amd_amdgpu_isa")
4208       return ParseDirectiveISAVersion();
4209 
4210     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4211       return ParseDirectiveHSAMetadata();
4212   }
4213 
4214   if (IDVal == ".amdgpu_lds")
4215     return ParseDirectiveAMDGPULDS();
4216 
4217   if (IDVal == PALMD::AssemblerDirectiveBegin)
4218     return ParseDirectivePALMetadataBegin();
4219 
4220   if (IDVal == PALMD::AssemblerDirective)
4221     return ParseDirectivePALMetadata();
4222 
4223   return true;
4224 }
4225 
4226 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4227                                            unsigned RegNo) const {
4228 
4229   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4230        R.isValid(); ++R) {
4231     if (*R == RegNo)
4232       return isGFX9() || isGFX10();
4233   }
4234 
4235   // GFX10 has 2 more SGPRs 104 and 105.
4236   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4237        R.isValid(); ++R) {
4238     if (*R == RegNo)
4239       return hasSGPR104_SGPR105();
4240   }
4241 
4242   switch (RegNo) {
4243   case AMDGPU::SRC_SHARED_BASE:
4244   case AMDGPU::SRC_SHARED_LIMIT:
4245   case AMDGPU::SRC_PRIVATE_BASE:
4246   case AMDGPU::SRC_PRIVATE_LIMIT:
4247   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4248     return !isCI() && !isSI() && !isVI();
4249   case AMDGPU::TBA:
4250   case AMDGPU::TBA_LO:
4251   case AMDGPU::TBA_HI:
4252   case AMDGPU::TMA:
4253   case AMDGPU::TMA_LO:
4254   case AMDGPU::TMA_HI:
4255     return !isGFX9() && !isGFX10();
4256   case AMDGPU::XNACK_MASK:
4257   case AMDGPU::XNACK_MASK_LO:
4258   case AMDGPU::XNACK_MASK_HI:
4259     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4260   case AMDGPU::SGPR_NULL:
4261     return isGFX10();
4262   default:
4263     break;
4264   }
4265 
4266   if (isCI())
4267     return true;
4268 
4269   if (isSI() || isGFX10()) {
4270     // No flat_scr on SI.
4271     // On GFX10 flat scratch is not a valid register operand and can only be
4272     // accessed with s_setreg/s_getreg.
4273     switch (RegNo) {
4274     case AMDGPU::FLAT_SCR:
4275     case AMDGPU::FLAT_SCR_LO:
4276     case AMDGPU::FLAT_SCR_HI:
4277       return false;
4278     default:
4279       return true;
4280     }
4281   }
4282 
4283   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4284   // SI/CI have.
4285   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4286        R.isValid(); ++R) {
4287     if (*R == RegNo)
4288       return hasSGPR102_SGPR103();
4289   }
4290 
4291   return true;
4292 }
4293 
4294 OperandMatchResultTy
4295 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4296                               OperandMode Mode) {
4297   // Try to parse with a custom parser
4298   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4299 
4300   // If we successfully parsed the operand or if there as an error parsing,
4301   // we are done.
4302   //
4303   // If we are parsing after we reach EndOfStatement then this means we
4304   // are appending default values to the Operands list.  This is only done
4305   // by custom parser, so we shouldn't continue on to the generic parsing.
4306   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4307       getLexer().is(AsmToken::EndOfStatement))
4308     return ResTy;
4309 
4310   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4311     unsigned Prefix = Operands.size();
4312     SMLoc LBraceLoc = getTok().getLoc();
4313     Parser.Lex(); // eat the '['
4314 
4315     for (;;) {
4316       ResTy = parseReg(Operands);
4317       if (ResTy != MatchOperand_Success)
4318         return ResTy;
4319 
4320       if (getLexer().is(AsmToken::RBrac))
4321         break;
4322 
4323       if (getLexer().isNot(AsmToken::Comma))
4324         return MatchOperand_ParseFail;
4325       Parser.Lex();
4326     }
4327 
4328     if (Operands.size() - Prefix > 1) {
4329       Operands.insert(Operands.begin() + Prefix,
4330                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4331       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4332                                                     getTok().getLoc()));
4333     }
4334 
4335     Parser.Lex(); // eat the ']'
4336     return MatchOperand_Success;
4337   }
4338 
4339   return parseRegOrImm(Operands);
4340 }
4341 
4342 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4343   // Clear any forced encodings from the previous instruction.
4344   setForcedEncodingSize(0);
4345   setForcedDPP(false);
4346   setForcedSDWA(false);
4347 
4348   if (Name.endswith("_e64")) {
4349     setForcedEncodingSize(64);
4350     return Name.substr(0, Name.size() - 4);
4351   } else if (Name.endswith("_e32")) {
4352     setForcedEncodingSize(32);
4353     return Name.substr(0, Name.size() - 4);
4354   } else if (Name.endswith("_dpp")) {
4355     setForcedDPP(true);
4356     return Name.substr(0, Name.size() - 4);
4357   } else if (Name.endswith("_sdwa")) {
4358     setForcedSDWA(true);
4359     return Name.substr(0, Name.size() - 5);
4360   }
4361   return Name;
4362 }
4363 
4364 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4365                                        StringRef Name,
4366                                        SMLoc NameLoc, OperandVector &Operands) {
4367   // Add the instruction mnemonic
4368   Name = parseMnemonicSuffix(Name);
4369   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4370 
4371   bool IsMIMG = Name.startswith("image_");
4372 
4373   while (!getLexer().is(AsmToken::EndOfStatement)) {
4374     OperandMode Mode = OperandMode_Default;
4375     if (IsMIMG && isGFX10() && Operands.size() == 2)
4376       Mode = OperandMode_NSA;
4377     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4378 
4379     // Eat the comma or space if there is one.
4380     if (getLexer().is(AsmToken::Comma))
4381       Parser.Lex();
4382 
4383     switch (Res) {
4384       case MatchOperand_Success: break;
4385       case MatchOperand_ParseFail:
4386         // FIXME: use real operand location rather than the current location.
4387         Error(getLexer().getLoc(), "failed parsing operand.");
4388         while (!getLexer().is(AsmToken::EndOfStatement)) {
4389           Parser.Lex();
4390         }
4391         return true;
4392       case MatchOperand_NoMatch:
4393         // FIXME: use real operand location rather than the current location.
4394         Error(getLexer().getLoc(), "not a valid operand.");
4395         while (!getLexer().is(AsmToken::EndOfStatement)) {
4396           Parser.Lex();
4397         }
4398         return true;
4399     }
4400   }
4401 
4402   return false;
4403 }
4404 
4405 //===----------------------------------------------------------------------===//
4406 // Utility functions
4407 //===----------------------------------------------------------------------===//
4408 
4409 OperandMatchResultTy
4410 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4411 
4412   if (!trySkipId(Prefix, AsmToken::Colon))
4413     return MatchOperand_NoMatch;
4414 
4415   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4416 }
4417 
4418 OperandMatchResultTy
4419 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4420                                     AMDGPUOperand::ImmTy ImmTy,
4421                                     bool (*ConvertResult)(int64_t&)) {
4422   SMLoc S = getLoc();
4423   int64_t Value = 0;
4424 
4425   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4426   if (Res != MatchOperand_Success)
4427     return Res;
4428 
4429   if (ConvertResult && !ConvertResult(Value)) {
4430     Error(S, "invalid " + StringRef(Prefix) + " value.");
4431   }
4432 
4433   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4434   return MatchOperand_Success;
4435 }
4436 
4437 OperandMatchResultTy
4438 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4439                                              OperandVector &Operands,
4440                                              AMDGPUOperand::ImmTy ImmTy,
4441                                              bool (*ConvertResult)(int64_t&)) {
4442   SMLoc S = getLoc();
4443   if (!trySkipId(Prefix, AsmToken::Colon))
4444     return MatchOperand_NoMatch;
4445 
4446   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4447     return MatchOperand_ParseFail;
4448 
4449   unsigned Val = 0;
4450   const unsigned MaxSize = 4;
4451 
4452   // FIXME: How to verify the number of elements matches the number of src
4453   // operands?
4454   for (int I = 0; ; ++I) {
4455     int64_t Op;
4456     SMLoc Loc = getLoc();
4457     if (!parseExpr(Op))
4458       return MatchOperand_ParseFail;
4459 
4460     if (Op != 0 && Op != 1) {
4461       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4462       return MatchOperand_ParseFail;
4463     }
4464 
4465     Val |= (Op << I);
4466 
4467     if (trySkipToken(AsmToken::RBrac))
4468       break;
4469 
4470     if (I + 1 == MaxSize) {
4471       Error(getLoc(), "expected a closing square bracket");
4472       return MatchOperand_ParseFail;
4473     }
4474 
4475     if (!skipToken(AsmToken::Comma, "expected a comma"))
4476       return MatchOperand_ParseFail;
4477   }
4478 
4479   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4480   return MatchOperand_Success;
4481 }
4482 
4483 OperandMatchResultTy
4484 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4485                                AMDGPUOperand::ImmTy ImmTy) {
4486   int64_t Bit = 0;
4487   SMLoc S = Parser.getTok().getLoc();
4488 
4489   // We are at the end of the statement, and this is a default argument, so
4490   // use a default value.
4491   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4492     switch(getLexer().getKind()) {
4493       case AsmToken::Identifier: {
4494         StringRef Tok = Parser.getTok().getString();
4495         if (Tok == Name) {
4496           if (Tok == "r128" && isGFX9())
4497             Error(S, "r128 modifier is not supported on this GPU");
4498           if (Tok == "a16" && !isGFX9() && !isGFX10())
4499             Error(S, "a16 modifier is not supported on this GPU");
4500           Bit = 1;
4501           Parser.Lex();
4502         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4503           Bit = 0;
4504           Parser.Lex();
4505         } else {
4506           return MatchOperand_NoMatch;
4507         }
4508         break;
4509       }
4510       default:
4511         return MatchOperand_NoMatch;
4512     }
4513   }
4514 
4515   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4516     return MatchOperand_ParseFail;
4517 
4518   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4519   return MatchOperand_Success;
4520 }
4521 
4522 static void addOptionalImmOperand(
4523   MCInst& Inst, const OperandVector& Operands,
4524   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4525   AMDGPUOperand::ImmTy ImmT,
4526   int64_t Default = 0) {
4527   auto i = OptionalIdx.find(ImmT);
4528   if (i != OptionalIdx.end()) {
4529     unsigned Idx = i->second;
4530     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4531   } else {
4532     Inst.addOperand(MCOperand::createImm(Default));
4533   }
4534 }
4535 
4536 OperandMatchResultTy
4537 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4538   if (getLexer().isNot(AsmToken::Identifier)) {
4539     return MatchOperand_NoMatch;
4540   }
4541   StringRef Tok = Parser.getTok().getString();
4542   if (Tok != Prefix) {
4543     return MatchOperand_NoMatch;
4544   }
4545 
4546   Parser.Lex();
4547   if (getLexer().isNot(AsmToken::Colon)) {
4548     return MatchOperand_ParseFail;
4549   }
4550 
4551   Parser.Lex();
4552   if (getLexer().isNot(AsmToken::Identifier)) {
4553     return MatchOperand_ParseFail;
4554   }
4555 
4556   Value = Parser.getTok().getString();
4557   return MatchOperand_Success;
4558 }
4559 
4560 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4561 // values to live in a joint format operand in the MCInst encoding.
4562 OperandMatchResultTy
4563 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4564   SMLoc S = Parser.getTok().getLoc();
4565   int64_t Dfmt = 0, Nfmt = 0;
4566   // dfmt and nfmt can appear in either order, and each is optional.
4567   bool GotDfmt = false, GotNfmt = false;
4568   while (!GotDfmt || !GotNfmt) {
4569     if (!GotDfmt) {
4570       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4571       if (Res != MatchOperand_NoMatch) {
4572         if (Res != MatchOperand_Success)
4573           return Res;
4574         if (Dfmt >= 16) {
4575           Error(Parser.getTok().getLoc(), "out of range dfmt");
4576           return MatchOperand_ParseFail;
4577         }
4578         GotDfmt = true;
4579         Parser.Lex();
4580         continue;
4581       }
4582     }
4583     if (!GotNfmt) {
4584       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4585       if (Res != MatchOperand_NoMatch) {
4586         if (Res != MatchOperand_Success)
4587           return Res;
4588         if (Nfmt >= 8) {
4589           Error(Parser.getTok().getLoc(), "out of range nfmt");
4590           return MatchOperand_ParseFail;
4591         }
4592         GotNfmt = true;
4593         Parser.Lex();
4594         continue;
4595       }
4596     }
4597     break;
4598   }
4599   if (!GotDfmt && !GotNfmt)
4600     return MatchOperand_NoMatch;
4601   auto Format = Dfmt | Nfmt << 4;
4602   Operands.push_back(
4603       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4604   return MatchOperand_Success;
4605 }
4606 
4607 //===----------------------------------------------------------------------===//
4608 // ds
4609 //===----------------------------------------------------------------------===//
4610 
4611 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4612                                     const OperandVector &Operands) {
4613   OptionalImmIndexMap OptionalIdx;
4614 
4615   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4616     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4617 
4618     // Add the register arguments
4619     if (Op.isReg()) {
4620       Op.addRegOperands(Inst, 1);
4621       continue;
4622     }
4623 
4624     // Handle optional arguments
4625     OptionalIdx[Op.getImmTy()] = i;
4626   }
4627 
4628   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4629   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4630   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4631 
4632   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4633 }
4634 
4635 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4636                                 bool IsGdsHardcoded) {
4637   OptionalImmIndexMap OptionalIdx;
4638 
4639   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4640     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4641 
4642     // Add the register arguments
4643     if (Op.isReg()) {
4644       Op.addRegOperands(Inst, 1);
4645       continue;
4646     }
4647 
4648     if (Op.isToken() && Op.getToken() == "gds") {
4649       IsGdsHardcoded = true;
4650       continue;
4651     }
4652 
4653     // Handle optional arguments
4654     OptionalIdx[Op.getImmTy()] = i;
4655   }
4656 
4657   AMDGPUOperand::ImmTy OffsetType =
4658     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4659      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4660      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4661                                                       AMDGPUOperand::ImmTyOffset;
4662 
4663   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4664 
4665   if (!IsGdsHardcoded) {
4666     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4667   }
4668   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4669 }
4670 
4671 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4672   OptionalImmIndexMap OptionalIdx;
4673 
4674   unsigned OperandIdx[4];
4675   unsigned EnMask = 0;
4676   int SrcIdx = 0;
4677 
4678   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4679     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4680 
4681     // Add the register arguments
4682     if (Op.isReg()) {
4683       assert(SrcIdx < 4);
4684       OperandIdx[SrcIdx] = Inst.size();
4685       Op.addRegOperands(Inst, 1);
4686       ++SrcIdx;
4687       continue;
4688     }
4689 
4690     if (Op.isOff()) {
4691       assert(SrcIdx < 4);
4692       OperandIdx[SrcIdx] = Inst.size();
4693       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4694       ++SrcIdx;
4695       continue;
4696     }
4697 
4698     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4699       Op.addImmOperands(Inst, 1);
4700       continue;
4701     }
4702 
4703     if (Op.isToken() && Op.getToken() == "done")
4704       continue;
4705 
4706     // Handle optional arguments
4707     OptionalIdx[Op.getImmTy()] = i;
4708   }
4709 
4710   assert(SrcIdx == 4);
4711 
4712   bool Compr = false;
4713   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4714     Compr = true;
4715     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4716     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4717     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4718   }
4719 
4720   for (auto i = 0; i < SrcIdx; ++i) {
4721     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4722       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4723     }
4724   }
4725 
4726   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4727   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4728 
4729   Inst.addOperand(MCOperand::createImm(EnMask));
4730 }
4731 
4732 //===----------------------------------------------------------------------===//
4733 // s_waitcnt
4734 //===----------------------------------------------------------------------===//
4735 
4736 static bool
4737 encodeCnt(
4738   const AMDGPU::IsaVersion ISA,
4739   int64_t &IntVal,
4740   int64_t CntVal,
4741   bool Saturate,
4742   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4743   unsigned (*decode)(const IsaVersion &Version, unsigned))
4744 {
4745   bool Failed = false;
4746 
4747   IntVal = encode(ISA, IntVal, CntVal);
4748   if (CntVal != decode(ISA, IntVal)) {
4749     if (Saturate) {
4750       IntVal = encode(ISA, IntVal, -1);
4751     } else {
4752       Failed = true;
4753     }
4754   }
4755   return Failed;
4756 }
4757 
4758 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4759 
4760   SMLoc CntLoc = getLoc();
4761   StringRef CntName = getTokenStr();
4762 
4763   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4764       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4765     return false;
4766 
4767   int64_t CntVal;
4768   SMLoc ValLoc = getLoc();
4769   if (!parseExpr(CntVal))
4770     return false;
4771 
4772   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4773 
4774   bool Failed = true;
4775   bool Sat = CntName.endswith("_sat");
4776 
4777   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4778     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4779   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4780     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4781   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4782     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4783   } else {
4784     Error(CntLoc, "invalid counter name " + CntName);
4785     return false;
4786   }
4787 
4788   if (Failed) {
4789     Error(ValLoc, "too large value for " + CntName);
4790     return false;
4791   }
4792 
4793   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4794     return false;
4795 
4796   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4797     if (isToken(AsmToken::EndOfStatement)) {
4798       Error(getLoc(), "expected a counter name");
4799       return false;
4800     }
4801   }
4802 
4803   return true;
4804 }
4805 
4806 OperandMatchResultTy
4807 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4808   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4809   int64_t Waitcnt = getWaitcntBitMask(ISA);
4810   SMLoc S = getLoc();
4811 
4812   // If parse failed, do not return error code
4813   // to avoid excessive error messages.
4814   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4815     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4816   } else {
4817     parseExpr(Waitcnt);
4818   }
4819 
4820   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4821   return MatchOperand_Success;
4822 }
4823 
// Any immediate operand qualifies as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
4828 
4829 //===----------------------------------------------------------------------===//
4830 // hwreg
4831 //===----------------------------------------------------------------------===//
4832 
4833 bool
4834 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4835                                 int64_t &Offset,
4836                                 int64_t &Width) {
4837   using namespace llvm::AMDGPU::Hwreg;
4838 
4839   // The register may be specified by name or using a numeric code
4840   if (isToken(AsmToken::Identifier) &&
4841       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4842     HwReg.IsSymbolic = true;
4843     lex(); // skip message name
4844   } else if (!parseExpr(HwReg.Id)) {
4845     return false;
4846   }
4847 
4848   if (trySkipToken(AsmToken::RParen))
4849     return true;
4850 
4851   // parse optional params
4852   return
4853     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4854     parseExpr(Offset) &&
4855     skipToken(AsmToken::Comma, "expected a comma") &&
4856     parseExpr(Width) &&
4857     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4858 }
4859 
4860 bool
4861 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4862                                const int64_t Offset,
4863                                const int64_t Width,
4864                                const SMLoc Loc) {
4865 
4866   using namespace llvm::AMDGPU::Hwreg;
4867 
4868   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4869     Error(Loc, "specified hardware register is not supported on this GPU");
4870     return false;
4871   } else if (!isValidHwreg(HwReg.Id)) {
4872     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4873     return false;
4874   } else if (!isValidHwregOffset(Offset)) {
4875     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4876     return false;
4877   } else if (!isValidHwregWidth(Width)) {
4878     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4879     return false;
4880   }
4881   return true;
4882 }
4883 
4884 OperandMatchResultTy
4885 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4886   using namespace llvm::AMDGPU::Hwreg;
4887 
4888   int64_t ImmVal = 0;
4889   SMLoc Loc = getLoc();
4890 
4891   // If parse failed, do not return error code
4892   // to avoid excessive error messages.
4893   if (trySkipId("hwreg", AsmToken::LParen)) {
4894     OperandInfoTy HwReg(ID_UNKNOWN_);
4895     int64_t Offset = OFFSET_DEFAULT_;
4896     int64_t Width = WIDTH_DEFAULT_;
4897     if (parseHwregBody(HwReg, Offset, Width) &&
4898         validateHwreg(HwReg, Offset, Width, Loc)) {
4899       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4900     }
4901   } else if (parseExpr(ImmVal)) {
4902     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4903       Error(Loc, "invalid immediate: only 16-bit values are legal");
4904   }
4905 
4906   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4907   return MatchOperand_Success;
4908 }
4909 
// True for immediates of kind ImmTyHwreg (the kind created by parseHwreg).
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
4913 
4914 //===----------------------------------------------------------------------===//
4915 // sendmsg
4916 //===----------------------------------------------------------------------===//
4917 
4918 bool
4919 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4920                                   OperandInfoTy &Op,
4921                                   OperandInfoTy &Stream) {
4922   using namespace llvm::AMDGPU::SendMsg;
4923 
4924   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4925     Msg.IsSymbolic = true;
4926     lex(); // skip message name
4927   } else if (!parseExpr(Msg.Id)) {
4928     return false;
4929   }
4930 
4931   if (trySkipToken(AsmToken::Comma)) {
4932     Op.IsDefined = true;
4933     if (isToken(AsmToken::Identifier) &&
4934         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4935       lex(); // skip operation name
4936     } else if (!parseExpr(Op.Id)) {
4937       return false;
4938     }
4939 
4940     if (trySkipToken(AsmToken::Comma)) {
4941       Stream.IsDefined = true;
4942       if (!parseExpr(Stream.Id))
4943         return false;
4944     }
4945   }
4946 
4947   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4948 }
4949 
4950 bool
4951 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4952                                  const OperandInfoTy &Op,
4953                                  const OperandInfoTy &Stream,
4954                                  const SMLoc S) {
4955   using namespace llvm::AMDGPU::SendMsg;
4956 
4957   // Validation strictness depends on whether message is specified
4958   // in a symbolc or in a numeric form. In the latter case
4959   // only encoding possibility is checked.
4960   bool Strict = Msg.IsSymbolic;
4961 
4962   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4963     Error(S, "invalid message id");
4964     return false;
4965   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4966     Error(S, Op.IsDefined ?
4967              "message does not support operations" :
4968              "missing message operation");
4969     return false;
4970   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4971     Error(S, "invalid operation id");
4972     return false;
4973   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4974     Error(S, "message operation does not support streams");
4975     return false;
4976   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4977     Error(S, "invalid message stream id");
4978     return false;
4979   }
4980   return true;
4981 }
4982 
4983 OperandMatchResultTy
4984 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4985   using namespace llvm::AMDGPU::SendMsg;
4986 
4987   int64_t ImmVal = 0;
4988   SMLoc Loc = getLoc();
4989 
4990   // If parse failed, do not return error code
4991   // to avoid excessive error messages.
4992   if (trySkipId("sendmsg", AsmToken::LParen)) {
4993     OperandInfoTy Msg(ID_UNKNOWN_);
4994     OperandInfoTy Op(OP_NONE_);
4995     OperandInfoTy Stream(STREAM_ID_NONE_);
4996     if (parseSendMsgBody(Msg, Op, Stream) &&
4997         validateSendMsg(Msg, Op, Stream, Loc)) {
4998       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
4999     }
5000   } else if (parseExpr(ImmVal)) {
5001     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5002       Error(Loc, "invalid immediate: only 16-bit values are legal");
5003   }
5004 
5005   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5006   return MatchOperand_Success;
5007 }
5008 
// True for immediates of kind ImmTySendMsg (the kind created by
// parseSendMsgOp).
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
5012 
5013 //===----------------------------------------------------------------------===//
5014 // v_interp
5015 //===----------------------------------------------------------------------===//
5016 
5017 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5018   if (getLexer().getKind() != AsmToken::Identifier)
5019     return MatchOperand_NoMatch;
5020 
5021   StringRef Str = Parser.getTok().getString();
5022   int Slot = StringSwitch<int>(Str)
5023     .Case("p10", 0)
5024     .Case("p20", 1)
5025     .Case("p0", 2)
5026     .Default(-1);
5027 
5028   SMLoc S = Parser.getTok().getLoc();
5029   if (Slot == -1)
5030     return MatchOperand_ParseFail;
5031 
5032   Parser.Lex();
5033   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5034                                               AMDGPUOperand::ImmTyInterpSlot));
5035   return MatchOperand_Success;
5036 }
5037 
5038 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5039   if (getLexer().getKind() != AsmToken::Identifier)
5040     return MatchOperand_NoMatch;
5041 
5042   StringRef Str = Parser.getTok().getString();
5043   if (!Str.startswith("attr"))
5044     return MatchOperand_NoMatch;
5045 
5046   StringRef Chan = Str.take_back(2);
5047   int AttrChan = StringSwitch<int>(Chan)
5048     .Case(".x", 0)
5049     .Case(".y", 1)
5050     .Case(".z", 2)
5051     .Case(".w", 3)
5052     .Default(-1);
5053   if (AttrChan == -1)
5054     return MatchOperand_ParseFail;
5055 
5056   Str = Str.drop_back(2).drop_front(4);
5057 
5058   uint8_t Attr;
5059   if (Str.getAsInteger(10, Attr))
5060     return MatchOperand_ParseFail;
5061 
5062   SMLoc S = Parser.getTok().getLoc();
5063   Parser.Lex();
5064   if (Attr > 63) {
5065     Error(S, "out of bounds attr");
5066     return MatchOperand_Success;
5067   }
5068 
5069   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5070 
5071   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5072                                               AMDGPUOperand::ImmTyInterpAttr));
5073   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5074                                               AMDGPUOperand::ImmTyAttrChan));
5075   return MatchOperand_Success;
5076 }
5077 
5078 //===----------------------------------------------------------------------===//
5079 // exp
5080 //===----------------------------------------------------------------------===//
5081 
// Report "invalid exp target" at the location of the current token.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
5085 
5086 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5087                                                       uint8_t &Val) {
5088   if (Str == "null") {
5089     Val = 9;
5090     return MatchOperand_Success;
5091   }
5092 
5093   if (Str.startswith("mrt")) {
5094     Str = Str.drop_front(3);
5095     if (Str == "z") { // == mrtz
5096       Val = 8;
5097       return MatchOperand_Success;
5098     }
5099 
5100     if (Str.getAsInteger(10, Val))
5101       return MatchOperand_ParseFail;
5102 
5103     if (Val > 7)
5104       errorExpTgt();
5105 
5106     return MatchOperand_Success;
5107   }
5108 
5109   if (Str.startswith("pos")) {
5110     Str = Str.drop_front(3);
5111     if (Str.getAsInteger(10, Val))
5112       return MatchOperand_ParseFail;
5113 
5114     if (Val > 4 || (Val == 4 && !isGFX10()))
5115       errorExpTgt();
5116 
5117     Val += 12;
5118     return MatchOperand_Success;
5119   }
5120 
5121   if (isGFX10() && Str == "prim") {
5122     Val = 20;
5123     return MatchOperand_Success;
5124   }
5125 
5126   if (Str.startswith("param")) {
5127     Str = Str.drop_front(5);
5128     if (Str.getAsInteger(10, Val))
5129       return MatchOperand_ParseFail;
5130 
5131     if (Val >= 32)
5132       errorExpTgt();
5133 
5134     Val += 32;
5135     return MatchOperand_Success;
5136   }
5137 
5138   if (Str.startswith("invalid_target_")) {
5139     Str = Str.drop_front(15);
5140     if (Str.getAsInteger(10, Val))
5141       return MatchOperand_ParseFail;
5142 
5143     errorExpTgt();
5144     return MatchOperand_Success;
5145   }
5146 
5147   return MatchOperand_NoMatch;
5148 }
5149 
5150 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5151   uint8_t Val;
5152   StringRef Str = Parser.getTok().getString();
5153 
5154   auto Res = parseExpTgtImpl(Str, Val);
5155   if (Res != MatchOperand_Success)
5156     return Res;
5157 
5158   SMLoc S = Parser.getTok().getLoc();
5159   Parser.Lex();
5160 
5161   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5162                                               AMDGPUOperand::ImmTyExpTgt));
5163   return MatchOperand_Success;
5164 }
5165 
5166 //===----------------------------------------------------------------------===//
5167 // parser helpers
5168 //===----------------------------------------------------------------------===//
5169 
5170 bool
5171 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5172   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5173 }
5174 
// True when the current token is an identifier whose text equals Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}
5179 
// True when the current token is of kind Kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}
5184 
5185 bool
5186 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5187   if (isId(Id)) {
5188     lex();
5189     return true;
5190   }
5191   return false;
5192 }
5193 
5194 bool
5195 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5196   if (isId(Id) && peekToken().is(Kind)) {
5197     lex();
5198     lex();
5199     return true;
5200   }
5201   return false;
5202 }
5203 
5204 bool
5205 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5206   if (isToken(Kind)) {
5207     lex();
5208     return true;
5209   }
5210   return false;
5211 }
5212 
5213 bool
5214 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5215                            const StringRef ErrMsg) {
5216   if (!trySkipToken(Kind)) {
5217     Error(getLoc(), ErrMsg);
5218     return false;
5219   }
5220   return true;
5221 }
5222 
5223 bool
5224 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5225   return !getParser().parseAbsoluteExpression(Imm);
5226 }
5227 
5228 bool
5229 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5230   if (isToken(AsmToken::String)) {
5231     Val = getToken().getStringContents();
5232     lex();
5233     return true;
5234   } else {
5235     Error(getLoc(), ErrMsg);
5236     return false;
5237   }
5238 }
5239 
5240 AsmToken
5241 AMDGPUAsmParser::getToken() const {
5242   return Parser.getTok();
5243 }
5244 
5245 AsmToken
5246 AMDGPUAsmParser::peekToken() {
5247   return getLexer().peekTok();
5248 }
5249 
5250 void
5251 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5252   auto TokCount = getLexer().peekTokens(Tokens);
5253 
5254   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5255     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5256 }
5257 
5258 AsmToken::TokenKind
5259 AMDGPUAsmParser::getTokenKind() const {
5260   return getLexer().getKind();
5261 }
5262 
5263 SMLoc
5264 AMDGPUAsmParser::getLoc() const {
5265   return getToken().getLoc();
5266 }
5267 
5268 StringRef
5269 AMDGPUAsmParser::getTokenStr() const {
5270   return getToken().getString();
5271 }
5272 
5273 void
5274 AMDGPUAsmParser::lex() {
5275   Parser.Lex();
5276 }
5277 
5278 //===----------------------------------------------------------------------===//
5279 // swizzle
5280 //===----------------------------------------------------------------------===//
5281 
5282 LLVM_READNONE
5283 static unsigned
5284 encodeBitmaskPerm(const unsigned AndMask,
5285                   const unsigned OrMask,
5286                   const unsigned XorMask) {
5287   using namespace llvm::AMDGPU::Swizzle;
5288 
5289   return BITMASK_PERM_ENC |
5290          (AndMask << BITMASK_AND_SHIFT) |
5291          (OrMask  << BITMASK_OR_SHIFT)  |
5292          (XorMask << BITMASK_XOR_SHIFT);
5293 }
5294 
5295 bool
5296 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5297                                       const unsigned MinVal,
5298                                       const unsigned MaxVal,
5299                                       const StringRef ErrMsg) {
5300   for (unsigned i = 0; i < OpNum; ++i) {
5301     if (!skipToken(AsmToken::Comma, "expected a comma")){
5302       return false;
5303     }
5304     SMLoc ExprLoc = Parser.getTok().getLoc();
5305     if (!parseExpr(Op[i])) {
5306       return false;
5307     }
5308     if (Op[i] < MinVal || Op[i] > MaxVal) {
5309       Error(ExprLoc, ErrMsg);
5310       return false;
5311     }
5312   }
5313 
5314   return true;
5315 }
5316 
5317 bool
5318 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5319   using namespace llvm::AMDGPU::Swizzle;
5320 
5321   int64_t Lane[LANE_NUM];
5322   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5323                            "expected a 2-bit lane id")) {
5324     Imm = QUAD_PERM_ENC;
5325     for (unsigned I = 0; I < LANE_NUM; ++I) {
5326       Imm |= Lane[I] << (LANE_SHIFT * I);
5327     }
5328     return true;
5329   }
5330   return false;
5331 }
5332 
5333 bool
5334 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5335   using namespace llvm::AMDGPU::Swizzle;
5336 
5337   SMLoc S = Parser.getTok().getLoc();
5338   int64_t GroupSize;
5339   int64_t LaneIdx;
5340 
5341   if (!parseSwizzleOperands(1, &GroupSize,
5342                             2, 32,
5343                             "group size must be in the interval [2,32]")) {
5344     return false;
5345   }
5346   if (!isPowerOf2_64(GroupSize)) {
5347     Error(S, "group size must be a power of two");
5348     return false;
5349   }
5350   if (parseSwizzleOperands(1, &LaneIdx,
5351                            0, GroupSize - 1,
5352                            "lane id must be in the interval [0,group size - 1]")) {
5353     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5354     return true;
5355   }
5356   return false;
5357 }
5358 
5359 bool
5360 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5361   using namespace llvm::AMDGPU::Swizzle;
5362 
5363   SMLoc S = Parser.getTok().getLoc();
5364   int64_t GroupSize;
5365 
5366   if (!parseSwizzleOperands(1, &GroupSize,
5367       2, 32, "group size must be in the interval [2,32]")) {
5368     return false;
5369   }
5370   if (!isPowerOf2_64(GroupSize)) {
5371     Error(S, "group size must be a power of two");
5372     return false;
5373   }
5374 
5375   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5376   return true;
5377 }
5378 
5379 bool
5380 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5381   using namespace llvm::AMDGPU::Swizzle;
5382 
5383   SMLoc S = Parser.getTok().getLoc();
5384   int64_t GroupSize;
5385 
5386   if (!parseSwizzleOperands(1, &GroupSize,
5387       1, 16, "group size must be in the interval [1,16]")) {
5388     return false;
5389   }
5390   if (!isPowerOf2_64(GroupSize)) {
5391     Error(S, "group size must be a power of two");
5392     return false;
5393   }
5394 
5395   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5396   return true;
5397 }
5398 
5399 bool
5400 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5401   using namespace llvm::AMDGPU::Swizzle;
5402 
5403   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5404     return false;
5405   }
5406 
5407   StringRef Ctl;
5408   SMLoc StrLoc = Parser.getTok().getLoc();
5409   if (!parseString(Ctl)) {
5410     return false;
5411   }
5412   if (Ctl.size() != BITMASK_WIDTH) {
5413     Error(StrLoc, "expected a 5-character mask");
5414     return false;
5415   }
5416 
5417   unsigned AndMask = 0;
5418   unsigned OrMask = 0;
5419   unsigned XorMask = 0;
5420 
5421   for (size_t i = 0; i < Ctl.size(); ++i) {
5422     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5423     switch(Ctl[i]) {
5424     default:
5425       Error(StrLoc, "invalid mask");
5426       return false;
5427     case '0':
5428       break;
5429     case '1':
5430       OrMask |= Mask;
5431       break;
5432     case 'p':
5433       AndMask |= Mask;
5434       break;
5435     case 'i':
5436       AndMask |= Mask;
5437       XorMask |= Mask;
5438       break;
5439     }
5440   }
5441 
5442   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5443   return true;
5444 }
5445 
5446 bool
5447 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5448 
5449   SMLoc OffsetLoc = Parser.getTok().getLoc();
5450 
5451   if (!parseExpr(Imm)) {
5452     return false;
5453   }
5454   if (!isUInt<16>(Imm)) {
5455     Error(OffsetLoc, "expected a 16-bit offset");
5456     return false;
5457   }
5458   return true;
5459 }
5460 
5461 bool
5462 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5463   using namespace llvm::AMDGPU::Swizzle;
5464 
5465   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5466 
5467     SMLoc ModeLoc = Parser.getTok().getLoc();
5468     bool Ok = false;
5469 
5470     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5471       Ok = parseSwizzleQuadPerm(Imm);
5472     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5473       Ok = parseSwizzleBitmaskPerm(Imm);
5474     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5475       Ok = parseSwizzleBroadcast(Imm);
5476     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5477       Ok = parseSwizzleSwap(Imm);
5478     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5479       Ok = parseSwizzleReverse(Imm);
5480     } else {
5481       Error(ModeLoc, "expected a swizzle mode");
5482     }
5483 
5484     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5485   }
5486 
5487   return false;
5488 }
5489 
5490 OperandMatchResultTy
5491 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5492   SMLoc S = Parser.getTok().getLoc();
5493   int64_t Imm = 0;
5494 
5495   if (trySkipId("offset")) {
5496 
5497     bool Ok = false;
5498     if (skipToken(AsmToken::Colon, "expected a colon")) {
5499       if (trySkipId("swizzle")) {
5500         Ok = parseSwizzleMacro(Imm);
5501       } else {
5502         Ok = parseSwizzleOffset(Imm);
5503       }
5504     }
5505 
5506     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5507 
5508     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5509   } else {
5510     // Swizzle "offset" operand is optional.
5511     // If it is omitted, try parsing other optional operands.
5512     return parseOptionalOpr(Operands);
5513   }
5514 }
5515 
5516 bool
5517 AMDGPUOperand::isSwizzle() const {
5518   return isImmTy(ImmTySwizzle);
5519 }
5520 
5521 //===----------------------------------------------------------------------===//
5522 // VGPR Index Mode
5523 //===----------------------------------------------------------------------===//
5524 
5525 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5526 
5527   using namespace llvm::AMDGPU::VGPRIndexMode;
5528 
5529   if (trySkipToken(AsmToken::RParen)) {
5530     return OFF;
5531   }
5532 
5533   int64_t Imm = 0;
5534 
5535   while (true) {
5536     unsigned Mode = 0;
5537     SMLoc S = Parser.getTok().getLoc();
5538 
5539     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5540       if (trySkipId(IdSymbolic[ModeId])) {
5541         Mode = 1 << ModeId;
5542         break;
5543       }
5544     }
5545 
5546     if (Mode == 0) {
5547       Error(S, (Imm == 0)?
5548                "expected a VGPR index mode or a closing parenthesis" :
5549                "expected a VGPR index mode");
5550       break;
5551     }
5552 
5553     if (Imm & Mode) {
5554       Error(S, "duplicate VGPR index mode");
5555       break;
5556     }
5557     Imm |= Mode;
5558 
5559     if (trySkipToken(AsmToken::RParen))
5560       break;
5561     if (!skipToken(AsmToken::Comma,
5562                    "expected a comma or a closing parenthesis"))
5563       break;
5564   }
5565 
5566   return Imm;
5567 }
5568 
5569 OperandMatchResultTy
5570 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5571 
5572   int64_t Imm = 0;
5573   SMLoc S = Parser.getTok().getLoc();
5574 
5575   if (getLexer().getKind() == AsmToken::Identifier &&
5576       Parser.getTok().getString() == "gpr_idx" &&
5577       getLexer().peekTok().is(AsmToken::LParen)) {
5578 
5579     Parser.Lex();
5580     Parser.Lex();
5581 
5582     // If parse failed, trigger an error but do not return error code
5583     // to avoid excessive error messages.
5584     Imm = parseGPRIdxMacro();
5585 
5586   } else {
5587     if (getParser().parseAbsoluteExpression(Imm))
5588       return MatchOperand_NoMatch;
5589     if (Imm < 0 || !isUInt<4>(Imm)) {
5590       Error(S, "invalid immediate: only 4-bit values are legal");
5591     }
5592   }
5593 
5594   Operands.push_back(
5595       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5596   return MatchOperand_Success;
5597 }
5598 
5599 bool AMDGPUOperand::isGPRIdxMode() const {
5600   return isImmTy(ImmTyGprIdxMode);
5601 }
5602 
5603 //===----------------------------------------------------------------------===//
5604 // sopp branch targets
5605 //===----------------------------------------------------------------------===//
5606 
5607 OperandMatchResultTy
5608 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5609   SMLoc S = Parser.getTok().getLoc();
5610 
5611   switch (getLexer().getKind()) {
5612     default: return MatchOperand_ParseFail;
5613     case AsmToken::Integer: {
5614       int64_t Imm;
5615       if (getParser().parseAbsoluteExpression(Imm))
5616         return MatchOperand_ParseFail;
5617       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5618       return MatchOperand_Success;
5619     }
5620 
5621     case AsmToken::Identifier:
5622       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5623           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5624                                   Parser.getTok().getString()), getContext()), S));
5625       Parser.Lex();
5626       return MatchOperand_Success;
5627   }
5628 }
5629 
5630 //===----------------------------------------------------------------------===//
5631 // Boolean holding registers
5632 //===----------------------------------------------------------------------===//
5633 
5634 OperandMatchResultTy
5635 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5636   return parseReg(Operands);
5637 }
5638 
5639 //===----------------------------------------------------------------------===//
5640 // mubuf
5641 //===----------------------------------------------------------------------===//
5642 
5643 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5644   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5645 }
5646 
5647 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5648   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5649 }
5650 
5651 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5652   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5653 }
5654 
5655 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5656                                const OperandVector &Operands,
5657                                bool IsAtomic,
5658                                bool IsAtomicReturn,
5659                                bool IsLds) {
5660   bool IsLdsOpcode = IsLds;
5661   bool HasLdsModifier = false;
5662   OptionalImmIndexMap OptionalIdx;
5663   assert(IsAtomicReturn ? IsAtomic : true);
5664   unsigned FirstOperandIdx = 1;
5665 
5666   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5667     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5668 
5669     // Add the register arguments
5670     if (Op.isReg()) {
5671       Op.addRegOperands(Inst, 1);
5672       // Insert a tied src for atomic return dst.
5673       // This cannot be postponed as subsequent calls to
5674       // addImmOperands rely on correct number of MC operands.
5675       if (IsAtomicReturn && i == FirstOperandIdx)
5676         Op.addRegOperands(Inst, 1);
5677       continue;
5678     }
5679 
5680     // Handle the case where soffset is an immediate
5681     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5682       Op.addImmOperands(Inst, 1);
5683       continue;
5684     }
5685 
5686     HasLdsModifier |= Op.isLDS();
5687 
5688     // Handle tokens like 'offen' which are sometimes hard-coded into the
5689     // asm string.  There are no MCInst operands for these.
5690     if (Op.isToken()) {
5691       continue;
5692     }
5693     assert(Op.isImm());
5694 
5695     // Handle optional arguments
5696     OptionalIdx[Op.getImmTy()] = i;
5697   }
5698 
5699   // This is a workaround for an llvm quirk which may result in an
5700   // incorrect instruction selection. Lds and non-lds versions of
5701   // MUBUF instructions are identical except that lds versions
5702   // have mandatory 'lds' modifier. However this modifier follows
5703   // optional modifiers and llvm asm matcher regards this 'lds'
5704   // modifier as an optional one. As a result, an lds version
5705   // of opcode may be selected even if it has no 'lds' modifier.
5706   if (IsLdsOpcode && !HasLdsModifier) {
5707     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5708     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5709       Inst.setOpcode(NoLdsOpcode);
5710       IsLdsOpcode = false;
5711     }
5712   }
5713 
5714   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5715   if (!IsAtomic) { // glc is hard-coded.
5716     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5717   }
5718   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5719 
5720   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5721     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5722   }
5723 
5724   if (isGFX10())
5725     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5726 }
5727 
5728 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5729   OptionalImmIndexMap OptionalIdx;
5730 
5731   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5732     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5733 
5734     // Add the register arguments
5735     if (Op.isReg()) {
5736       Op.addRegOperands(Inst, 1);
5737       continue;
5738     }
5739 
5740     // Handle the case where soffset is an immediate
5741     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5742       Op.addImmOperands(Inst, 1);
5743       continue;
5744     }
5745 
5746     // Handle tokens like 'offen' which are sometimes hard-coded into the
5747     // asm string.  There are no MCInst operands for these.
5748     if (Op.isToken()) {
5749       continue;
5750     }
5751     assert(Op.isImm());
5752 
5753     // Handle optional arguments
5754     OptionalIdx[Op.getImmTy()] = i;
5755   }
5756 
5757   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5758                         AMDGPUOperand::ImmTyOffset);
5759   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5760   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5761   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5762   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5763 
5764   if (isGFX10())
5765     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5766 }
5767 
5768 //===----------------------------------------------------------------------===//
5769 // mimg
5770 //===----------------------------------------------------------------------===//
5771 
5772 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5773                               bool IsAtomic) {
5774   unsigned I = 1;
5775   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5776   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5777     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5778   }
5779 
5780   if (IsAtomic) {
5781     // Add src, same as dst
5782     assert(Desc.getNumDefs() == 1);
5783     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5784   }
5785 
5786   OptionalImmIndexMap OptionalIdx;
5787 
5788   for (unsigned E = Operands.size(); I != E; ++I) {
5789     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5790 
5791     // Add the register arguments
5792     if (Op.isReg()) {
5793       Op.addRegOperands(Inst, 1);
5794     } else if (Op.isImmModifier()) {
5795       OptionalIdx[Op.getImmTy()] = I;
5796     } else if (!Op.isToken()) {
5797       llvm_unreachable("unexpected operand type");
5798     }
5799   }
5800 
5801   bool IsGFX10 = isGFX10();
5802 
5803   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5804   if (IsGFX10)
5805     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5806   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5807   if (IsGFX10)
5808     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5809   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5810   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5811   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5812   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5813   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5814   if (!IsGFX10)
5815     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5816   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5817 }
5818 
5819 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5820   cvtMIMG(Inst, Operands, true);
5821 }
5822 
5823 //===----------------------------------------------------------------------===//
5824 // smrd
5825 //===----------------------------------------------------------------------===//
5826 
5827 bool AMDGPUOperand::isSMRDOffset8() const {
5828   return isImm() && isUInt<8>(getImm());
5829 }
5830 
5831 bool AMDGPUOperand::isSMRDOffset20() const {
5832   return isImm() && isUInt<20>(getImm());
5833 }
5834 
5835 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5836   // 32-bit literals are only supported on CI and we only want to use them
5837   // when the offset is > 8-bits.
5838   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5839 }
5840 
5841 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5842   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5843 }
5844 
5845 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5846   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5847 }
5848 
5849 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5850   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5851 }
5852 
5853 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5854   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5855 }
5856 
5857 //===----------------------------------------------------------------------===//
5858 // vop3
5859 //===----------------------------------------------------------------------===//
5860 
/// Convert an output-modifier multiplier to its encoded value in place.
///
/// Accepted multipliers are 1, 2 and 4, which become 0, 1 and 2
/// respectively. Any other value is rejected and left unchanged.
///
/// \returns true when \p Mul was a legal multiplier.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    // Halving maps 1/2/4 onto 0/1/2.
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
5868 
/// Convert an output-modifier divisor to its encoded value in place.
///
/// A divisor of 1 becomes 0 and a divisor of 2 becomes 3. Any other value is
/// rejected and left unchanged.
///
/// \returns true when \p Div was a legal divisor.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
5882 
/// Convert a parsed bound_ctrl value to its encoded value in place.
///
/// A parsed 0 becomes 1 and a parsed -1 becomes 0. Any other value is
/// rejected and left unchanged.
///
/// \returns true when \p BoundCtrl was a legal value.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:
    BoundCtrl = 1;
    return true;
  case -1:
    BoundCtrl = 0;
    return true;
  default:
    return false;
  }
}
5896 
5897 // Note: the order in this table matches the order of operands in AsmString.
5898 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5899   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5900   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5901   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5902   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5903   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5904   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5905   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5906   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5907   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5908   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5909   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5910   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5911   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5912   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5913   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5914   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5915   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5916   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5917   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5918   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5919   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5920   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5921   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5922   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5923   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5924   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5925   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5926   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5927   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5928   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5929   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5930   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5931   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5932   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5933   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5934   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5935   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5936   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5937   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5938   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5939   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5940   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5941   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5942 };
5943 
5944 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5945   unsigned size = Operands.size();
5946   assert(size > 0);
5947 
5948   OperandMatchResultTy res = parseOptionalOpr(Operands);
5949 
5950   // This is a hack to enable hardcoded mandatory operands which follow
5951   // optional operands.
5952   //
5953   // Current design assumes that all operands after the first optional operand
5954   // are also optional. However implementation of some instructions violates
5955   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5956   //
5957   // To alleviate this problem, we have to (implicitly) parse extra operands
5958   // to make sure autogenerated parser of custom operands never hit hardcoded
5959   // mandatory operands.
5960 
5961   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5962 
5963     // We have parsed the first optional operand.
5964     // Parse as many operands as necessary to skip all mandatory operands.
5965 
5966     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5967       if (res != MatchOperand_Success ||
5968           getLexer().is(AsmToken::EndOfStatement)) break;
5969       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5970       res = parseOptionalOpr(Operands);
5971     }
5972   }
5973 
5974   return res;
5975 }
5976 
5977 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5978   OperandMatchResultTy res;
5979   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5980     // try to parse any optional operand here
5981     if (Op.IsBit) {
5982       res = parseNamedBit(Op.Name, Operands, Op.Type);
5983     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5984       res = parseOModOperand(Operands);
5985     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5986                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5987                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5988       res = parseSDWASel(Operands, Op.Name, Op.Type);
5989     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5990       res = parseSDWADstUnused(Operands);
5991     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5992                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5993                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5994                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5995       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5996                                         Op.ConvertResult);
5997     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5998       res = parseDim(Operands);
5999     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6000       res = parseDfmtNfmt(Operands);
6001     } else {
6002       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6003     }
6004     if (res != MatchOperand_NoMatch) {
6005       return res;
6006     }
6007   }
6008   return MatchOperand_NoMatch;
6009 }
6010 
6011 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6012   StringRef Name = Parser.getTok().getString();
6013   if (Name == "mul") {
6014     return parseIntWithPrefix("mul", Operands,
6015                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6016   }
6017 
6018   if (Name == "div") {
6019     return parseIntWithPrefix("div", Operands,
6020                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6021   }
6022 
6023   return MatchOperand_NoMatch;
6024 }
6025 
6026 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6027   cvtVOP3P(Inst, Operands);
6028 
6029   int Opc = Inst.getOpcode();
6030 
6031   int SrcNum;
6032   const int Ops[] = { AMDGPU::OpName::src0,
6033                       AMDGPU::OpName::src1,
6034                       AMDGPU::OpName::src2 };
6035   for (SrcNum = 0;
6036        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6037        ++SrcNum);
6038   assert(SrcNum > 0);
6039 
6040   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6041   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6042 
6043   if ((OpSel & (1 << SrcNum)) != 0) {
6044     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6045     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6046     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6047   }
6048 }
6049 
6050 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6051       // 1. This operand is input modifiers
6052   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6053       // 2. This is not last operand
6054       && Desc.NumOperands > (OpNum + 1)
6055       // 3. Next operand is register class
6056       && Desc.OpInfo[OpNum + 1].RegClass != -1
6057       // 4. Next register is not tied to any other operand
6058       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6059 }
6060 
6061 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6062 {
6063   OptionalImmIndexMap OptionalIdx;
6064   unsigned Opc = Inst.getOpcode();
6065 
6066   unsigned I = 1;
6067   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6068   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6069     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6070   }
6071 
6072   for (unsigned E = Operands.size(); I != E; ++I) {
6073     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6074     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6075       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6076     } else if (Op.isInterpSlot() ||
6077                Op.isInterpAttr() ||
6078                Op.isAttrChan()) {
6079       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6080     } else if (Op.isImmModifier()) {
6081       OptionalIdx[Op.getImmTy()] = I;
6082     } else {
6083       llvm_unreachable("unhandled operand type");
6084     }
6085   }
6086 
6087   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6088     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6089   }
6090 
6091   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6092     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6093   }
6094 
6095   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6096     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6097   }
6098 }
6099 
6100 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6101                               OptionalImmIndexMap &OptionalIdx) {
6102   unsigned Opc = Inst.getOpcode();
6103 
6104   unsigned I = 1;
6105   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6106   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6107     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6108   }
6109 
6110   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6111     // This instruction has src modifiers
6112     for (unsigned E = Operands.size(); I != E; ++I) {
6113       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6114       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6115         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6116       } else if (Op.isImmModifier()) {
6117         OptionalIdx[Op.getImmTy()] = I;
6118       } else if (Op.isRegOrImm()) {
6119         Op.addRegOrImmOperands(Inst, 1);
6120       } else {
6121         llvm_unreachable("unhandled operand type");
6122       }
6123     }
6124   } else {
6125     // No src modifiers
6126     for (unsigned E = Operands.size(); I != E; ++I) {
6127       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6128       if (Op.isMod()) {
6129         OptionalIdx[Op.getImmTy()] = I;
6130       } else {
6131         Op.addRegOrImmOperands(Inst, 1);
6132       }
6133     }
6134   }
6135 
6136   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6137     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6138   }
6139 
6140   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6141     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6142   }
6143 
6144   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6145   // it has src2 register operand that is tied to dst operand
6146   // we don't allow modifiers for this operand in assembler so src2_modifiers
6147   // should be 0.
6148   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6149       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6150       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6151       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6152       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6153       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6154       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6155     auto it = Inst.begin();
6156     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6157     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6158     ++it;
6159     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6160   }
6161 }
6162 
6163 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6164   OptionalImmIndexMap OptionalIdx;
6165   cvtVOP3(Inst, Operands, OptionalIdx);
6166 }
6167 
6168 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6169                                const OperandVector &Operands) {
6170   OptionalImmIndexMap OptIdx;
6171   const int Opc = Inst.getOpcode();
6172   const MCInstrDesc &Desc = MII.get(Opc);
6173 
6174   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6175 
6176   cvtVOP3(Inst, Operands, OptIdx);
6177 
6178   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6179     assert(!IsPacked);
6180     Inst.addOperand(Inst.getOperand(0));
6181   }
6182 
6183   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6184   // instruction, and then figure out where to actually put the modifiers
6185 
6186   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6187 
6188   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6189   if (OpSelHiIdx != -1) {
6190     int DefaultVal = IsPacked ? -1 : 0;
6191     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6192                           DefaultVal);
6193   }
6194 
6195   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6196   if (NegLoIdx != -1) {
6197     assert(IsPacked);
6198     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6199     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6200   }
6201 
6202   const int Ops[] = { AMDGPU::OpName::src0,
6203                       AMDGPU::OpName::src1,
6204                       AMDGPU::OpName::src2 };
6205   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6206                          AMDGPU::OpName::src1_modifiers,
6207                          AMDGPU::OpName::src2_modifiers };
6208 
6209   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6210 
6211   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6212   unsigned OpSelHi = 0;
6213   unsigned NegLo = 0;
6214   unsigned NegHi = 0;
6215 
6216   if (OpSelHiIdx != -1) {
6217     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6218   }
6219 
6220   if (NegLoIdx != -1) {
6221     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6222     NegLo = Inst.getOperand(NegLoIdx).getImm();
6223     NegHi = Inst.getOperand(NegHiIdx).getImm();
6224   }
6225 
6226   for (int J = 0; J < 3; ++J) {
6227     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6228     if (OpIdx == -1)
6229       break;
6230 
6231     uint32_t ModVal = 0;
6232 
6233     if ((OpSel & (1 << J)) != 0)
6234       ModVal |= SISrcMods::OP_SEL_0;
6235 
6236     if ((OpSelHi & (1 << J)) != 0)
6237       ModVal |= SISrcMods::OP_SEL_1;
6238 
6239     if ((NegLo & (1 << J)) != 0)
6240       ModVal |= SISrcMods::NEG;
6241 
6242     if ((NegHi & (1 << J)) != 0)
6243       ModVal |= SISrcMods::NEG_HI;
6244 
6245     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6246 
6247     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6248   }
6249 }
6250 
6251 //===----------------------------------------------------------------------===//
6252 // dpp
6253 //===----------------------------------------------------------------------===//
6254 
6255 bool AMDGPUOperand::isDPP8() const {
6256   return isImmTy(ImmTyDPP8);
6257 }
6258 
6259 bool AMDGPUOperand::isDPPCtrl() const {
6260   using namespace AMDGPU::DPP;
6261 
6262   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6263   if (result) {
6264     int64_t Imm = getImm();
6265     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6266            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6267            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6268            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6269            (Imm == DppCtrl::WAVE_SHL1) ||
6270            (Imm == DppCtrl::WAVE_ROL1) ||
6271            (Imm == DppCtrl::WAVE_SHR1) ||
6272            (Imm == DppCtrl::WAVE_ROR1) ||
6273            (Imm == DppCtrl::ROW_MIRROR) ||
6274            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6275            (Imm == DppCtrl::BCAST15) ||
6276            (Imm == DppCtrl::BCAST31) ||
6277            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6278            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6279   }
6280   return false;
6281 }
6282 
6283 //===----------------------------------------------------------------------===//
6284 // mAI
6285 //===----------------------------------------------------------------------===//
6286 
6287 bool AMDGPUOperand::isBLGP() const {
6288   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6289 }
6290 
6291 bool AMDGPUOperand::isCBSZ() const {
6292   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6293 }
6294 
6295 bool AMDGPUOperand::isABID() const {
6296   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6297 }
6298 
6299 bool AMDGPUOperand::isS16Imm() const {
6300   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6301 }
6302 
6303 bool AMDGPUOperand::isU16Imm() const {
6304   return isImm() && isUInt<16>(getImm());
6305 }
6306 
6307 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6308   if (!isGFX10())
6309     return MatchOperand_NoMatch;
6310 
6311   SMLoc S = Parser.getTok().getLoc();
6312 
6313   if (getLexer().isNot(AsmToken::Identifier))
6314     return MatchOperand_NoMatch;
6315   if (getLexer().getTok().getString() != "dim")
6316     return MatchOperand_NoMatch;
6317 
6318   Parser.Lex();
6319   if (getLexer().isNot(AsmToken::Colon))
6320     return MatchOperand_ParseFail;
6321 
6322   Parser.Lex();
6323 
6324   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6325   // integer.
6326   std::string Token;
6327   if (getLexer().is(AsmToken::Integer)) {
6328     SMLoc Loc = getLexer().getTok().getEndLoc();
6329     Token = getLexer().getTok().getString();
6330     Parser.Lex();
6331     if (getLexer().getTok().getLoc() != Loc)
6332       return MatchOperand_ParseFail;
6333   }
6334   if (getLexer().isNot(AsmToken::Identifier))
6335     return MatchOperand_ParseFail;
6336   Token += getLexer().getTok().getString();
6337 
6338   StringRef DimId = Token;
6339   if (DimId.startswith("SQ_RSRC_IMG_"))
6340     DimId = DimId.substr(12);
6341 
6342   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6343   if (!DimInfo)
6344     return MatchOperand_ParseFail;
6345 
6346   Parser.Lex();
6347 
6348   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6349                                               AMDGPUOperand::ImmTyDim));
6350   return MatchOperand_Success;
6351 }
6352 
6353 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6354   SMLoc S = Parser.getTok().getLoc();
6355   StringRef Prefix;
6356 
6357   if (getLexer().getKind() == AsmToken::Identifier) {
6358     Prefix = Parser.getTok().getString();
6359   } else {
6360     return MatchOperand_NoMatch;
6361   }
6362 
6363   if (Prefix != "dpp8")
6364     return parseDPPCtrl(Operands);
6365   if (!isGFX10())
6366     return MatchOperand_NoMatch;
6367 
6368   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6369 
6370   int64_t Sels[8];
6371 
6372   Parser.Lex();
6373   if (getLexer().isNot(AsmToken::Colon))
6374     return MatchOperand_ParseFail;
6375 
6376   Parser.Lex();
6377   if (getLexer().isNot(AsmToken::LBrac))
6378     return MatchOperand_ParseFail;
6379 
6380   Parser.Lex();
6381   if (getParser().parseAbsoluteExpression(Sels[0]))
6382     return MatchOperand_ParseFail;
6383   if (0 > Sels[0] || 7 < Sels[0])
6384     return MatchOperand_ParseFail;
6385 
6386   for (size_t i = 1; i < 8; ++i) {
6387     if (getLexer().isNot(AsmToken::Comma))
6388       return MatchOperand_ParseFail;
6389 
6390     Parser.Lex();
6391     if (getParser().parseAbsoluteExpression(Sels[i]))
6392       return MatchOperand_ParseFail;
6393     if (0 > Sels[i] || 7 < Sels[i])
6394       return MatchOperand_ParseFail;
6395   }
6396 
6397   if (getLexer().isNot(AsmToken::RBrac))
6398     return MatchOperand_ParseFail;
6399   Parser.Lex();
6400 
6401   unsigned DPP8 = 0;
6402   for (size_t i = 0; i < 8; ++i)
6403     DPP8 |= (Sels[i] << (i * 3));
6404 
6405   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6406   return MatchOperand_Success;
6407 }
6408 
6409 OperandMatchResultTy
6410 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6411   using namespace AMDGPU::DPP;
6412 
6413   SMLoc S = Parser.getTok().getLoc();
6414   StringRef Prefix;
6415   int64_t Int;
6416 
6417   if (getLexer().getKind() == AsmToken::Identifier) {
6418     Prefix = Parser.getTok().getString();
6419   } else {
6420     return MatchOperand_NoMatch;
6421   }
6422 
6423   if (Prefix == "row_mirror") {
6424     Int = DppCtrl::ROW_MIRROR;
6425     Parser.Lex();
6426   } else if (Prefix == "row_half_mirror") {
6427     Int = DppCtrl::ROW_HALF_MIRROR;
6428     Parser.Lex();
6429   } else {
6430     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6431     if (Prefix != "quad_perm"
6432         && Prefix != "row_shl"
6433         && Prefix != "row_shr"
6434         && Prefix != "row_ror"
6435         && Prefix != "wave_shl"
6436         && Prefix != "wave_rol"
6437         && Prefix != "wave_shr"
6438         && Prefix != "wave_ror"
6439         && Prefix != "row_bcast"
6440         && Prefix != "row_share"
6441         && Prefix != "row_xmask") {
6442       return MatchOperand_NoMatch;
6443     }
6444 
6445     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6446       return MatchOperand_NoMatch;
6447 
6448     if (!isVI() && !isGFX9() &&
6449         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6450          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6451          Prefix == "row_bcast"))
6452       return MatchOperand_NoMatch;
6453 
6454     Parser.Lex();
6455     if (getLexer().isNot(AsmToken::Colon))
6456       return MatchOperand_ParseFail;
6457 
6458     if (Prefix == "quad_perm") {
6459       // quad_perm:[%d,%d,%d,%d]
6460       Parser.Lex();
6461       if (getLexer().isNot(AsmToken::LBrac))
6462         return MatchOperand_ParseFail;
6463       Parser.Lex();
6464 
6465       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6466         return MatchOperand_ParseFail;
6467 
6468       for (int i = 0; i < 3; ++i) {
6469         if (getLexer().isNot(AsmToken::Comma))
6470           return MatchOperand_ParseFail;
6471         Parser.Lex();
6472 
6473         int64_t Temp;
6474         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6475           return MatchOperand_ParseFail;
6476         const int shift = i*2 + 2;
6477         Int += (Temp << shift);
6478       }
6479 
6480       if (getLexer().isNot(AsmToken::RBrac))
6481         return MatchOperand_ParseFail;
6482       Parser.Lex();
6483     } else {
6484       // sel:%d
6485       Parser.Lex();
6486       if (getParser().parseAbsoluteExpression(Int))
6487         return MatchOperand_ParseFail;
6488 
6489       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6490         Int |= DppCtrl::ROW_SHL0;
6491       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6492         Int |= DppCtrl::ROW_SHR0;
6493       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6494         Int |= DppCtrl::ROW_ROR0;
6495       } else if (Prefix == "wave_shl" && 1 == Int) {
6496         Int = DppCtrl::WAVE_SHL1;
6497       } else if (Prefix == "wave_rol" && 1 == Int) {
6498         Int = DppCtrl::WAVE_ROL1;
6499       } else if (Prefix == "wave_shr" && 1 == Int) {
6500         Int = DppCtrl::WAVE_SHR1;
6501       } else if (Prefix == "wave_ror" && 1 == Int) {
6502         Int = DppCtrl::WAVE_ROR1;
6503       } else if (Prefix == "row_bcast") {
6504         if (Int == 15) {
6505           Int = DppCtrl::BCAST15;
6506         } else if (Int == 31) {
6507           Int = DppCtrl::BCAST31;
6508         } else {
6509           return MatchOperand_ParseFail;
6510         }
6511       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6512         Int |= DppCtrl::ROW_SHARE_FIRST;
6513       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6514         Int |= DppCtrl::ROW_XMASK_FIRST;
6515       } else {
6516         return MatchOperand_ParseFail;
6517       }
6518     }
6519   }
6520 
6521   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6522   return MatchOperand_Success;
6523 }
6524 
6525 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6526   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6527 }
6528 
6529 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6530   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6531 }
6532 
6533 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6534   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6535 }
6536 
6537 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6538   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6539 }
6540 
6541 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6542   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6543 }
6544 
6545 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6546   OptionalImmIndexMap OptionalIdx;
6547 
6548   unsigned I = 1;
6549   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6550   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6551     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6552   }
6553 
6554   int Fi = 0;
6555   for (unsigned E = Operands.size(); I != E; ++I) {
6556     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6557                                             MCOI::TIED_TO);
6558     if (TiedTo != -1) {
6559       assert((unsigned)TiedTo < Inst.getNumOperands());
6560       // handle tied old or src2 for MAC instructions
6561       Inst.addOperand(Inst.getOperand(TiedTo));
6562     }
6563     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6564     // Add the register arguments
6565     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6566       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6567       // Skip it.
6568       continue;
6569     }
6570 
6571     if (IsDPP8) {
6572       if (Op.isDPP8()) {
6573         Op.addImmOperands(Inst, 1);
6574       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6575         Op.addRegWithFPInputModsOperands(Inst, 2);
6576       } else if (Op.isFI()) {
6577         Fi = Op.getImm();
6578       } else if (Op.isReg()) {
6579         Op.addRegOperands(Inst, 1);
6580       } else {
6581         llvm_unreachable("Invalid operand type");
6582       }
6583     } else {
6584       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6585         Op.addRegWithFPInputModsOperands(Inst, 2);
6586       } else if (Op.isDPPCtrl()) {
6587         Op.addImmOperands(Inst, 1);
6588       } else if (Op.isImm()) {
6589         // Handle optional arguments
6590         OptionalIdx[Op.getImmTy()] = I;
6591       } else {
6592         llvm_unreachable("Invalid operand type");
6593       }
6594     }
6595   }
6596 
6597   if (IsDPP8) {
6598     using namespace llvm::AMDGPU::DPP;
6599     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6600   } else {
6601     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6602     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6603     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6604     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6605       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6606     }
6607   }
6608 }
6609 
6610 //===----------------------------------------------------------------------===//
6611 // sdwa
6612 //===----------------------------------------------------------------------===//
6613 
6614 OperandMatchResultTy
6615 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6616                               AMDGPUOperand::ImmTy Type) {
6617   using namespace llvm::AMDGPU::SDWA;
6618 
6619   SMLoc S = Parser.getTok().getLoc();
6620   StringRef Value;
6621   OperandMatchResultTy res;
6622 
6623   res = parseStringWithPrefix(Prefix, Value);
6624   if (res != MatchOperand_Success) {
6625     return res;
6626   }
6627 
6628   int64_t Int;
6629   Int = StringSwitch<int64_t>(Value)
6630         .Case("BYTE_0", SdwaSel::BYTE_0)
6631         .Case("BYTE_1", SdwaSel::BYTE_1)
6632         .Case("BYTE_2", SdwaSel::BYTE_2)
6633         .Case("BYTE_3", SdwaSel::BYTE_3)
6634         .Case("WORD_0", SdwaSel::WORD_0)
6635         .Case("WORD_1", SdwaSel::WORD_1)
6636         .Case("DWORD", SdwaSel::DWORD)
6637         .Default(0xffffffff);
6638   Parser.Lex(); // eat last token
6639 
6640   if (Int == 0xffffffff) {
6641     return MatchOperand_ParseFail;
6642   }
6643 
6644   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6645   return MatchOperand_Success;
6646 }
6647 
6648 OperandMatchResultTy
6649 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6650   using namespace llvm::AMDGPU::SDWA;
6651 
6652   SMLoc S = Parser.getTok().getLoc();
6653   StringRef Value;
6654   OperandMatchResultTy res;
6655 
6656   res = parseStringWithPrefix("dst_unused", Value);
6657   if (res != MatchOperand_Success) {
6658     return res;
6659   }
6660 
6661   int64_t Int;
6662   Int = StringSwitch<int64_t>(Value)
6663         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6664         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6665         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6666         .Default(0xffffffff);
6667   Parser.Lex(); // eat last token
6668 
6669   if (Int == 0xffffffff) {
6670     return MatchOperand_ParseFail;
6671   }
6672 
6673   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6674   return MatchOperand_Success;
6675 }
6676 
6677 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6678   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6679 }
6680 
6681 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6682   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6683 }
6684 
6685 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6686   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6687 }
6688 
6689 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6690   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6691 }
6692 
6693 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6694                               uint64_t BasicInstType, bool skipVcc) {
6695   using namespace llvm::AMDGPU::SDWA;
6696 
6697   OptionalImmIndexMap OptionalIdx;
6698   bool skippedVcc = false;
6699 
6700   unsigned I = 1;
6701   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6702   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6703     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6704   }
6705 
6706   for (unsigned E = Operands.size(); I != E; ++I) {
6707     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6708     if (skipVcc && !skippedVcc && Op.isReg() &&
6709         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6710       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
6711       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6712       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6713       // Skip VCC only if we didn't skip it on previous iteration.
6714       if (BasicInstType == SIInstrFlags::VOP2 &&
6715           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6716         skippedVcc = true;
6717         continue;
6718       } else if (BasicInstType == SIInstrFlags::VOPC &&
6719                  Inst.getNumOperands() == 0) {
6720         skippedVcc = true;
6721         continue;
6722       }
6723     }
6724     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6725       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6726     } else if (Op.isImm()) {
6727       // Handle optional arguments
6728       OptionalIdx[Op.getImmTy()] = I;
6729     } else {
6730       llvm_unreachable("Invalid operand type");
6731     }
6732     skippedVcc = false;
6733   }
6734 
6735   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6736       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6737       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6738     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
6739     switch (BasicInstType) {
6740     case SIInstrFlags::VOP1:
6741       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6742       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6743         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6744       }
6745       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6746       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6747       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6748       break;
6749 
6750     case SIInstrFlags::VOP2:
6751       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6752       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6753         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6754       }
6755       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6756       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6757       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6758       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6759       break;
6760 
6761     case SIInstrFlags::VOPC:
6762       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6763         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6764       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6765       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6766       break;
6767 
6768     default:
6769       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6770     }
6771   }
6772 
6773   // special case v_mac_{f16, f32}:
6774   // it has src2 register operand that is tied to dst operand
6775   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6776       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6777     auto it = Inst.begin();
6778     std::advance(
6779       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6780     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6781   }
6782 }
6783 
6784 //===----------------------------------------------------------------------===//
6785 // mAI
6786 //===----------------------------------------------------------------------===//
6787 
6788 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6789   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6790 }
6791 
6792 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6793   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6794 }
6795 
6796 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6797   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6798 }
6799 
6800 /// Force static initialization.
6801 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6802   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6803   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6804 }
6805 
6806 #define GET_REGISTER_MATCHER
6807 #define GET_MATCHER_IMPLEMENTATION
6808 #define GET_MNEMONIC_SPELL_CHECKER
6809 #include "AMDGPUGenAsmMatcher.inc"
6810 
6811 // This fuction should be defined after auto-generated include so that we have
6812 // MatchClassKind enum defined
6813 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6814                                                      unsigned Kind) {
6815   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6816   // But MatchInstructionImpl() expects to meet token and fails to validate
6817   // operand. This method checks if we are given immediate operand but expect to
6818   // get corresponding token.
6819   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6820   switch (Kind) {
6821   case MCK_addr64:
6822     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6823   case MCK_gds:
6824     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6825   case MCK_lds:
6826     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6827   case MCK_glc:
6828     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6829   case MCK_idxen:
6830     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6831   case MCK_offen:
6832     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6833   case MCK_SSrcB32:
6834     // When operands have expression values, they will return true for isToken,
6835     // because it is not possible to distinguish between a token and an
6836     // expression at parse time. MatchInstructionImpl() will always try to
6837     // match an operand as a token, when isToken returns true, and when the
6838     // name of the expression is not a valid token, the match will fail,
6839     // so we need to handle it here.
6840     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6841   case MCK_SSrcF32:
6842     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6843   case MCK_SoppBrTarget:
6844     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6845   case MCK_VReg32OrOff:
6846     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6847   case MCK_InterpSlot:
6848     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6849   case MCK_Attr:
6850     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6851   case MCK_AttrChan:
6852     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6853   default:
6854     return Match_InvalidOperand;
6855   }
6856 }
6857 
6858 //===----------------------------------------------------------------------===//
6859 // endpgm
6860 //===----------------------------------------------------------------------===//
6861 
6862 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6863   SMLoc S = Parser.getTok().getLoc();
6864   int64_t Imm = 0;
6865 
6866   if (!parseExpr(Imm)) {
6867     // The operand is optional, if not present default to 0
6868     Imm = 0;
6869   }
6870 
6871   if (!isUInt<16>(Imm)) {
6872     Error(S, "expected a 16-bit value");
6873     return MatchOperand_ParseFail;
6874   }
6875 
6876   Operands.push_back(
6877       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6878   return MatchOperand_Success;
6879 }
6880 
6881 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6882