1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
// Classification of a parsed register operand: vector GPR, scalar GPR,
// accumulator GPR, trap-temporary, or a special register (e.g. VCC/EXEC).
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
/// A single parsed operand of an AMDGPU instruction: a token, an immediate,
/// a register, or a not-yet-resolved expression. Which member of the storage
/// union below is active is determined by \p Kind. The many is*() predicates
/// are queried by the auto-generated asm matcher to classify this operand
/// against an instruction's operand classes.
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers attached to a register or immediate operand:
  /// abs/neg for floating-point sources, sext for integer sources.
  /// FP and integer modifiers are mutually exclusive.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into the SISrcMods bit layout used by
    // src*_modifiers instruction operands.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers into the SISrcMods bit layout.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier family is present (they may not be mixed).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Identifies which named instruction modifier (e.g. offset, glc, dpp
  /// control, sdwa selects) an immediate operand encodes. ImmTyNone means a
  /// plain immediate value.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  // Storage for a Token operand: a view into the source buffer (not owned).
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Storage for an Immediate operand. IsFPImm records whether Val holds the
  // bit pattern of a floating-point literal.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Storage for a Register operand.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind; all members are trivially copyable.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the matcher: a register with no abs/neg/sext
  // modifiers attached.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  //===--------------------------------------------------------------------===//
  // Matcher predicates. Naming scheme for the source-operand families below:
  //   SCSrc - SGPR or inline constant;   VCSrc - VGPR/SGPR or inline constant;
  //   SSrc/VSrc - the above, additionally allowing a literal constant;
  //   VISrc - VGPR or inline constant;   AISrc - AGPR or inline constant.
  //===--------------------------------------------------------------------===//

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for a VGPR of any width handled here (32..512 bits).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // An immediate carrying a named modifier (anything other than ImmTyNone).
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // Never expected to be queried by the matcher (see llvm_unreachable).
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  // Never expected to be queried by the matcher (see llvm_unreachable).
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  //===--------------------------------------------------------------------===//
  // Accessors.
  //===--------------------------------------------------------------------===//

  // For a symbol-reference expression, return the symbol's name so it can be
  // treated as a token (see isToken()).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are only meaningful on registers and plain immediates
  // (ImmTyNone), hence the asserts here and in setModifiers().
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  //===--------------------------------------------------------------------===//
  // MCInst operand emission. The add*Operands() methods append this parsed
  // operand (and, where applicable, a preceding src-modifiers immediate) to
  // the MCInst being assembled.
  //===--------------------------------------------------------------------===//

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the encoded modifiers immediate first, then the register or the
  // unmodified immediate (modifiers are already folded into the first operand,
  // so ApplyModifiers is false for the immediate).
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  //===--------------------------------------------------------------------===//
  // Debug printing.
  //===--------------------------------------------------------------------===//

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  //===--------------------------------------------------------------------===//
  // Factories.
  //===--------------------------------------------------------------------===//

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is not read anywhere in this
  // factory — confirm with callers whether it can be dropped.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
931 
932 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
933   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
934   return OS;
935 }
936 
937 //===----------------------------------------------------------------------===//
938 // AsmParser
939 //===----------------------------------------------------------------------===//
940 
941 // Holds info related to the current kernel, e.g. count of SGPRs used.
942 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
943 // .amdgpu_hsa_kernel or at EOF.
944 class KernelScopeInfo {
945   int SgprIndexUnusedMin = -1;
946   int VgprIndexUnusedMin = -1;
947   MCContext *Ctx = nullptr;
948 
949   void usesSgprAt(int i) {
950     if (i >= SgprIndexUnusedMin) {
951       SgprIndexUnusedMin = ++i;
952       if (Ctx) {
953         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
954         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
955       }
956     }
957   }
958 
959   void usesVgprAt(int i) {
960     if (i >= VgprIndexUnusedMin) {
961       VgprIndexUnusedMin = ++i;
962       if (Ctx) {
963         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
964         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
965       }
966     }
967   }
968 
969 public:
970   KernelScopeInfo() = default;
971 
972   void initialize(MCContext &Context) {
973     Ctx = &Context;
974     usesSgprAt(SgprIndexUnusedMin = -1);
975     usesVgprAt(VgprIndexUnusedMin = -1);
976   }
977 
978   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
979     switch (RegKind) {
980       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
981       case IS_AGPR: // fall through
982       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
983       default: break;
984     }
985   }
986 };
987 
/// Target assembly parser for AMDGPU. Parses instructions, registers and
/// operand modifiers, and handles the AMDGPU-specific assembler directives
/// (.amdgcn_target, .amdhsa_kernel, .amd_kernel_code_t, HSA/PAL metadata,
/// .amdgpu_lds, ...). Most parse*/cvt* members are referenced by the
/// TableGen-generated matcher included below.
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // Encoding variant forced by an instruction mnemonic suffix
  // (e.g. "_e32"/"_e64"/"_dpp"/"_sdwa"); 0 / false means no preference.
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        // Code object v3 naming for the predefined ISA version symbols.
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        // Legacy (pre-v3) naming for the same symbols.
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        // v3 tracks GPR usage through per-kind count symbols ...
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        // ... while older code objects use the KernelScopeInfo mechanism.
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget feature / generation predicates.

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding accessors (driven by mnemonic suffixes).
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  // MCTargetAsmParser interface and top-level parsing entry points.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Operand-modifier recognition and reg/imm parsing helpers.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Scratch record for symbolic operands of s_sendmsg / s_setreg-style
  // instructions while they are being parsed and validated.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  // Post-match semantic validation of a fully-built MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Small lexer conveniences layered over MCAsmLexer.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF operand conversion entry points used by the matcher.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default operands supplied when an optional modifier is omitted.
  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP parsing/conversion.
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  // SDWA parsing/conversion.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
1415 
// Table entry describing one optional instruction operand: its mnemonic
// suffix name, the immediate kind it produces, whether it is a bare bit
// (present/absent) rather than "name:value", and an optional value
// conversion/validation callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};
1422 
1423 } // end anonymous namespace
1424 
1425 // May be called with integer type with equivalent bitwidth.
1426 static const fltSemantics *getFltSemantics(unsigned Size) {
1427   switch (Size) {
1428   case 4:
1429     return &APFloat::IEEEsingle();
1430   case 8:
1431     return &APFloat::IEEEdouble();
1432   case 2:
1433     return &APFloat::IEEEhalf();
1434   default:
1435     llvm_unreachable("unsupported fp type");
1436   }
1437 }
1438 
1439 static const fltSemantics *getFltSemantics(MVT VT) {
1440   return getFltSemantics(VT.getSizeInBits() / 8);
1441 }
1442 
// Map an AMDGPU operand-type enumerator to the IEEE semantics of the FP
// literal it can carry (32-, 64-, or 16-bit, including packed v2f16 forms).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operands.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  // 64-bit operands.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit and packed 16-bit operands.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1474 
1475 //===----------------------------------------------------------------------===//
1476 // Operand
1477 //===----------------------------------------------------------------------===//
1478 
1479 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1480   bool Lost;
1481 
1482   // Convert literal to single precision
1483   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1484                                                APFloat::rmNearestTiesToEven,
1485                                                &Lost);
1486   // We allow precision lost but not overflow or underflow
1487   if (Status != APFloat::opOK &&
1488       Lost &&
1489       ((Status & APFloat::opOverflow)  != 0 ||
1490        (Status & APFloat::opUnderflow) != 0)) {
1491     return false;
1492   }
1493 
1494   return true;
1495 }
1496 
1497 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1498   return isUIntN(Size, Val) || isIntN(Size, Val);
1499 }
1500 
// Whether this operand's immediate can be encoded as an inline constant for
// an operand of the given machine value type (no extra literal dword).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      // Raw 64-bit FP bit pattern is checked directly.
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // For narrower operands, first make sure the value round-trips to the
    // target type without overflow/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // A value that does not fit in the operand width (signed or unsigned)
  // can never be inlined.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1563 
// Whether this operand's immediate may be emitted as an extra literal dword
// for an operand of the given machine value type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    // A 64-bit literal is still encoded as a 32-bit dword.
    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslesly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
1610 
1611 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1612   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1613 }
1614 
1615 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1616   if (AsmParser->isVI())
1617     return isVReg32();
1618   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1619     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1620   else
1621     return false;
1622 }
1623 
// SDWA operand check specialized for f16 sources.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}
1627 
// SDWA operand check specialized for f32 sources.
bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}
1631 
// SDWA operand check specialized for i16 sources.
bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}
1635 
// SDWA operand check specialized for i32 sources.
bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1639 
1640 bool AMDGPUOperand::isBoolReg() const {
1641   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1642     isSCSrcB64() : isSCSrcB32();
1643 }
1644 
1645 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1646 {
1647   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1648   assert(Size == 2 || Size == 4 || Size == 8);
1649 
1650   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1651 
1652   if (Imm.Mods.Abs) {
1653     Val &= ~FpSignMask;
1654   }
1655   if (Imm.Mods.Neg) {
1656     Val ^= FpSignMask;
1657   }
1658 
1659   return Val;
1660 }
1661 
1662 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1663   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1664                              Inst.getNumOperands())) {
1665     addLiteralImmOperand(Inst, Imm.Val,
1666                          ApplyModifiers &
1667                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1668   } else {
1669     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1670     Inst.addOperand(MCOperand::createImm(Imm.Val));
1671   }
1672 }
1673 
1674 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1675   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1676   auto OpNum = Inst.getNumOperands();
1677   // Check that this operand accepts literals
1678   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1679 
1680   if (ApplyModifiers) {
1681     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1682     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1683     Val = applyInputFPModifiers(Val, Size);
1684   }
1685 
1686   APInt Literal(64, Val);
1687   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1688 
1689   if (Imm.IsFPImm) { // We got fp literal token
1690     switch (OpTy) {
1691     case AMDGPU::OPERAND_REG_IMM_INT64:
1692     case AMDGPU::OPERAND_REG_IMM_FP64:
1693     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1694     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1695       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1696                                        AsmParser->hasInv2PiInlineImm())) {
1697         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1698         return;
1699       }
1700 
1701       // Non-inlineable
1702       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1703         // For fp operands we check if low 32 bits are zeros
1704         if (Literal.getLoBits(32) != 0) {
1705           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1706           "Can't encode literal as exact 64-bit floating-point operand. "
1707           "Low 32-bits will be set to zero");
1708         }
1709 
1710         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1711         return;
1712       }
1713 
1714       // We don't allow fp literals in 64-bit integer instructions. It is
1715       // unclear how we should encode them. This case should be checked earlier
1716       // in predicate methods (isLiteralImm())
1717       llvm_unreachable("fp literal in 64-bit integer instruction.");
1718 
1719     case AMDGPU::OPERAND_REG_IMM_INT32:
1720     case AMDGPU::OPERAND_REG_IMM_FP32:
1721     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1722     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1723     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1724     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1725     case AMDGPU::OPERAND_REG_IMM_INT16:
1726     case AMDGPU::OPERAND_REG_IMM_FP16:
1727     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1728     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1729     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1730     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1731     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1732     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1733     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1734     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1735     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1736     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1737       bool lost;
1738       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1739       // Convert literal to single precision
1740       FPLiteral.convert(*getOpFltSemantics(OpTy),
1741                         APFloat::rmNearestTiesToEven, &lost);
1742       // We allow precision lost but not overflow or underflow. This should be
1743       // checked earlier in isLiteralImm()
1744 
1745       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1746       Inst.addOperand(MCOperand::createImm(ImmVal));
1747       return;
1748     }
1749     default:
1750       llvm_unreachable("invalid operand size");
1751     }
1752 
1753     return;
1754   }
1755 
1756   // We got int literal token.
1757   // Only sign extend inline immediates.
1758   switch (OpTy) {
1759   case AMDGPU::OPERAND_REG_IMM_INT32:
1760   case AMDGPU::OPERAND_REG_IMM_FP32:
1761   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1762   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1763   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1764   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1765   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1766   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1767     if (isSafeTruncation(Val, 32) &&
1768         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1769                                      AsmParser->hasInv2PiInlineImm())) {
1770       Inst.addOperand(MCOperand::createImm(Val));
1771       return;
1772     }
1773 
1774     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1775     return;
1776 
1777   case AMDGPU::OPERAND_REG_IMM_INT64:
1778   case AMDGPU::OPERAND_REG_IMM_FP64:
1779   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1780   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1781     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1782       Inst.addOperand(MCOperand::createImm(Val));
1783       return;
1784     }
1785 
1786     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1787     return;
1788 
1789   case AMDGPU::OPERAND_REG_IMM_INT16:
1790   case AMDGPU::OPERAND_REG_IMM_FP16:
1791   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1792   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1793   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1794   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1795     if (isSafeTruncation(Val, 16) &&
1796         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1797                                      AsmParser->hasInv2PiInlineImm())) {
1798       Inst.addOperand(MCOperand::createImm(Val));
1799       return;
1800     }
1801 
1802     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1803     return;
1804 
1805   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1806   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1808   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1809     assert(isSafeTruncation(Val, 16));
1810     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1811                                         AsmParser->hasInv2PiInlineImm()));
1812 
1813     Inst.addOperand(MCOperand::createImm(Val));
1814     return;
1815   }
1816   default:
1817     llvm_unreachable("invalid operand size");
1818   }
1819 }
1820 
1821 template <unsigned Bitwidth>
1822 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1823   APInt Literal(64, Imm.Val);
1824 
1825   if (!Imm.IsFPImm) {
1826     // We got int literal token.
1827     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1828     return;
1829   }
1830 
1831   bool Lost;
1832   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1833   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1834                     APFloat::rmNearestTiesToEven, &Lost);
1835   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1836 }
1837 
1838 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1839   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1840 }
1841 
1842 static bool isInlineValue(unsigned Reg) {
1843   switch (Reg) {
1844   case AMDGPU::SRC_SHARED_BASE:
1845   case AMDGPU::SRC_SHARED_LIMIT:
1846   case AMDGPU::SRC_PRIVATE_BASE:
1847   case AMDGPU::SRC_PRIVATE_LIMIT:
1848   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1849     return true;
1850   case AMDGPU::SRC_VCCZ:
1851   case AMDGPU::SRC_EXECZ:
1852   case AMDGPU::SRC_SCC:
1853     return true;
1854   default:
1855     return false;
1856   }
1857 }
1858 
1859 bool AMDGPUOperand::isInlineValue() const {
1860   return isRegKind() && ::isInlineValue(getReg());
1861 }
1862 
1863 //===----------------------------------------------------------------------===//
1864 // AsmParser
1865 //===----------------------------------------------------------------------===//
1866 
1867 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1868   if (Is == IS_VGPR) {
1869     switch (RegWidth) {
1870       default: return -1;
1871       case 1: return AMDGPU::VGPR_32RegClassID;
1872       case 2: return AMDGPU::VReg_64RegClassID;
1873       case 3: return AMDGPU::VReg_96RegClassID;
1874       case 4: return AMDGPU::VReg_128RegClassID;
1875       case 8: return AMDGPU::VReg_256RegClassID;
1876       case 16: return AMDGPU::VReg_512RegClassID;
1877     }
1878   } else if (Is == IS_TTMP) {
1879     switch (RegWidth) {
1880       default: return -1;
1881       case 1: return AMDGPU::TTMP_32RegClassID;
1882       case 2: return AMDGPU::TTMP_64RegClassID;
1883       case 4: return AMDGPU::TTMP_128RegClassID;
1884       case 8: return AMDGPU::TTMP_256RegClassID;
1885       case 16: return AMDGPU::TTMP_512RegClassID;
1886     }
1887   } else if (Is == IS_SGPR) {
1888     switch (RegWidth) {
1889       default: return -1;
1890       case 1: return AMDGPU::SGPR_32RegClassID;
1891       case 2: return AMDGPU::SGPR_64RegClassID;
1892       case 4: return AMDGPU::SGPR_128RegClassID;
1893       case 8: return AMDGPU::SGPR_256RegClassID;
1894       case 16: return AMDGPU::SGPR_512RegClassID;
1895     }
1896   } else if (Is == IS_AGPR) {
1897     switch (RegWidth) {
1898       default: return -1;
1899       case 1: return AMDGPU::AGPR_32RegClassID;
1900       case 2: return AMDGPU::AReg_64RegClassID;
1901       case 4: return AMDGPU::AReg_128RegClassID;
1902       case 16: return AMDGPU::AReg_512RegClassID;
1903       case 32: return AMDGPU::AReg_1024RegClassID;
1904     }
1905   }
1906   return -1;
1907 }
1908 
1909 static unsigned getSpecialRegForName(StringRef RegName) {
1910   return StringSwitch<unsigned>(RegName)
1911     .Case("exec", AMDGPU::EXEC)
1912     .Case("vcc", AMDGPU::VCC)
1913     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1914     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1915     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1916     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1917     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1918     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1919     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1920     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1921     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1922     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1923     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1924     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1925     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1926     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1927     .Case("m0", AMDGPU::M0)
1928     .Case("vccz", AMDGPU::SRC_VCCZ)
1929     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1930     .Case("execz", AMDGPU::SRC_EXECZ)
1931     .Case("src_execz", AMDGPU::SRC_EXECZ)
1932     .Case("scc", AMDGPU::SRC_SCC)
1933     .Case("src_scc", AMDGPU::SRC_SCC)
1934     .Case("tba", AMDGPU::TBA)
1935     .Case("tma", AMDGPU::TMA)
1936     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1937     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1938     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1939     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1940     .Case("vcc_lo", AMDGPU::VCC_LO)
1941     .Case("vcc_hi", AMDGPU::VCC_HI)
1942     .Case("exec_lo", AMDGPU::EXEC_LO)
1943     .Case("exec_hi", AMDGPU::EXEC_HI)
1944     .Case("tma_lo", AMDGPU::TMA_LO)
1945     .Case("tma_hi", AMDGPU::TMA_HI)
1946     .Case("tba_lo", AMDGPU::TBA_LO)
1947     .Case("tba_hi", AMDGPU::TBA_HI)
1948     .Case("null", AMDGPU::SGPR_NULL)
1949     .Default(0);
1950 }
1951 
1952 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1953                                     SMLoc &EndLoc) {
1954   auto R = parseRegister();
1955   if (!R) return true;
1956   assert(R->isReg());
1957   RegNo = R->getReg();
1958   StartLoc = R->getStartLoc();
1959   EndLoc = R->getEndLoc();
1960   return false;
1961 }
1962 
1963 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1964                                             RegisterKind RegKind, unsigned Reg1,
1965                                             unsigned RegNum) {
1966   switch (RegKind) {
1967   case IS_SPECIAL:
1968     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1969       Reg = AMDGPU::EXEC;
1970       RegWidth = 2;
1971       return true;
1972     }
1973     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1974       Reg = AMDGPU::FLAT_SCR;
1975       RegWidth = 2;
1976       return true;
1977     }
1978     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1979       Reg = AMDGPU::XNACK_MASK;
1980       RegWidth = 2;
1981       return true;
1982     }
1983     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1984       Reg = AMDGPU::VCC;
1985       RegWidth = 2;
1986       return true;
1987     }
1988     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1989       Reg = AMDGPU::TBA;
1990       RegWidth = 2;
1991       return true;
1992     }
1993     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1994       Reg = AMDGPU::TMA;
1995       RegWidth = 2;
1996       return true;
1997     }
1998     return false;
1999   case IS_VGPR:
2000   case IS_SGPR:
2001   case IS_AGPR:
2002   case IS_TTMP:
2003     if (Reg1 != Reg + RegWidth) {
2004       return false;
2005     }
2006     RegWidth++;
2007     return true;
2008   default:
2009     llvm_unreachable("unexpected register kind");
2010   }
2011 }
2012 
2013 static const StringRef Registers[] = {
2014   { "v" },
2015   { "s" },
2016   { "ttmp" },
2017   { "acc" },
2018   { "a" },
2019 };
2020 
2021 bool
2022 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2023                             const AsmToken &NextToken) const {
2024 
2025   // A list of consecutive registers: [s0,s1,s2,s3]
2026   if (Token.is(AsmToken::LBrac))
2027     return true;
2028 
2029   if (!Token.is(AsmToken::Identifier))
2030     return false;
2031 
2032   // A single register like s0 or a range of registers like s[0:1]
2033 
2034   StringRef RegName = Token.getString();
2035 
2036   for (StringRef Reg : Registers) {
2037     if (RegName.startswith(Reg)) {
2038       if (Reg.size() < RegName.size()) {
2039         unsigned RegNum;
2040         // A single register with an index: rXX
2041         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2042           return true;
2043       } else {
2044         // A range of registers: r[XX:YY].
2045         if (NextToken.is(AsmToken::LBrac))
2046           return true;
2047       }
2048     }
2049   }
2050 
2051   return getSpecialRegForName(RegName);
2052 }
2053 
2054 bool
2055 AMDGPUAsmParser::isRegister()
2056 {
2057   return isRegister(getToken(), peekToken());
2058 }
2059 
// Parse a register reference into RegKind/Reg/RegNum/RegWidth.
// Accepted forms: a special register name ("vcc", "exec", ...), a single
// register ("v0", "s5", "ttmp3", "a2"/"acc2"), a bracketed range
// ("v[0:3]", "s[2]"), or a list of consecutive registers ("[s0,s1,s2,s3]").
// If DwordRegIndex is non-null it receives the first register's dword
// index. Returns false on any syntax, alignment or range error.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    // Special registers are matched by full name first.
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Otherwise the name's prefix selects the kind; RegNumIndex is the
      // offset where the numeric suffix (if any) begins.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName[0] == 'a') {
        // AGPRs accept both "aN" and "accN" spellings.
        RegNumIndex = RegName.startswith("acc") ? 3 : 1;
        RegKind = IS_AGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // After the first index either ']' (single-element range, v[XX])
        // or ':' (full range, v[XX:YY]) must follow.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    // The first element fixes the kind; each later element must be a
    // single register of the same kind that directly follows the run.
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Resolve the parsed kind/number/width into an actual MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Register classes of width > 1 index registers in units of Size.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Finally reject registers the current subtarget does not have.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
2192 
2193 Optional<StringRef>
2194 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2195   switch (RegKind) {
2196   case IS_VGPR:
2197     return StringRef(".amdgcn.next_free_vgpr");
2198   case IS_SGPR:
2199     return StringRef(".amdgcn.next_free_sgpr");
2200   default:
2201     return None;
2202   }
2203 }
2204 
2205 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2206   auto SymbolName = getGprCountSymbolName(RegKind);
2207   assert(SymbolName && "initializing invalid register kind");
2208   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2209   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2210 }
2211 
// Raise the .amdgcn.next_free_{v,s}gpr tracking symbol so it stays above
// the highest register index referenced so far. Returns true on success
// (including when no tracking symbol applies); returns false after
// emitting a diagnostic.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  // Only VGPRs and SGPRs have tracking symbols.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register reference.
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  // Error() returns true, so "!Error(...)" reports and yields failure.
  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Grow the count monotonically; never shrink it.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
2240 
// Parse a register operand at the current position, recording register
// usage for kernel resource accounting. Returns nullptr (after emitting a
// diagnostic) on failure.
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    //FIXME: improve error messages (bug 41303).
    Error(StartLoc, "not a valid operand.");
    return nullptr;
  }
  // Code object v3 tracks GPR usage via assembler symbols; older code
  // objects record it through the kernel scope.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
2260 
2261 OperandMatchResultTy
2262 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2263   // TODO: add syntactic sugar for 1/(2*PI)
2264 
2265   assert(!isRegister());
2266   assert(!isModifier());
2267 
2268   const auto& Tok = getToken();
2269   const auto& NextTok = peekToken();
2270   bool IsReal = Tok.is(AsmToken::Real);
2271   SMLoc S = getLoc();
2272   bool Negate = false;
2273 
2274   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2275     lex();
2276     IsReal = true;
2277     Negate = true;
2278   }
2279 
2280   if (IsReal) {
2281     // Floating-point expressions are not supported.
2282     // Can only allow floating-point literals with an
2283     // optional sign.
2284 
2285     StringRef Num = getTokenStr();
2286     lex();
2287 
2288     APFloat RealVal(APFloat::IEEEdouble());
2289     auto roundMode = APFloat::rmNearestTiesToEven;
2290     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2291       return MatchOperand_ParseFail;
2292     }
2293     if (Negate)
2294       RealVal.changeSign();
2295 
2296     Operands.push_back(
2297       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2298                                AMDGPUOperand::ImmTyNone, true));
2299 
2300     return MatchOperand_Success;
2301 
2302   } else {
2303     int64_t IntVal;
2304     const MCExpr *Expr;
2305     SMLoc S = getLoc();
2306 
2307     if (HasSP3AbsModifier) {
2308       // This is a workaround for handling expressions
2309       // as arguments of SP3 'abs' modifier, for example:
2310       //     |1.0|
2311       //     |-1|
2312       //     |1+x|
2313       // This syntax is not compatible with syntax of standard
2314       // MC expressions (due to the trailing '|').
2315       SMLoc EndLoc;
2316       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2317         return MatchOperand_ParseFail;
2318     } else {
2319       if (Parser.parseExpression(Expr))
2320         return MatchOperand_ParseFail;
2321     }
2322 
2323     if (Expr->evaluateAsAbsolute(IntVal)) {
2324       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2325     } else {
2326       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2327     }
2328 
2329     return MatchOperand_Success;
2330   }
2331 
2332   return MatchOperand_NoMatch;
2333 }
2334 
2335 OperandMatchResultTy
2336 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2337   if (!isRegister())
2338     return MatchOperand_NoMatch;
2339 
2340   if (auto R = parseRegister()) {
2341     assert(R->isReg());
2342     Operands.push_back(std::move(R));
2343     return MatchOperand_Success;
2344   }
2345   return MatchOperand_ParseFail;
2346 }
2347 
2348 OperandMatchResultTy
2349 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2350   auto res = parseReg(Operands);
2351   if (res != MatchOperand_NoMatch) {
2352     return res;
2353   } else if (isModifier()) {
2354     return MatchOperand_NoMatch;
2355   } else {
2356     return parseImm(Operands, HasSP3AbsMod);
2357   }
2358 }
2359 
2360 bool
2361 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2362   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2363     const auto &str = Token.getString();
2364     return str == "abs" || str == "neg" || str == "sext";
2365   }
2366   return false;
2367 }
2368 
2369 bool
2370 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2371   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2372 }
2373 
2374 bool
2375 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2376   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2377 }
2378 
2379 bool
2380 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2381   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2382 }
2383 
2384 // Check if this is an operand modifier or an opcode modifier
2385 // which may look like an expression but it is not. We should
2386 // avoid parsing these modifiers as expressions. Currently
2387 // recognized sequences are:
2388 //   |...|
2389 //   abs(...)
2390 //   neg(...)
2391 //   sext(...)
2392 //   -reg
2393 //   -|...|
2394 //   -abs(...)
2395 //   name:...
2396 // Note that simple opcode modifiers like 'gds' may be parsed as
2397 // expressions; this is a special case. See getExpressionAsToken.
2398 //
2399 bool
2400 AMDGPUAsmParser::isModifier() {
2401 
2402   AsmToken Tok = getToken();
2403   AsmToken NextToken[2];
2404   peekTokens(NextToken);
2405 
2406   return isOperandModifier(Tok, NextToken[0]) ||
2407          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2408          isOpcodeModifierWithVal(Tok, NextToken[0]);
2409 }
2410 
2411 // Check if the current token is an SP3 'neg' modifier.
2412 // Currently this modifier is allowed in the following context:
2413 //
2414 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2415 // 2. Before an 'abs' modifier: -abs(...)
2416 // 3. Before an SP3 'abs' modifier: -|...|
2417 //
2418 // In all other cases "-" is handled as a part
2419 // of an expression that follows the sign.
2420 //
2421 // Note: When "-" is followed by an integer literal,
2422 // this is interpreted as integer negation rather
2423 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity
2432 //
2433 bool
2434 AMDGPUAsmParser::parseSP3NegModifier() {
2435 
2436   AsmToken NextToken[2];
2437   peekTokens(NextToken);
2438 
2439   if (isToken(AsmToken::Minus) &&
2440       (isRegister(NextToken[0], NextToken[1]) ||
2441        NextToken[0].is(AsmToken::Pipe) ||
2442        isId(NextToken[0], "abs"))) {
2443     lex();
2444     return true;
2445   }
2446 
2447   return false;
2448 }
2449 
// Parse a register or immediate wrapped in optional floating-point input
// modifiers, in both named (neg(...), abs(...)) and SP3 (-..., |...|)
// syntax. The modifiers may nest (e.g. -abs(v0)), but mixing the named
// and SP3 form of the same modifier is rejected. On success the parsed
// operand is pushed onto Operands with its modifier flags set.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  // Reject combining SP3 '-' with named 'neg(...)'.
  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  // Reject combining named 'abs(...)' with SP3 '|'.
  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    // Inside '|...|' the immediate needs SP3-abs-aware expression parsing.
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once any modifier was consumed, a missing operand is a hard error.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Closing delimiters, innermost first: '|', then abs's ')', then neg's ')'.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Modifiers apply only to resolved values, not symbolic expressions.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2516 
2517 OperandMatchResultTy
2518 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2519                                                bool AllowImm) {
2520   bool Sext = trySkipId("sext");
2521   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2522     return MatchOperand_ParseFail;
2523 
2524   OperandMatchResultTy Res;
2525   if (AllowImm) {
2526     Res = parseRegOrImm(Operands);
2527   } else {
2528     Res = parseReg(Operands);
2529   }
2530   if (Res != MatchOperand_Success) {
2531     return Sext? MatchOperand_ParseFail : Res;
2532   }
2533 
2534   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2535     return MatchOperand_ParseFail;
2536 
2537   AMDGPUOperand::Modifiers Mods;
2538   Mods.Sext = Sext;
2539 
2540   if (Mods.hasIntModifiers()) {
2541     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2542     if (Op.isExpr()) {
2543       Error(Op.getStartLoc(), "expected an absolute expression");
2544       return MatchOperand_ParseFail;
2545     }
2546     Op.setModifiers(Mods);
2547   }
2548 
2549   return MatchOperand_Success;
2550 }
2551 
2552 OperandMatchResultTy
2553 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2554   return parseRegOrImmWithFPInputMods(Operands, false);
2555 }
2556 
2557 OperandMatchResultTy
2558 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2559   return parseRegOrImmWithIntInputMods(Operands, false);
2560 }
2561 
2562 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2563   auto Loc = getLoc();
2564   if (trySkipId("off")) {
2565     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2566                                                 AMDGPUOperand::ImmTyOff, false));
2567     return MatchOperand_Success;
2568   }
2569 
2570   if (!isRegister())
2571     return MatchOperand_NoMatch;
2572 
2573   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2574   if (Reg) {
2575     Operands.push_back(std::move(Reg));
2576     return MatchOperand_Success;
2577   }
2578 
2579   return MatchOperand_ParseFail;
2580 
2581 }
2582 
2583 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2584   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2585 
2586   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2587       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2588       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2589       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2590     return Match_InvalidOperand;
2591 
2592   if ((TSFlags & SIInstrFlags::VOP3) &&
2593       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2594       getForcedEncodingSize() != 64)
2595     return Match_PreferE32;
2596 
2597   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2598       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2599     // v_mac_f32/16 allow only dst_sel == DWORD;
2600     auto OpNum =
2601         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2602     const auto &Op = Inst.getOperand(OpNum);
2603     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2604       return Match_InvalidOperand;
2605     }
2606   }
2607 
2608   return Match_Success;
2609 }
2610 
2611 // What asm variants we should check
2612 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2613   if (getForcedEncodingSize() == 32) {
2614     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2615     return makeArrayRef(Variants);
2616   }
2617 
2618   if (isForcedVOP3()) {
2619     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2620     return makeArrayRef(Variants);
2621   }
2622 
2623   if (isForcedSDWA()) {
2624     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2625                                         AMDGPUAsmVariants::SDWA9};
2626     return makeArrayRef(Variants);
2627   }
2628 
2629   if (isForcedDPP()) {
2630     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2631     return makeArrayRef(Variants);
2632   }
2633 
2634   static const unsigned Variants[] = {
2635     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2636     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2637   };
2638 
2639   return makeArrayRef(Variants);
2640 }
2641 
2642 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2643   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2644   const unsigned Num = Desc.getNumImplicitUses();
2645   for (unsigned i = 0; i < Num; ++i) {
2646     unsigned Reg = Desc.ImplicitUses[i];
2647     switch (Reg) {
2648     case AMDGPU::FLAT_SCR:
2649     case AMDGPU::VCC:
2650     case AMDGPU::VCC_LO:
2651     case AMDGPU::VCC_HI:
2652     case AMDGPU::M0:
2653     case AMDGPU::SGPR_NULL:
2654       return Reg;
2655     default:
2656       break;
2657     }
2658   }
2659   return AMDGPU::NoRegister;
2660 }
2661 
2662 // NB: This code is correct only when used to check constant
2663 // bus limitations because GFX7 support no f16 inline constants.
2664 // Note that there are no cases when a GFX7 opcode violates
2665 // constant bus limitations due to the use of an f16 constant.
2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2667                                        unsigned OpIdx) const {
2668   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2669 
2670   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2671     return false;
2672   }
2673 
2674   const MCOperand &MO = Inst.getOperand(OpIdx);
2675 
2676   int64_t Val = MO.getImm();
2677   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2678 
2679   switch (OpSize) { // expected operand size
2680   case 8:
2681     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2682   case 4:
2683     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2684   case 2: {
2685     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2686     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2687         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2688         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2689         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2690         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2691         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2692       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2693     } else {
2694       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2695     }
2696   }
2697   default:
2698     llvm_unreachable("invalid operand size");
2699   }
2700 }
2701 
2702 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2703   const MCOperand &MO = Inst.getOperand(OpIdx);
2704   if (MO.isImm()) {
2705     return !isInlineConstant(Inst, OpIdx);
2706   }
2707   return !MO.isReg() ||
2708          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2709 }
2710 
// Checks that the instruction does not read more scalar values over the
// constant bus than the hardware allows (at most 1 pre-GFX10, 2 on GFX10).
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  // Only meaningful while NumLiterals != 0 (set before first read below).
  unsigned LiteralSize;

  // Only VALU encodings are subject to the constant bus limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Track distinct SGPRs so that the same register read several times
    // only counts as one constant-bus use.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(Reg)) {
            SGPRsUsed.insert(Reg);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  // Literal uses (1 or 2 scalar values, see above) add to the total.
  ConstantBusUseCount += NumLiterals;

  // GFX10 allows two constant-bus reads per instruction; older targets one.
  if (isGFX10())
    return ConstantBusUseCount <= 2;

  return ConstantBusUseCount <= 1;
}
2792 
2793 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2794   const unsigned Opcode = Inst.getOpcode();
2795   const MCInstrDesc &Desc = MII.get(Opcode);
2796 
2797   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2798   if (DstIdx == -1 ||
2799       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2800     return true;
2801   }
2802 
2803   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2804 
2805   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2806   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2807   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2808 
2809   assert(DstIdx != -1);
2810   const MCOperand &Dst = Inst.getOperand(DstIdx);
2811   assert(Dst.isReg());
2812   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2813 
2814   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2815 
2816   for (int SrcIdx : SrcIndices) {
2817     if (SrcIdx == -1) break;
2818     const MCOperand &Src = Inst.getOperand(SrcIdx);
2819     if (Src.isReg()) {
2820       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2821       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2822         return false;
2823       }
2824     }
2825   }
2826 
2827   return true;
2828 }
2829 
2830 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2831 
2832   const unsigned Opc = Inst.getOpcode();
2833   const MCInstrDesc &Desc = MII.get(Opc);
2834 
2835   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2836     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2837     assert(ClampIdx != -1);
2838     return Inst.getOperand(ClampIdx).getImm() == 0;
2839   }
2840 
2841   return true;
2842 }
2843 
2844 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2845 
2846   const unsigned Opc = Inst.getOpcode();
2847   const MCInstrDesc &Desc = MII.get(Opc);
2848 
2849   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2850     return true;
2851 
2852   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2853   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2854   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2855 
2856   assert(VDataIdx != -1);
2857   assert(DMaskIdx != -1);
2858   assert(TFEIdx != -1);
2859 
2860   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2861   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2862   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2863   if (DMask == 0)
2864     DMask = 1;
2865 
2866   unsigned DataSize =
2867     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2868   if (hasPackedD16()) {
2869     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2870     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2871       DataSize = (DataSize + 1) / 2;
2872   }
2873 
2874   return (VDataSize / 4) == DataSize + TFESize;
2875 }
2876 
// Checks that the number of address dwords supplied to a GFX10 MIMG
// instruction matches what the base opcode and dim imply.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Only GFX10 MIMG instructions are validated here.
  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(DimIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA encodings carry each address component as a separate operand
  // between vaddr0 and srsrc; otherwise vaddr0 is one register tuple.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;

  // Number of address dwords the operation requires.
  unsigned AddrSize = BaseOpcode->NumExtraArgs +
                      (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
                      (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
                      (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  // Non-NSA encodings round the address up to the next register-tuple
  // size (5..8 dwords -> 8, more than 8 -> 16).
  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}
2916 
2917 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2918 
2919   const unsigned Opc = Inst.getOpcode();
2920   const MCInstrDesc &Desc = MII.get(Opc);
2921 
2922   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2923     return true;
2924   if (!Desc.mayLoad() || !Desc.mayStore())
2925     return true; // Not atomic
2926 
2927   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2928   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2929 
2930   // This is an incomplete check because image_atomic_cmpswap
2931   // may only use 0x3 and 0xf while other atomic operations
2932   // may use 0x1 and 0x3. However these limitations are
2933   // verified when we check that dmask matches dst size.
2934   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2935 }
2936 
2937 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2938 
2939   const unsigned Opc = Inst.getOpcode();
2940   const MCInstrDesc &Desc = MII.get(Opc);
2941 
2942   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2943     return true;
2944 
2945   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2946   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2947 
2948   // GATHER4 instructions use dmask in a different fashion compared to
2949   // other MIMG instructions. The only useful DMASK values are
2950   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2951   // (red,red,red,red) etc.) The ISA document doesn't mention
2952   // this.
2953   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2954 }
2955 
2956 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2957 
2958   const unsigned Opc = Inst.getOpcode();
2959   const MCInstrDesc &Desc = MII.get(Opc);
2960 
2961   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2962     return true;
2963 
2964   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2965   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2966     if (isCI() || isSI())
2967       return false;
2968   }
2969 
2970   return true;
2971 }
2972 
2973 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2974   const unsigned Opc = Inst.getOpcode();
2975   const MCInstrDesc &Desc = MII.get(Opc);
2976 
2977   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2978     return true;
2979 
2980   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2981   if (DimIdx < 0)
2982     return true;
2983 
2984   long Imm = Inst.getOperand(DimIdx).getImm();
2985   if (Imm < 0 || Imm >= 8)
2986     return false;
2987 
2988   return true;
2989 }
2990 
// Returns true for the "rev" opcode variants (v_subrev*, v_lshlrev*,
// v_lshrrev*, v_ashrrev*, and their carry/packed forms), which take their
// source operands in reversed order, across all encodings and subtargets.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
3119 
3120 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3121 
3122   using namespace SIInstrFlags;
3123   const unsigned Opcode = Inst.getOpcode();
3124   const MCInstrDesc &Desc = MII.get(Opcode);
3125 
3126   // lds_direct register is defined so that it can be used
3127   // with 9-bit operands only. Ignore encodings which do not accept these.
3128   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3129     return true;
3130 
3131   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3132   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3133   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3134 
3135   const int SrcIndices[] = { Src1Idx, Src2Idx };
3136 
3137   // lds_direct cannot be specified as either src1 or src2.
3138   for (int SrcIdx : SrcIndices) {
3139     if (SrcIdx == -1) break;
3140     const MCOperand &Src = Inst.getOperand(SrcIdx);
3141     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3142       return false;
3143     }
3144   }
3145 
3146   if (Src0Idx == -1)
3147     return true;
3148 
3149   const MCOperand &Src = Inst.getOperand(Src0Idx);
3150   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3151     return true;
3152 
3153   // lds_direct is specified as src0. Check additional limitations.
3154   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3155 }
3156 
3157 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3158   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3159     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3160     if (Op.isFlatOffset())
3161       return Op.getStartLoc();
3162   }
3163   return getLoc();
3164 }
3165 
// Validates the immediate offset of FLAT-family instructions against the
// target's supported offset width and signedness, emitting a diagnostic
// at the offset operand's location on failure.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  // Targets without flat-offset support require a zero offset.
  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
  // For FLAT segment the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = isGFX9() ? 13 : 12;
  if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
    // Non-FLAT segments (global/scratch) allow the full signed range.
    if (!isIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            isGFX9() ? "expected a 13-bit signed offset" :
                       "expected a 12-bit signed offset");
      return false;
    }
  } else {
    // FLAT segment: one bit less, unsigned only (see comment above).
    if (!isUIntN(OffsetSize - 1, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            isGFX9() ? "expected a 12-bit unsigned offset" :
                       "expected an 11-bit unsigned offset");
      return false;
    }
  }

  return true;
}
3205 
3206 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3207   unsigned Opcode = Inst.getOpcode();
3208   const MCInstrDesc &Desc = MII.get(Opcode);
3209   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3210     return true;
3211 
3212   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3213   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3214 
3215   const int OpIndices[] = { Src0Idx, Src1Idx };
3216 
3217   unsigned NumLiterals = 0;
3218   uint32_t LiteralValue;
3219 
3220   for (int OpIdx : OpIndices) {
3221     if (OpIdx == -1) break;
3222 
3223     const MCOperand &MO = Inst.getOperand(OpIdx);
3224     if (MO.isImm() &&
3225         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3226         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3227         !isInlineConstant(Inst, OpIdx)) {
3228       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3229       if (NumLiterals == 0 || LiteralValue != Value) {
3230         LiteralValue = Value;
3231         ++NumLiterals;
3232       }
3233     }
3234   }
3235 
3236   return NumLiterals <= 1;
3237 }
3238 
3239 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3240   const unsigned Opc = Inst.getOpcode();
3241   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3242       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3243     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3244     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3245 
3246     if (OpSel & ~3)
3247       return false;
3248   }
3249   return true;
3250 }
3251 
3252 // Check if VCC register matches wavefront size
3253 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3254   auto FB = getFeatureBits();
3255   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3256     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3257 }
3258 
3259 // VOP3 literal is only allowed in GFX10+ and only one can be used
3260 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3261   unsigned Opcode = Inst.getOpcode();
3262   const MCInstrDesc &Desc = MII.get(Opcode);
3263   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3264     return true;
3265 
3266   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3267   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3268   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3269 
3270   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3271 
3272   unsigned NumLiterals = 0;
3273   uint32_t LiteralValue;
3274 
3275   for (int OpIdx : OpIndices) {
3276     if (OpIdx == -1) break;
3277 
3278     const MCOperand &MO = Inst.getOperand(OpIdx);
3279     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3280       continue;
3281 
3282     if (!isInlineConstant(Inst, OpIdx)) {
3283       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3284       if (NumLiterals == 0 || LiteralValue != Value) {
3285         LiteralValue = Value;
3286         ++NumLiterals;
3287       }
3288     }
3289   }
3290 
3291   return !NumLiterals ||
3292          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3293 }
3294 
3295 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3296                                           const SMLoc &IDLoc,
3297                                           const OperandVector &Operands) {
3298   if (!validateLdsDirect(Inst)) {
3299     Error(IDLoc,
3300       "invalid use of lds_direct");
3301     return false;
3302   }
3303   if (!validateSOPLiteral(Inst)) {
3304     Error(IDLoc,
3305       "only one literal operand is allowed");
3306     return false;
3307   }
3308   if (!validateVOP3Literal(Inst)) {
3309     Error(IDLoc,
3310       "invalid literal operand");
3311     return false;
3312   }
3313   if (!validateConstantBusLimitations(Inst)) {
3314     Error(IDLoc,
3315       "invalid operand (violates constant bus restrictions)");
3316     return false;
3317   }
3318   if (!validateEarlyClobberLimitations(Inst)) {
3319     Error(IDLoc,
3320       "destination must be different than all sources");
3321     return false;
3322   }
3323   if (!validateIntClampSupported(Inst)) {
3324     Error(IDLoc,
3325       "integer clamping is not supported on this GPU");
3326     return false;
3327   }
3328   if (!validateOpSel(Inst)) {
3329     Error(IDLoc,
3330       "invalid op_sel operand");
3331     return false;
3332   }
3333   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3334   if (!validateMIMGD16(Inst)) {
3335     Error(IDLoc,
3336       "d16 modifier is not supported on this GPU");
3337     return false;
3338   }
3339   if (!validateMIMGDim(Inst)) {
3340     Error(IDLoc, "dim modifier is required on this GPU");
3341     return false;
3342   }
3343   if (!validateMIMGDataSize(Inst)) {
3344     Error(IDLoc,
3345       "image data size does not match dmask and tfe");
3346     return false;
3347   }
3348   if (!validateMIMGAddrSize(Inst)) {
3349     Error(IDLoc,
3350       "image address size does not match dim and a16");
3351     return false;
3352   }
3353   if (!validateMIMGAtomicDMask(Inst)) {
3354     Error(IDLoc,
3355       "invalid atomic image dmask");
3356     return false;
3357   }
3358   if (!validateMIMGGatherDMask(Inst)) {
3359     Error(IDLoc,
3360       "invalid image_gather dmask: only one bit must be set");
3361     return false;
3362   }
3363   if (!validateFlatOffset(Inst, Operands)) {
3364     return false;
3365   }
3366 
3367   return true;
3368 }
3369 
3370 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3371                                             const FeatureBitset &FBS,
3372                                             unsigned VariantID = 0);
3373 
// Matches the parsed operands against each candidate asm variant, keeping
// the most specific failure status, then validates and emits the matched
// instruction. Returns true on error (with a diagnostic), false on success.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    // Stop at the first variant that matches.
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // validateInstruction emits its own diagnostic on failure.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Suggest a close mnemonic, if one exists for the available features.
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    // ErrorInfo, when valid, is the index of the offending operand.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}
3443 
3444 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3445   int64_t Tmp = -1;
3446   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3447     return true;
3448   }
3449   if (getParser().parseAbsoluteExpression(Tmp)) {
3450     return true;
3451   }
3452   Ret = static_cast<uint32_t>(Tmp);
3453   return false;
3454 }
3455 
// Parses a comma-separated "major, minor" version pair used by version
// directives. Returns true (with a diagnostic) on malformed input.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  // The two numbers must be separated by a comma.
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
3470 
3471 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3472   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3473     return TokError("directive only supported for amdgcn architecture");
3474 
3475   std::string Target;
3476 
3477   SMLoc TargetStart = getTok().getLoc();
3478   if (getParser().parseEscapedString(Target))
3479     return true;
3480   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3481 
3482   std::string ExpectedTarget;
3483   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3484   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3485 
3486   if (Target != ExpectedTargetOS.str())
3487     return getParser().Error(TargetRange.Start, "target must match options",
3488                              TargetRange);
3489 
3490   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3491   return false;
3492 }
3493 
// Emits a "value out of range" diagnostic covering the given source range.
// Always returns true so callers can `return OutOfRangeError(...)`.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}
3497 
// Converts the highest used VGPR/SGPR numbers into the "block" granularity
// counts reported in the kernel descriptor. Returns true (after emitting an
// out-of-range diagnostic) on failure, false on success.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // GFX10+: the SGPR count is forced to zero, so zero blocks are reported.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // GFX8+ (without the SGPR init bug) checks the user-specified count
    // before the implicitly reserved SGPRs are added.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for SGPRs implicitly reserved for VCC, flat scratch and XNACK.
    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // Older targets (and those with the SGPR init bug) check the total
    // count including the extra SGPRs.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // The SGPR init bug requires a fixed SGPR allocation.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
3537 
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  // Parses a ".amdhsa_kernel <name>" ... ".end_amdhsa_kernel" block into a
  // code object v3 kernel descriptor and emits it through the target
  // streamer. Returns true after a diagnostic on any error.
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  // Start from the subtarget's default descriptor; the directives below
  // overwrite individual fields.
  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Records every .amdhsa_ directive seen, to reject repeats and to verify
  // the required ones after the loop.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  // Running total of SGPRs consumed by the enabled user-SGPR preloads.
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();
  Optional<bool> EnableWavefrontSize32;

  while (true) {
    // Lexing a comment also produces EndOfStatement, so consume any run of
    // terminators before looking for the next directive.
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    // Every directive takes a single non-negative integer operand.
    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the bit-width of descriptor field ENTRY and
// store it. Expands inside the dispatch chain below.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      // Remembered separately: the VGPR block granularity depends on it.
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // The two next_free_* directives are mandatory: the granulated register
  // block fields below are derived from them.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  // Fold the computed block counts and user SGPR count into the descriptor,
  // range-checking each against its field width.
  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
3818 
3819 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3820   uint32_t Major;
3821   uint32_t Minor;
3822 
3823   if (ParseDirectiveMajorMinor(Major, Minor))
3824     return true;
3825 
3826   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3827   return false;
3828 }
3829 
3830 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3831   uint32_t Major;
3832   uint32_t Minor;
3833   uint32_t Stepping;
3834   StringRef VendorName;
3835   StringRef ArchName;
3836 
3837   // If this directive has no arguments, then use the ISA version for the
3838   // targeted GPU.
3839   if (getLexer().is(AsmToken::EndOfStatement)) {
3840     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3841     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3842                                                       ISA.Stepping,
3843                                                       "AMD", "AMDGPU");
3844     return false;
3845   }
3846 
3847   if (ParseDirectiveMajorMinor(Major, Minor))
3848     return true;
3849 
3850   if (getLexer().isNot(AsmToken::Comma))
3851     return TokError("stepping version number required, comma expected");
3852   Lex();
3853 
3854   if (ParseAsAbsoluteExpression(Stepping))
3855     return TokError("invalid stepping version");
3856 
3857   if (getLexer().isNot(AsmToken::Comma))
3858     return TokError("vendor name required, comma expected");
3859   Lex();
3860 
3861   if (getLexer().isNot(AsmToken::String))
3862     return TokError("invalid vendor name");
3863 
3864   VendorName = getLexer().getTok().getStringContents();
3865   Lex();
3866 
3867   if (getLexer().isNot(AsmToken::Comma))
3868     return TokError("arch name required, comma expected");
3869   Lex();
3870 
3871   if (getLexer().isNot(AsmToken::String))
3872     return TokError("invalid arch name");
3873 
3874   ArchName = getLexer().getTok().getStringContents();
3875   Lex();
3876 
3877   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3878                                                     VendorName, ArchName);
3879   return false;
3880 }
3881 
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // Parse one "name = value" field of an .amd_kernel_code_t block into
  // \p Header, then apply subtarget-specific validation to a handful of
  // fields. Returns true after emitting a diagnostic.
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // Wave32 is only legal on GFX10+ and must agree with the wavefront-size
  // feature the assembler was configured with.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is log2: 5 -> 32 lanes, 6 -> 64 lanes. It must be
  // consistent with the feature bits as well.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  // The following PGM_RSRC bits only exist on GFX10+.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}
3939 
3940 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3941   amd_kernel_code_t Header;
3942   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3943 
3944   while (true) {
3945     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3946     // will set the current token to EndOfStatement.
3947     while(getLexer().is(AsmToken::EndOfStatement))
3948       Lex();
3949 
3950     if (getLexer().isNot(AsmToken::Identifier))
3951       return TokError("expected value identifier or .end_amd_kernel_code_t");
3952 
3953     StringRef ID = getLexer().getTok().getIdentifier();
3954     Lex();
3955 
3956     if (ID == ".end_amd_kernel_code_t")
3957       break;
3958 
3959     if (ParseAMDKernelCodeTValue(ID, Header))
3960       return true;
3961   }
3962 
3963   getTargetStreamer().EmitAMDKernelCodeT(Header);
3964 
3965   return false;
3966 }
3967 
3968 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3969   if (getLexer().isNot(AsmToken::Identifier))
3970     return TokError("expected symbol name");
3971 
3972   StringRef KernelName = Parser.getTok().getString();
3973 
3974   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3975                                            ELF::STT_AMDGPU_HSA_KERNEL);
3976   Lex();
3977   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3978     KernelScope.initialize(getContext());
3979   return false;
3980 }
3981 
3982 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3983   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3984     return Error(getParser().getTok().getLoc(),
3985                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3986                  "architectures");
3987   }
3988 
3989   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3990 
3991   std::string ISAVersionStringFromSTI;
3992   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3993   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3994 
3995   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3996     return Error(getParser().getTok().getLoc(),
3997                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3998                  "arguments specified through the command line");
3999   }
4000 
4001   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4002   Lex();
4003 
4004   return false;
4005 }
4006 
4007 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4008   const char *AssemblerDirectiveBegin;
4009   const char *AssemblerDirectiveEnd;
4010   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4011       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4012           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4013                             HSAMD::V3::AssemblerDirectiveEnd)
4014           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4015                             HSAMD::AssemblerDirectiveEnd);
4016 
4017   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4018     return Error(getParser().getTok().getLoc(),
4019                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4020                  "not available on non-amdhsa OSes")).str());
4021   }
4022 
4023   std::string HSAMetadataString;
4024   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4025                           HSAMetadataString))
4026     return true;
4027 
4028   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4029     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4030       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4031   } else {
4032     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4033       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4034   }
4035 
4036   return false;
4037 }
4038 
4039 /// Common code to parse out a block of text (typically YAML) between start and
4040 /// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  // Accumulates the raw text between the begin and end directives into
  // CollectString, one statement per separator string.

  raw_string_ostream CollectStream(CollectString);

  // The lexer normally skips whitespace; preserve it here so the collected
  // text round-trips (restored below before returning).
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Copy leading spaces through verbatim.
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    // Stop as soon as the end directive appears at the start of a statement.
    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  // Reaching EOF without seeing the end directive is an error.
  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
4081 
4082 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4083 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4084   std::string String;
4085   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4086                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4087     return true;
4088 
4089   auto PALMetadata = getTargetStreamer().getPALMetadata();
4090   if (!PALMetadata->setFromString(String))
4091     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4092   return false;
4093 }
4094 
4095 /// Parse the assembler directive for old linear-format PAL metadata.
4096 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4097   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4098     return Error(getParser().getTok().getLoc(),
4099                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4100                  "not available on non-amdpal OSes")).str());
4101   }
4102 
4103   auto PALMetadata = getTargetStreamer().getPALMetadata();
4104   PALMetadata->setLegacy();
4105   for (;;) {
4106     uint32_t Key, Value;
4107     if (ParseAsAbsoluteExpression(Key)) {
4108       return TokError(Twine("invalid value in ") +
4109                       Twine(PALMD::AssemblerDirective));
4110     }
4111     if (getLexer().isNot(AsmToken::Comma)) {
4112       return TokError(Twine("expected an even number of values in ") +
4113                       Twine(PALMD::AssemblerDirective));
4114     }
4115     Lex();
4116     if (ParseAsAbsoluteExpression(Value)) {
4117       return TokError(Twine("invalid value in ") +
4118                       Twine(PALMD::AssemblerDirective));
4119     }
4120     PALMetadata->setRegister(Key, Value);
4121     if (getLexer().isNot(AsmToken::Comma))
4122       break;
4123     Lex();
4124   }
4125   return false;
4126 }
4127 
4128 /// ParseDirectiveAMDGPULDS
4129 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4130 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4131   if (getParser().checkForValidSection())
4132     return true;
4133 
4134   StringRef Name;
4135   SMLoc NameLoc = getLexer().getLoc();
4136   if (getParser().parseIdentifier(Name))
4137     return TokError("expected identifier in directive");
4138 
4139   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4140   if (parseToken(AsmToken::Comma, "expected ','"))
4141     return true;
4142 
4143   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4144 
4145   int64_t Size;
4146   SMLoc SizeLoc = getLexer().getLoc();
4147   if (getParser().parseAbsoluteExpression(Size))
4148     return true;
4149   if (Size < 0)
4150     return Error(SizeLoc, "size must be non-negative");
4151   if (Size > LocalMemorySize)
4152     return Error(SizeLoc, "size is too large");
4153 
4154   int64_t Align = 4;
4155   if (getLexer().is(AsmToken::Comma)) {
4156     Lex();
4157     SMLoc AlignLoc = getLexer().getLoc();
4158     if (getParser().parseAbsoluteExpression(Align))
4159       return true;
4160     if (Align < 0 || !isPowerOf2_64(Align))
4161       return Error(AlignLoc, "alignment must be a power of two");
4162 
4163     // Alignment larger than the size of LDS is possible in theory, as long
4164     // as the linker manages to place to symbol at address 0, but we do want
4165     // to make sure the alignment fits nicely into a 32-bit integer.
4166     if (Align >= 1u << 31)
4167       return Error(AlignLoc, "alignment is too large");
4168   }
4169 
4170   if (parseToken(AsmToken::EndOfStatement,
4171                  "unexpected token in '.amdgpu_lds' directive"))
4172     return true;
4173 
4174   Symbol->redefineIfPossible();
4175   if (!Symbol->isUndefined())
4176     return Error(NameLoc, "invalid symbol redefinition");
4177 
4178   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4179   return false;
4180 }
4181 
4182 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4183   StringRef IDVal = DirectiveID.getString();
4184 
4185   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4186     if (IDVal == ".amdgcn_target")
4187       return ParseDirectiveAMDGCNTarget();
4188 
4189     if (IDVal == ".amdhsa_kernel")
4190       return ParseDirectiveAMDHSAKernel();
4191 
4192     // TODO: Restructure/combine with PAL metadata directive.
4193     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4194       return ParseDirectiveHSAMetadata();
4195   } else {
4196     if (IDVal == ".hsa_code_object_version")
4197       return ParseDirectiveHSACodeObjectVersion();
4198 
4199     if (IDVal == ".hsa_code_object_isa")
4200       return ParseDirectiveHSACodeObjectISA();
4201 
4202     if (IDVal == ".amd_kernel_code_t")
4203       return ParseDirectiveAMDKernelCodeT();
4204 
4205     if (IDVal == ".amdgpu_hsa_kernel")
4206       return ParseDirectiveAMDGPUHsaKernel();
4207 
4208     if (IDVal == ".amd_amdgpu_isa")
4209       return ParseDirectiveISAVersion();
4210 
4211     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4212       return ParseDirectiveHSAMetadata();
4213   }
4214 
4215   if (IDVal == ".amdgpu_lds")
4216     return ParseDirectiveAMDGPULDS();
4217 
4218   if (IDVal == PALMD::AssemblerDirectiveBegin)
4219     return ParseDirectivePALMetadataBegin();
4220 
4221   if (IDVal == PALMD::AssemblerDirective)
4222     return ParseDirectivePALMetadata();
4223 
4224   return true;
4225 }
4226 
// Return true if RegNo names a register that exists (and is addressable as an
// operand) on the current subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // TTMP12..TTMP15 (or any register aliasing them) are only addressable on
  // GFX9 and GFX10.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    // Aperture and POPS registers: not available on SI/CI/VI.
    return !isCI() && !isSI() && !isVI();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // Trap base/memory address registers: gone on GFX9 and GFX10.
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // XNACK_MASK requires the XNACK feature and is not present on SI/CI/GFX10.
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
4294 
// Parse a single instruction operand. Mode selects NSA (bracketed register
// list) parsing for GFX10 MIMG addresses.
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  // NSA form: "[vN, vM, ...]" — a list of individual address registers.
  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();
    SMLoc LBraceLoc = getTok().getLoc();
    Parser.Lex(); // eat the '['

    for (;;) {
      ResTy = parseReg(Operands);
      if (ResTy != MatchOperand_Success)
        return ResTy;

      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    // Keep the brackets as explicit tokens only when more than one register
    // was listed; a single register is treated as an ordinary operand.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
                                                    getTok().getLoc()));
    }

    Parser.Lex(); // eat the ']'
    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}
4342 
4343 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4344   // Clear any forced encodings from the previous instruction.
4345   setForcedEncodingSize(0);
4346   setForcedDPP(false);
4347   setForcedSDWA(false);
4348 
4349   if (Name.endswith("_e64")) {
4350     setForcedEncodingSize(64);
4351     return Name.substr(0, Name.size() - 4);
4352   } else if (Name.endswith("_e32")) {
4353     setForcedEncodingSize(32);
4354     return Name.substr(0, Name.size() - 4);
4355   } else if (Name.endswith("_dpp")) {
4356     setForcedDPP(true);
4357     return Name.substr(0, Name.size() - 4);
4358   } else if (Name.endswith("_sdwa")) {
4359     setForcedSDWA(true);
4360     return Name.substr(0, Name.size() - 5);
4361   }
4362   return Name;
4363 }
4364 
// Top-level per-instruction parse entry point. Returns true on error (the
// standard MCTargetAsmParser convention).
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // The first operand of a GFX10 MIMG instruction may use the NSA
    // (bracketed register list) syntax.
    if (IsMIMG && isGFX10() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    // Eat the comma or space if there is one.
    // NOTE(review): the comma is consumed before Res is inspected, so it is
    // eaten even on a failed parse — presumably harmless because the failure
    // paths below skip to EndOfStatement anyway; confirm.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
      case MatchOperand_Success: break;
      case MatchOperand_ParseFail:
        // FIXME: use real operand location rather than the current location.
        Error(getLexer().getLoc(), "failed parsing operand.");
        // Skip to the end of the statement to avoid cascading errors.
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
      case MatchOperand_NoMatch:
        // FIXME: use real operand location rather than the current location.
        Error(getLexer().getLoc(), "not a valid operand.");
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
    }
  }

  return false;
}
4405 
4406 //===----------------------------------------------------------------------===//
4407 // Utility functions
4408 //===----------------------------------------------------------------------===//
4409 
4410 OperandMatchResultTy
4411 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4412 
4413   if (!trySkipId(Prefix, AsmToken::Colon))
4414     return MatchOperand_NoMatch;
4415 
4416   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4417 }
4418 
// Parse "<Prefix>:<value>" and append it to Operands as an immediate of type
// ImmTy. ConvertResult, when supplied, may rewrite the parsed value in place.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  // NOTE(review): on conversion failure an error is reported, yet the operand
  // is still pushed and Success returned — confirm this is intentional.
  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}
4437 
// Parse "<Prefix>:[a,b,...]" where each element must be 0 or 1. Elements are
// packed LSB-first into a single immediate of up to MaxSize bits.
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  // NOTE(review): ConvertResult is accepted but never invoked in this
  // function — confirm whether per-element conversion was intended.
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    // Pack bit I of the result.
    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
4483 
4484 OperandMatchResultTy
4485 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4486                                AMDGPUOperand::ImmTy ImmTy) {
4487   int64_t Bit = 0;
4488   SMLoc S = Parser.getTok().getLoc();
4489 
4490   // We are at the end of the statement, and this is a default argument, so
4491   // use a default value.
4492   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4493     switch(getLexer().getKind()) {
4494       case AsmToken::Identifier: {
4495         StringRef Tok = Parser.getTok().getString();
4496         if (Tok == Name) {
4497           if (Tok == "r128" && isGFX9())
4498             Error(S, "r128 modifier is not supported on this GPU");
4499           if (Tok == "a16" && !isGFX9() && !isGFX10())
4500             Error(S, "a16 modifier is not supported on this GPU");
4501           Bit = 1;
4502           Parser.Lex();
4503         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4504           Bit = 0;
4505           Parser.Lex();
4506         } else {
4507           return MatchOperand_NoMatch;
4508         }
4509         break;
4510       }
4511       default:
4512         return MatchOperand_NoMatch;
4513     }
4514   }
4515 
4516   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4517     return MatchOperand_ParseFail;
4518 
4519   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4520   return MatchOperand_Success;
4521 }
4522 
4523 static void addOptionalImmOperand(
4524   MCInst& Inst, const OperandVector& Operands,
4525   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4526   AMDGPUOperand::ImmTy ImmT,
4527   int64_t Default = 0) {
4528   auto i = OptionalIdx.find(ImmT);
4529   if (i != OptionalIdx.end()) {
4530     unsigned Idx = i->second;
4531     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4532   } else {
4533     Inst.addOperand(MCOperand::createImm(Default));
4534   }
4535 }
4536 
4537 OperandMatchResultTy
4538 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4539   if (getLexer().isNot(AsmToken::Identifier)) {
4540     return MatchOperand_NoMatch;
4541   }
4542   StringRef Tok = Parser.getTok().getString();
4543   if (Tok != Prefix) {
4544     return MatchOperand_NoMatch;
4545   }
4546 
4547   Parser.Lex();
4548   if (getLexer().isNot(AsmToken::Colon)) {
4549     return MatchOperand_ParseFail;
4550   }
4551 
4552   Parser.Lex();
4553   if (getLexer().isNot(AsmToken::Identifier)) {
4554     return MatchOperand_ParseFail;
4555   }
4556 
4557   Value = Parser.getTok().getString();
4558   return MatchOperand_Success;
4559 }
4560 
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        // NOTE(review): this Lex() consumes the token after the value —
        // presumably the separator before the next field; confirm behavior
        // when no separator follows.
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  // Joint operand layout: dfmt in bits [3:0], nfmt starting at bit 4.
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
4607 
4608 //===----------------------------------------------------------------------===//
4609 // ds
4610 //===----------------------------------------------------------------------===//
4611 
4612 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4613                                     const OperandVector &Operands) {
4614   OptionalImmIndexMap OptionalIdx;
4615 
4616   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4617     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4618 
4619     // Add the register arguments
4620     if (Op.isReg()) {
4621       Op.addRegOperands(Inst, 1);
4622       continue;
4623     }
4624 
4625     // Handle optional arguments
4626     OptionalIdx[Op.getImmTy()] = i;
4627   }
4628 
4629   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4630   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4631   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4632 
4633   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4634 }
4635 
4636 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4637                                 bool IsGdsHardcoded) {
4638   OptionalImmIndexMap OptionalIdx;
4639 
4640   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4641     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4642 
4643     // Add the register arguments
4644     if (Op.isReg()) {
4645       Op.addRegOperands(Inst, 1);
4646       continue;
4647     }
4648 
4649     if (Op.isToken() && Op.getToken() == "gds") {
4650       IsGdsHardcoded = true;
4651       continue;
4652     }
4653 
4654     // Handle optional arguments
4655     OptionalIdx[Op.getImmTy()] = i;
4656   }
4657 
4658   AMDGPUOperand::ImmTy OffsetType =
4659     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4660      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4661      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4662                                                       AMDGPUOperand::ImmTyOffset;
4663 
4664   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4665 
4666   if (!IsGdsHardcoded) {
4667     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4668   }
4669   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4670 }
4671 
// Convert parsed operands of an export instruction into an MCInst, computing
// the enable mask from which sources are active.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst indices of the four source slots; used below to rewrite them for
  // compressed exports and to compute the enable mask.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An "off" placeholder becomes a NoRegister source.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done" tokens are dropped here (no explicit MCInst operand).
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: move source 2 into slot 1 and clear slots 2 and 3.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // One enable bit per active source; two bits per source when compressed.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
4732 
4733 //===----------------------------------------------------------------------===//
4734 // s_waitcnt
4735 //===----------------------------------------------------------------------===//
4736 
4737 static bool
4738 encodeCnt(
4739   const AMDGPU::IsaVersion ISA,
4740   int64_t &IntVal,
4741   int64_t CntVal,
4742   bool Saturate,
4743   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4744   unsigned (*decode)(const IsaVersion &Version, unsigned))
4745 {
4746   bool Failed = false;
4747 
4748   IntVal = encode(ISA, IntVal, CntVal);
4749   if (CntVal != decode(ISA, IntVal)) {
4750     if (Saturate) {
4751       IntVal = encode(ISA, IntVal, -1);
4752     } else {
4753       Failed = true;
4754     }
4755   }
4756   return Failed;
4757 }
4758 
// Parse one "<name>(<value>)" clause of s_waitcnt (e.g. vmcnt(0)) and merge
// its encoding into IntVal. Returns false (after reporting an error) on
// failure.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  // A "_sat" suffix clamps an out-of-range value instead of failing.
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Clauses may be separated by '&' or ','; a separator must be followed by
  // another clause.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
4806 
4807 OperandMatchResultTy
4808 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4809   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4810   int64_t Waitcnt = getWaitcntBitMask(ISA);
4811   SMLoc S = getLoc();
4812 
4813   // If parse failed, do not return error code
4814   // to avoid excessive error messages.
4815   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4816     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4817   } else {
4818     parseExpr(Waitcnt);
4819   }
4820 
4821   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4822   return MatchOperand_Success;
4823 }
4824 
// Any immediate may serve as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
4829 
4830 //===----------------------------------------------------------------------===//
4831 // hwreg
4832 //===----------------------------------------------------------------------===//
4833 
4834 bool
4835 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4836                                 int64_t &Offset,
4837                                 int64_t &Width) {
4838   using namespace llvm::AMDGPU::Hwreg;
4839 
4840   // The register may be specified by name or using a numeric code
4841   if (isToken(AsmToken::Identifier) &&
4842       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4843     HwReg.IsSymbolic = true;
4844     lex(); // skip message name
4845   } else if (!parseExpr(HwReg.Id)) {
4846     return false;
4847   }
4848 
4849   if (trySkipToken(AsmToken::RParen))
4850     return true;
4851 
4852   // parse optional params
4853   return
4854     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4855     parseExpr(Offset) &&
4856     skipToken(AsmToken::Comma, "expected a comma") &&
4857     parseExpr(Width) &&
4858     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4859 }
4860 
4861 bool
4862 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4863                                const int64_t Offset,
4864                                const int64_t Width,
4865                                const SMLoc Loc) {
4866 
4867   using namespace llvm::AMDGPU::Hwreg;
4868 
4869   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4870     Error(Loc, "specified hardware register is not supported on this GPU");
4871     return false;
4872   } else if (!isValidHwreg(HwReg.Id)) {
4873     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4874     return false;
4875   } else if (!isValidHwregOffset(Offset)) {
4876     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4877     return false;
4878   } else if (!isValidHwregWidth(Width)) {
4879     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4880     return false;
4881   }
4882   return true;
4883 }
4884 
4885 OperandMatchResultTy
4886 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4887   using namespace llvm::AMDGPU::Hwreg;
4888 
4889   int64_t ImmVal = 0;
4890   SMLoc Loc = getLoc();
4891 
4892   // If parse failed, do not return error code
4893   // to avoid excessive error messages.
4894   if (trySkipId("hwreg", AsmToken::LParen)) {
4895     OperandInfoTy HwReg(ID_UNKNOWN_);
4896     int64_t Offset = OFFSET_DEFAULT_;
4897     int64_t Width = WIDTH_DEFAULT_;
4898     if (parseHwregBody(HwReg, Offset, Width) &&
4899         validateHwreg(HwReg, Offset, Width, Loc)) {
4900       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4901     }
4902   } else if (parseExpr(ImmVal)) {
4903     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4904       Error(Loc, "invalid immediate: only 16-bit values are legal");
4905   }
4906 
4907   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4908   return MatchOperand_Success;
4909 }
4910 
// An operand previously parsed and tagged as a hwreg immediate.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
4914 
4915 //===----------------------------------------------------------------------===//
4916 // sendmsg
4917 //===----------------------------------------------------------------------===//
4918 
// Parse the interior of sendmsg(...): "<msg>[, <op>[, <stream>]])". The
// message and operation may be symbolic names or integer expressions; the
// stream id is always an expression.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id)) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
4950 
4951 bool
4952 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4953                                  const OperandInfoTy &Op,
4954                                  const OperandInfoTy &Stream,
4955                                  const SMLoc S) {
4956   using namespace llvm::AMDGPU::SendMsg;
4957 
4958   // Validation strictness depends on whether message is specified
4959   // in a symbolc or in a numeric form. In the latter case
4960   // only encoding possibility is checked.
4961   bool Strict = Msg.IsSymbolic;
4962 
4963   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4964     Error(S, "invalid message id");
4965     return false;
4966   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4967     Error(S, Op.IsDefined ?
4968              "message does not support operations" :
4969              "missing message operation");
4970     return false;
4971   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4972     Error(S, "invalid operation id");
4973     return false;
4974   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4975     Error(S, "message operation does not support streams");
4976     return false;
4977   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4978     Error(S, "invalid message stream id");
4979     return false;
4980   }
4981   return true;
4982 }
4983 
4984 OperandMatchResultTy
4985 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4986   using namespace llvm::AMDGPU::SendMsg;
4987 
4988   int64_t ImmVal = 0;
4989   SMLoc Loc = getLoc();
4990 
4991   // If parse failed, do not return error code
4992   // to avoid excessive error messages.
4993   if (trySkipId("sendmsg", AsmToken::LParen)) {
4994     OperandInfoTy Msg(ID_UNKNOWN_);
4995     OperandInfoTy Op(OP_NONE_);
4996     OperandInfoTy Stream(STREAM_ID_NONE_);
4997     if (parseSendMsgBody(Msg, Op, Stream) &&
4998         validateSendMsg(Msg, Op, Stream, Loc)) {
4999       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5000     }
5001   } else if (parseExpr(ImmVal)) {
5002     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5003       Error(Loc, "invalid immediate: only 16-bit values are legal");
5004   }
5005 
5006   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5007   return MatchOperand_Success;
5008 }
5009 
// An operand previously parsed and tagged as a sendmsg immediate.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
5013 
5014 //===----------------------------------------------------------------------===//
5015 // v_interp
5016 //===----------------------------------------------------------------------===//
5017 
5018 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5019   if (getLexer().getKind() != AsmToken::Identifier)
5020     return MatchOperand_NoMatch;
5021 
5022   StringRef Str = Parser.getTok().getString();
5023   int Slot = StringSwitch<int>(Str)
5024     .Case("p10", 0)
5025     .Case("p20", 1)
5026     .Case("p0", 2)
5027     .Default(-1);
5028 
5029   SMLoc S = Parser.getTok().getLoc();
5030   if (Slot == -1)
5031     return MatchOperand_ParseFail;
5032 
5033   Parser.Lex();
5034   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5035                                               AMDGPUOperand::ImmTyInterpSlot));
5036   return MatchOperand_Success;
5037 }
5038 
// Parse "attr<N>.<chan>" (e.g. attr3.x) into two immediates: the attribute
// number and the channel index (x/y/z/w -> 0..3).
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the "attr" prefix and the ".<chan>" suffix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  // NOTE(review): an out-of-range attr reports an error but returns Success
  // without pushing any operands — confirm this is intentional.
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
5078 
5079 //===----------------------------------------------------------------------===//
5080 // exp
5081 //===----------------------------------------------------------------------===//
5082 
// Report an invalid/out-of-range export target at the current token.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
5086 
// Decode an export target name into its hardware target index:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos4 -> 12..16,
//   prim -> 20 (GFX10 only), param0..param31 -> 32..63.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    // pos4 is only valid on GFX10.
    if (Val > 4 || (Val == 4 && !isGFX10()))
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (isGFX10() && Str == "prim") {
    Val = 20;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  // "invalid_target_<N>" round-trips an explicitly-invalid target index.
  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
5150 
5151 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5152   uint8_t Val;
5153   StringRef Str = Parser.getTok().getString();
5154 
5155   auto Res = parseExpTgtImpl(Str, Val);
5156   if (Res != MatchOperand_Success)
5157     return Res;
5158 
5159   SMLoc S = Parser.getTok().getLoc();
5160   Parser.Lex();
5161 
5162   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5163                                               AMDGPUOperand::ImmTyExpTgt));
5164   return MatchOperand_Success;
5165 }
5166 
5167 //===----------------------------------------------------------------------===//
5168 // parser helpers
5169 //===----------------------------------------------------------------------===//
5170 
5171 bool
5172 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5173   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5174 }
5175 
// Convenience overload testing the current token.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}
5180 
// True when the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}
5185 
5186 bool
5187 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5188   if (isId(Id)) {
5189     lex();
5190     return true;
5191   }
5192   return false;
5193 }
5194 
5195 bool
5196 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5197   if (isId(Id) && peekToken().is(Kind)) {
5198     lex();
5199     lex();
5200     return true;
5201   }
5202   return false;
5203 }
5204 
5205 bool
5206 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5207   if (isToken(Kind)) {
5208     lex();
5209     return true;
5210   }
5211   return false;
5212 }
5213 
5214 bool
5215 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5216                            const StringRef ErrMsg) {
5217   if (!trySkipToken(Kind)) {
5218     Error(getLoc(), ErrMsg);
5219     return false;
5220   }
5221   return true;
5222 }
5223 
// Parse an absolute (compile-time constant) expression into \p Imm.
// Returns true on success; the underlying parser emits diagnostics itself.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}
5228 
5229 bool
5230 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5231   SMLoc S = getLoc();
5232 
5233   const MCExpr *Expr;
5234   if (Parser.parseExpression(Expr))
5235     return false;
5236 
5237   int64_t IntVal;
5238   if (Expr->evaluateAsAbsolute(IntVal)) {
5239     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5240   } else {
5241     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5242   }
5243   return true;
5244 }
5245 
5246 bool
5247 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5248   if (isToken(AsmToken::String)) {
5249     Val = getToken().getStringContents();
5250     lex();
5251     return true;
5252   } else {
5253     Error(getLoc(), ErrMsg);
5254     return false;
5255   }
5256 }
5257 
// Return the current lookahead token without consuming it.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
5262 
// Return the token after the current one without consuming anything.
AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}
5267 
5268 void
5269 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5270   auto TokCount = getLexer().peekTokens(Tokens);
5271 
5272   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5273     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5274 }
5275 
// Return the kind of the current lookahead token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
5280 
// Return the source location of the current lookahead token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
5285 
// Return the text of the current lookahead token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
5290 
// Consume the current token and advance to the next one.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
5295 
5296 //===----------------------------------------------------------------------===//
5297 // swizzle
5298 //===----------------------------------------------------------------------===//
5299 
// Pack the three ds_swizzle BITMASK_PERM fields (and/or/xor masks) together
// with the BITMASK_PERM encoding tag into a single swizzle offset value.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}
5312 
5313 bool
5314 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5315                                       const unsigned MinVal,
5316                                       const unsigned MaxVal,
5317                                       const StringRef ErrMsg) {
5318   for (unsigned i = 0; i < OpNum; ++i) {
5319     if (!skipToken(AsmToken::Comma, "expected a comma")){
5320       return false;
5321     }
5322     SMLoc ExprLoc = Parser.getTok().getLoc();
5323     if (!parseExpr(Op[i])) {
5324       return false;
5325     }
5326     if (Op[i] < MinVal || Op[i] > MaxVal) {
5327       Error(ExprLoc, ErrMsg);
5328       return false;
5329     }
5330   }
5331 
5332   return true;
5333 }
5334 
// Parse the swizzle QUAD_PERM macro body: four comma-separated 2-bit lane
// ids, packed LANE_SHIFT bits apart into \p Imm along with QUAD_PERM_ENC.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}
5350 
// Parse the swizzle BROADCAST macro body: a power-of-two group size in
// [2,32] and a lane id within the group. Encoded as a bitmask perm whose
// AND mask keeps the high bits that select the group and whose OR mask
// injects the broadcast lane index.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
5376 
// Parse the swizzle REVERSE macro body: a power-of-two group size in [2,32].
// Encoded as a bitmask perm with XOR mask (GroupSize - 1), which reverses
// lane order within each group.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
      2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
5396 
// Parse the swizzle SWAP macro body: a power-of-two group size in [1,16].
// Encoded as a bitmask perm with XOR mask GroupSize, which swaps adjacent
// groups of that size.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
      1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
5416 
// Parse the swizzle BITMASK_PERM macro body: a comma followed by a quoted
// BITMASK_WIDTH-character control string, most-significant bit first.
// Per character: '0' forces the bit to 0, '1' forces it to 1 (OR mask),
// 'p' preserves the lane bit (AND mask), 'i' inverts it (AND + XOR masks).
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // Characters are given MSB-first; compute the bit each one controls.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
5463 
5464 bool
5465 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5466 
5467   SMLoc OffsetLoc = Parser.getTok().getLoc();
5468 
5469   if (!parseExpr(Imm)) {
5470     return false;
5471   }
5472   if (!isUInt<16>(Imm)) {
5473     Error(OffsetLoc, "expected a 16-bit offset");
5474     return false;
5475   }
5476   return true;
5477 }
5478 
// Parse the parenthesized body of a "swizzle(...)" macro, dispatching on
// the mode identifier (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE)
// to the matching helper. Returns true only if the mode body parsed and
// the closing parenthesis was consumed.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
5507 
// Parse the optional ds_swizzle "offset:" operand. Accepts either
// "offset:swizzle(...)" (symbolic macro) or "offset:<imm>" (raw 16-bit
// value). The operand is pushed even on failure so operand counting stays
// consistent; a ParseFail result carries the diagnostic already emitted.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}
5533 
// Return true if this operand is a parsed swizzle immediate.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
5538 
5539 //===----------------------------------------------------------------------===//
5540 // VGPR Index Mode
5541 //===----------------------------------------------------------------------===//
5542 
// Parse the interior of a "gpr_idx(...)" macro: a comma-separated list of
// VGPR index mode identifiers terminated by ')'. An empty list "()" yields
// OFF. Each recognized mode sets one bit in the returned value; duplicate
// modes and unknown identifiers are diagnosed, and parsing stops there
// (returning whatever bits were accumulated so far).
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    // Match the current identifier against the symbolic mode names.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}
5586 
5587 OperandMatchResultTy
5588 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5589 
5590   int64_t Imm = 0;
5591   SMLoc S = Parser.getTok().getLoc();
5592 
5593   if (getLexer().getKind() == AsmToken::Identifier &&
5594       Parser.getTok().getString() == "gpr_idx" &&
5595       getLexer().peekTok().is(AsmToken::LParen)) {
5596 
5597     Parser.Lex();
5598     Parser.Lex();
5599 
5600     // If parse failed, trigger an error but do not return error code
5601     // to avoid excessive error messages.
5602     Imm = parseGPRIdxMacro();
5603 
5604   } else {
5605     if (getParser().parseAbsoluteExpression(Imm))
5606       return MatchOperand_NoMatch;
5607     if (Imm < 0 || !isUInt<4>(Imm)) {
5608       Error(S, "invalid immediate: only 4-bit values are legal");
5609     }
5610   }
5611 
5612   Operands.push_back(
5613       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5614   return MatchOperand_Success;
5615 }
5616 
// Return true if this operand is a parsed VGPR index mode immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
5620 
5621 //===----------------------------------------------------------------------===//
5622 // sopp branch targets
5623 //===----------------------------------------------------------------------===//
5624 
// Parse a SOPP branch target: a label (symbol reference) or an absolute
// expression that fits in a signed 16-bit jump offset. Other expression
// forms and out-of-range immediates are diagnosed, but Success is still
// returned to suppress follow-on error messages.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (parseExpr(Operands)) {

    AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
    assert(Opr.isImm() || Opr.isExpr());
    SMLoc Loc = Opr.getStartLoc();

    // Currently we do not support arbitrary expressions as branch targets.
    // Only labels and absolute expressions are accepted.
    if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
      Error(Loc, "expected an absolute expression or a label");
    } else if (Opr.isImm() && !Opr.isS16Imm()) {
      Error(Loc, "expected a 16-bit signed jump offset");
    }
  }

  return MatchOperand_Success; // avoid excessive error messages
}
5651 
5652 //===----------------------------------------------------------------------===//
5653 // Boolean holding registers
5654 //===----------------------------------------------------------------------===//
5655 
// Parse a boolean-holding register operand; plain register parsing applies.
OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
5660 
5661 //===----------------------------------------------------------------------===//
5662 // mubuf
5663 //===----------------------------------------------------------------------===//
5664 
// Default value (0) for an omitted 'dlc' modifier.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
}
5668 
// Default value (0) for an omitted 'glc' modifier.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
5672 
// Default value (0) for an omitted 'slc' modifier.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
5676 
// Convert parsed MUBUF operands into MCInst operands. Registers and the
// immediate soffset are added in order; named modifiers are collected into
// OptionalIdx and appended afterwards in the instruction's fixed order.
// For atomics with return, the dst register is duplicated as a tied src.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                               const OperandVector &Operands,
                               bool IsAtomic,
                               bool IsAtomicReturn,
                               bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);
  unsigned FirstOperandIdx = 1;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append the optional modifiers in the order the encoding expects them.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
5749 
// Convert parsed MTBUF operands into MCInst operands. Registers and the
// immediate soffset are added in order; named modifiers are collected into
// OptionalIdx and appended afterwards in the instruction's fixed order
// (offset, format, glc, slc, tfe, and on GFX10 dlc).
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
5789 
5790 //===----------------------------------------------------------------------===//
5791 // mimg
5792 //===----------------------------------------------------------------------===//
5793 
// Convert parsed MIMG operands into MCInst operands: defs first, then a
// tied copy of the single def when IsAtomic, then remaining registers, and
// finally the named modifiers in the instruction's fixed order. Which
// modifiers are appended (dim/dlc vs. da) depends on whether the target is
// GFX10.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  // Append the optional modifiers in the order the encoding expects them.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
5840 
// Atomic MIMG conversion: same as cvtMIMG with the tied-src handling on.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
5844 
5845 //===----------------------------------------------------------------------===//
5846 // smrd
5847 //===----------------------------------------------------------------------===//
5848 
// SMRD offsets that fit in 8 unsigned bits.
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}
5852 
// SMRD offsets that fit in 20 unsigned bits.
bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}
5856 
bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
5862 
// Default value (0) for an omitted 8-bit SMRD offset.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5866 
// Default value (0) for an omitted 20-bit SMRD offset.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5870 
// Default value (0) for an omitted SMRD literal offset.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5874 
// Default value (0) for an omitted FLAT offset.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5878 
5879 //===----------------------------------------------------------------------===//
5880 // vop3
5881 //===----------------------------------------------------------------------===//
5882 
// Convert an omod "mul" factor to its encoded field value:
// 1 -> 0, 2 -> 1, 4 -> 2. Any other factor is rejected (Mul unchanged).
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1; // log2-style encoding of the accepted factors.
    return true;
  default:
    return false;
  }
}
5890 
// Convert an omod "div" factor to its encoded field value:
// 1 -> 0, 2 -> 3. Any other factor is rejected (Div unchanged).
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
5904 
// Convert a DPP bound_ctrl value to its encoded field value:
// 0 -> 1, -1 -> 0. Any other value is rejected (BoundCtrl unchanged).
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != -1)
    return false;
  BoundCtrl = (BoundCtrl == 0) ? 1 : 0;
  return true;
}
5918 
// Table of all optional operands this parser recognizes. Each entry gives
// the operand name, its immediate type, whether it is a bare flag ("IsBit",
// e.g. 'glc') or takes a value (e.g. 'offset:N'), and an optional value
// conversion callback. parseOptionalOpr scans this table in order and
// returns on the first name that matches.
// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): "d16" also appears earlier in this table with an identical
  // entry; the first match wins in parseOptionalOpr, so this one looks
  // redundant — confirm whether the duplicate is intentional.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
5965 
// Parse one optional operand, then — when the previous operand was a
// register or this is the first operand — keep parsing further optional
// operands so that hardcoded mandatory operands placed after optional ones
// are never reached by the autogenerated matcher (see comment below).
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}
5998 
// Try each entry of AMDGPUOptionalOperandTable in order, dispatching to the
// parser appropriate for the operand's kind (named bit, omod, SDWA selects,
// operand arrays, dim, dfmt/nfmt, or generic "name:value"). Returns the
// first result that is not NoMatch; NoMatch if no table entry applied.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}
6032 
6033 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6034   StringRef Name = Parser.getTok().getString();
6035   if (Name == "mul") {
6036     return parseIntWithPrefix("mul", Operands,
6037                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6038   }
6039 
6040   if (Name == "div") {
6041     return parseIntWithPrefix("div", Operands,
6042                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6043   }
6044 
6045   return MatchOperand_NoMatch;
6046 }
6047 
// Convert a VOP3 instruction with destination op_sel: perform the normal
// VOP3P conversion, then, if the op_sel bit just past the last source
// (bit SrcNum) is set, record it as DST_OP_SEL in src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many source operands (src0..src2) this opcode has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
6071 
// Return true if operand \p OpNum of \p Desc is an input-modifiers operand
// that is paired with a following (untied) register operand.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
6082 
// Convert parsed VOP3 interpolation operands into MCInst operands: defs
// first, then sources (with FP input modifiers when the descriptor expects
// them), interp slot/attr/chan as plain immediates, and finally the
// optional high/clamp/omod modifiers if the opcode has them.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
6121 
// Convert parsed operands of a generic VOP3 instruction into MCInst operands.
// Positions of optional immediate modifiers are recorded in OptionalIdx so
// they (and any the caller adds later, e.g. for VOP3P) can be emitted in
// encoding order after the source operands.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; destination registers come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Modifiers + value occupy two MCInst slots.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    // Insert a zero src2_modifiers immediate, then duplicate the dst
    // operand as src2 right after it.
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
6184 
6185 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6186   OptionalImmIndexMap OptionalIdx;
6187   cvtVOP3(Inst, Operands, OptionalIdx);
6188 }
6189 
// Convert parsed operands of a VOP3P (packed) instruction. The operands are
// first converted as a normal VOP3 instruction; the op_sel/op_sel_hi and
// neg_lo/neg_hi immediates are then folded into the per-source modifier
// operands as SISrcMods bits.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    // Opcodes with a tied vdst_in operand receive a copy of the dst as an
    // extra input.
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // For packed instructions op_sel_hi defaults to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  // Gather the raw per-instruction bitmasks just emitted above.
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Scatter bit J of each mask into the modifiers operand of source J.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
6272 
6273 //===----------------------------------------------------------------------===//
6274 // dpp
6275 //===----------------------------------------------------------------------===//
6276 
6277 bool AMDGPUOperand::isDPP8() const {
6278   return isImmTy(ImmTyDPP8);
6279 }
6280 
6281 bool AMDGPUOperand::isDPPCtrl() const {
6282   using namespace AMDGPU::DPP;
6283 
6284   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6285   if (result) {
6286     int64_t Imm = getImm();
6287     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6288            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6289            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6290            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6291            (Imm == DppCtrl::WAVE_SHL1) ||
6292            (Imm == DppCtrl::WAVE_ROL1) ||
6293            (Imm == DppCtrl::WAVE_SHR1) ||
6294            (Imm == DppCtrl::WAVE_ROR1) ||
6295            (Imm == DppCtrl::ROW_MIRROR) ||
6296            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6297            (Imm == DppCtrl::BCAST15) ||
6298            (Imm == DppCtrl::BCAST31) ||
6299            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6300            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6301   }
6302   return false;
6303 }
6304 
6305 //===----------------------------------------------------------------------===//
6306 // mAI
6307 //===----------------------------------------------------------------------===//
6308 
6309 bool AMDGPUOperand::isBLGP() const {
6310   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6311 }
6312 
6313 bool AMDGPUOperand::isCBSZ() const {
6314   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6315 }
6316 
6317 bool AMDGPUOperand::isABID() const {
6318   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6319 }
6320 
6321 bool AMDGPUOperand::isS16Imm() const {
6322   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6323 }
6324 
6325 bool AMDGPUOperand::isU16Imm() const {
6326   return isImm() && isUInt<16>(getImm());
6327 }
6328 
// Parse a "dim:<name>" operand for MIMG instructions (GFX10 only), e.g.
// dim:2D or dim:SQ_RSRC_IMG_2D. Pushes an ImmTyDim immediate with the dim
// encoding on success.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    // The following identifier must start exactly where the integer ended;
    // otherwise there was whitespace between them and the name is invalid.
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  // Accept both the plain suffix ("2D") and the full name
  // ("SQ_RSRC_IMG_2D") by stripping the prefix.
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
6374 
// Parse a dpp8:[s0,...,s7] operand (GFX10 only): eight lane selectors, each
// in [0..7], packed into a single immediate 3 bits per lane. Any other
// identifier prefix is forwarded to parseDPPCtrl.
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  // First selector, then seven comma-separated ones; each must be 0..7.
  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  // Pack the eight 3-bit selectors into one 24-bit immediate.
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
6430 
// Parse a dpp_ctrl operand: either a bare keyword (row_mirror,
// row_half_mirror), quad_perm:[a,b,c,d], or one of the prefix:value forms
// (row_shl, row_shr, row_ror, wave_*, row_bcast, row_share, row_xmask).
// The parsed value is encoded per AMDGPU::DPP::DppCtrl and pushed as an
// ImmTyDppCtrl immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    // row_share/row_xmask exist only on GFX10.
    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    // The wave_* and row_bcast controls exist only on VI and GFX9.
    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // Each of the four lane selects must be 0..3; they are packed two
      // bits per lane, lane 0 in the low bits.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Translate the prefix:value pair into a DppCtrl encoding,
      // rejecting out-of-range values.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
6546 
6547 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6548   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6549 }
6550 
6551 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6552   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6553 }
6554 
6555 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6556   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6557 }
6558 
6559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6560   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6561 }
6562 
6563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6564   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6565 }
6566 
// Convert parsed operands of a DPP (or DPP8, when IsDPP8 is set) instruction
// into MCInst operands, duplicating tied operands and appending the trailing
// dpp control immediates in encoding order.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic token; destination registers come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst slot is tied to an earlier operand, duplicate that
    // operand before placing the parsed one.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is emitted after all other operands; remember its value.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // row_mask and bank_mask default to 0xf (all enabled).
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
6631 
6632 //===----------------------------------------------------------------------===//
6633 // sdwa
6634 //===----------------------------------------------------------------------===//
6635 
6636 OperandMatchResultTy
6637 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6638                               AMDGPUOperand::ImmTy Type) {
6639   using namespace llvm::AMDGPU::SDWA;
6640 
6641   SMLoc S = Parser.getTok().getLoc();
6642   StringRef Value;
6643   OperandMatchResultTy res;
6644 
6645   res = parseStringWithPrefix(Prefix, Value);
6646   if (res != MatchOperand_Success) {
6647     return res;
6648   }
6649 
6650   int64_t Int;
6651   Int = StringSwitch<int64_t>(Value)
6652         .Case("BYTE_0", SdwaSel::BYTE_0)
6653         .Case("BYTE_1", SdwaSel::BYTE_1)
6654         .Case("BYTE_2", SdwaSel::BYTE_2)
6655         .Case("BYTE_3", SdwaSel::BYTE_3)
6656         .Case("WORD_0", SdwaSel::WORD_0)
6657         .Case("WORD_1", SdwaSel::WORD_1)
6658         .Case("DWORD", SdwaSel::DWORD)
6659         .Default(0xffffffff);
6660   Parser.Lex(); // eat last token
6661 
6662   if (Int == 0xffffffff) {
6663     return MatchOperand_ParseFail;
6664   }
6665 
6666   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6667   return MatchOperand_Success;
6668 }
6669 
6670 OperandMatchResultTy
6671 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6672   using namespace llvm::AMDGPU::SDWA;
6673 
6674   SMLoc S = Parser.getTok().getLoc();
6675   StringRef Value;
6676   OperandMatchResultTy res;
6677 
6678   res = parseStringWithPrefix("dst_unused", Value);
6679   if (res != MatchOperand_Success) {
6680     return res;
6681   }
6682 
6683   int64_t Int;
6684   Int = StringSwitch<int64_t>(Value)
6685         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6686         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6687         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6688         .Default(0xffffffff);
6689   Parser.Lex(); // eat last token
6690 
6691   if (Int == 0xffffffff) {
6692     return MatchOperand_ParseFail;
6693   }
6694 
6695   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6696   return MatchOperand_Success;
6697 }
6698 
// Convert a parsed VOP1 SDWA instruction.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
6702 
// Convert a parsed VOP2 SDWA instruction.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
6706 
// Convert a parsed VOP2b SDWA instruction; the explicit vcc operand token
// is skipped (see cvtSDWA's skipVcc handling).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
6710 
// Convert a parsed VOPC SDWA instruction; the vcc operand token is skipped
// only on VI.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
6714 
// Convert parsed operands of an SDWA instruction into MCInst operands.
// BasicInstType selects which set of trailing sdwa modifier operands
// (dst_sel/dst_unused/src sels, clamp, omod) the encoding expects. When
// skipVcc is set, an explicit "vcc" operand token is dropped because the
// encoding carries it implicitly.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  // Operands[0] is the mnemonic token; destination registers come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Modifiers + value occupy two MCInst slots.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_gfx10/gfx9/vi has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
6805 
6806 //===----------------------------------------------------------------------===//
6807 // mAI
6808 //===----------------------------------------------------------------------===//
6809 
6810 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6811   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6812 }
6813 
6814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6815   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6816 }
6817 
6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6819   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6820 }
6821 
6822 /// Force static initialization.
6823 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6824   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6825   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6826 }
6827 
6828 #define GET_REGISTER_MATCHER
6829 #define GET_MATCHER_IMPLEMENTATION
6830 #define GET_MNEMONIC_SPELL_CHECKER
6831 #include "AMDGPUGenAsmMatcher.inc"
6832 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
6835 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6836                                                      unsigned Kind) {
6837   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6838   // But MatchInstructionImpl() expects to meet token and fails to validate
6839   // operand. This method checks if we are given immediate operand but expect to
6840   // get corresponding token.
6841   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6842   switch (Kind) {
6843   case MCK_addr64:
6844     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6845   case MCK_gds:
6846     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6847   case MCK_lds:
6848     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6849   case MCK_glc:
6850     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6851   case MCK_idxen:
6852     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6853   case MCK_offen:
6854     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6855   case MCK_SSrcB32:
6856     // When operands have expression values, they will return true for isToken,
6857     // because it is not possible to distinguish between a token and an
6858     // expression at parse time. MatchInstructionImpl() will always try to
6859     // match an operand as a token, when isToken returns true, and when the
6860     // name of the expression is not a valid token, the match will fail,
6861     // so we need to handle it here.
6862     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6863   case MCK_SSrcF32:
6864     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6865   case MCK_SoppBrTarget:
6866     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6867   case MCK_VReg32OrOff:
6868     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6869   case MCK_InterpSlot:
6870     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6871   case MCK_Attr:
6872     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6873   case MCK_AttrChan:
6874     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6875   default:
6876     return Match_InvalidOperand;
6877   }
6878 }
6879 
6880 //===----------------------------------------------------------------------===//
6881 // endpgm
6882 //===----------------------------------------------------------------------===//
6883 
6884 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6885   SMLoc S = Parser.getTok().getLoc();
6886   int64_t Imm = 0;
6887 
6888   if (!parseExpr(Imm)) {
6889     // The operand is optional, if not present default to 0
6890     Imm = 0;
6891   }
6892 
6893   if (!isUInt<16>(Imm)) {
6894     Error(S, "expected a 16-bit value");
6895     return MatchOperand_ParseFail;
6896   }
6897 
6898   Operands.push_back(
6899       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6900   return MatchOperand_Success;
6901 }
6902 
6903 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6904