1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
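    // The helpers below pack the parsed source modifiers into the SISrcMods
    // encoding that is emitted as an immediate modifiers operand on the MCInst
    // (see addRegOrImmWithInputModsOperands).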
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyLWE,
167     ImmTyExpTgt,
168     ImmTyExpCompr,
169     ImmTyExpVM,
170     ImmTyFORMAT,
171     ImmTyHwreg,
172     ImmTyOff,
173     ImmTySendMsg,
174     ImmTyInterpSlot,
175     ImmTyInterpAttr,
176     ImmTyAttrChan,
177     ImmTyOpSel,
178     ImmTyOpSelHi,
179     ImmTyNegLo,
180     ImmTyNegHi,
181     ImmTySwizzle,
182     ImmTyGprIdxMode,
183     ImmTyHigh,
184     ImmTyBLGP,
185     ImmTyCBSZ,
186     ImmTyABID,
187     ImmTyEndpgm,
188   };
189 
190 private:
191   struct TokOp {
192     const char *Data;
193     unsigned Length;
194   };
195 
196   struct ImmOp {
197     int64_t Val;
198     ImmTy Type;
199     bool IsFPImm;
200     Modifiers Mods;
201   };
202 
203   struct RegOp {
204     unsigned RegNo;
205     Modifiers Mods;
206   };
207 
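  // Operand payload; the active member is selected by Kind.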
208   union {
209     TokOp Tok;
210     ImmOp Imm;
211     RegOp Reg;
212     const MCExpr *Expr;
213   };
214 
215 public:
216   bool isToken() const override {
217     if (Kind == Token)
218       return true;
219 
    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
224     return isSymbolRefExpr();
225   }
226 
227   bool isSymbolRefExpr() const {
228     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
229   }
230 
231   bool isImm() const override {
232     return Kind == Immediate;
233   }
234 
235   bool isInlinableImm(MVT type) const;
236   bool isLiteralImm(MVT type) const;
237 
238   bool isRegKind() const {
239     return Kind == Register;
240   }
241 
242   bool isReg() const override {
243     return isRegKind() && !hasModifiers();
244   }
245 
246   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
247     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
248   }
249 
250   bool isRegOrImmWithInt16InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
252   }
253 
254   bool isRegOrImmWithInt32InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
256   }
257 
258   bool isRegOrImmWithInt64InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
260   }
261 
262   bool isRegOrImmWithFP16InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
264   }
265 
266   bool isRegOrImmWithFP32InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
268   }
269 
270   bool isRegOrImmWithFP64InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
272   }
273 
274   bool isVReg() const {
275     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
276            isRegClass(AMDGPU::VReg_64RegClassID) ||
277            isRegClass(AMDGPU::VReg_96RegClassID) ||
278            isRegClass(AMDGPU::VReg_128RegClassID) ||
279            isRegClass(AMDGPU::VReg_160RegClassID) ||
280            isRegClass(AMDGPU::VReg_256RegClassID) ||
281            isRegClass(AMDGPU::VReg_512RegClassID) ||
282            isRegClass(AMDGPU::VReg_1024RegClassID);
283   }
284 
285   bool isVReg32() const {
286     return isRegClass(AMDGPU::VGPR_32RegClassID);
287   }
288 
289   bool isVReg32OrOff() const {
290     return isOff() || isVReg32();
291   }
292 
293   bool isNull() const {
294     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
295   }
296 
297   bool isSDWAOperand(MVT type) const;
298   bool isSDWAFP16Operand() const;
299   bool isSDWAFP32Operand() const;
300   bool isSDWAInt16Operand() const;
301   bool isSDWAInt32Operand() const;
302 
303   bool isImmTy(ImmTy ImmT) const {
304     return isImm() && Imm.Type == ImmT;
305   }
306 
307   bool isImmModifier() const {
308     return isImm() && Imm.Type != ImmTyNone;
309   }
310 
311   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
312   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
313   bool isDMask() const { return isImmTy(ImmTyDMask); }
314   bool isDim() const { return isImmTy(ImmTyDim); }
315   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
316   bool isDA() const { return isImmTy(ImmTyDA); }
317   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
318   bool isLWE() const { return isImmTy(ImmTyLWE); }
319   bool isOff() const { return isImmTy(ImmTyOff); }
320   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
321   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
322   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
323   bool isOffen() const { return isImmTy(ImmTyOffen); }
324   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
325   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
326   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
327   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
328   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
329 
330   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
331   bool isGDS() const { return isImmTy(ImmTyGDS); }
332   bool isLDS() const { return isImmTy(ImmTyLDS); }
333   bool isDLC() const { return isImmTy(ImmTyDLC); }
334   bool isGLC() const { return isImmTy(ImmTyGLC); }
335   bool isSLC() const { return isImmTy(ImmTySLC); }
336   bool isSWZ() const { return isImmTy(ImmTySWZ); }
337   bool isTFE() const { return isImmTy(ImmTyTFE); }
338   bool isD16() const { return isImmTy(ImmTyD16); }
339   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
340   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
341   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
342   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
343   bool isFI() const { return isImmTy(ImmTyDppFi); }
344   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
345   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
346   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
347   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
348   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
349   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
350   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
351   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
352   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
353   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
354   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
355   bool isHigh() const { return isImmTy(ImmTyHigh); }
356 
357   bool isMod() const {
358     return isClampSI() || isOModSI();
359   }
360 
361   bool isRegOrImm() const {
362     return isReg() || isImm();
363   }
364 
365   bool isRegClass(unsigned RCID) const;
366 
367   bool isInlineValue() const;
368 
369   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
370     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
371   }
372 
373   bool isSCSrcB16() const {
374     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
375   }
376 
377   bool isSCSrcV2B16() const {
378     return isSCSrcB16();
379   }
380 
381   bool isSCSrcB32() const {
382     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
383   }
384 
385   bool isSCSrcB64() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
387   }
388 
389   bool isBoolReg() const;
390 
391   bool isSCSrcF16() const {
392     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
393   }
394 
395   bool isSCSrcV2F16() const {
396     return isSCSrcF16();
397   }
398 
399   bool isSCSrcF32() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
401   }
402 
403   bool isSCSrcF64() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
405   }
406 
407   bool isSSrcB32() const {
408     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
409   }
410 
411   bool isSSrcB16() const {
412     return isSCSrcB16() || isLiteralImm(MVT::i16);
413   }
414 
415   bool isSSrcV2B16() const {
416     llvm_unreachable("cannot happen");
417     return isSSrcB16();
418   }
419 
420   bool isSSrcB64() const {
421     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
422     // See isVSrc64().
423     return isSCSrcB64() || isLiteralImm(MVT::i64);
424   }
425 
426   bool isSSrcF32() const {
427     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
428   }
429 
430   bool isSSrcF64() const {
431     return isSCSrcB64() || isLiteralImm(MVT::f64);
432   }
433 
434   bool isSSrcF16() const {
435     return isSCSrcB16() || isLiteralImm(MVT::f16);
436   }
437 
438   bool isSSrcV2F16() const {
439     llvm_unreachable("cannot happen");
440     return isSSrcF16();
441   }
442 
443   bool isSSrcOrLdsB32() const {
444     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
445            isLiteralImm(MVT::i32) || isExpr();
446   }
447 
448   bool isVCSrcB32() const {
449     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
450   }
451 
452   bool isVCSrcB64() const {
453     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
454   }
455 
456   bool isVCSrcB16() const {
457     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
458   }
459 
460   bool isVCSrcV2B16() const {
461     return isVCSrcB16();
462   }
463 
464   bool isVCSrcF32() const {
465     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
466   }
467 
468   bool isVCSrcF64() const {
469     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
470   }
471 
472   bool isVCSrcF16() const {
473     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
474   }
475 
476   bool isVCSrcV2F16() const {
477     return isVCSrcF16();
478   }
479 
480   bool isVSrcB32() const {
481     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
482   }
483 
484   bool isVSrcB64() const {
485     return isVCSrcF64() || isLiteralImm(MVT::i64);
486   }
487 
488   bool isVSrcB16() const {
489     return isVCSrcF16() || isLiteralImm(MVT::i16);
490   }
491 
492   bool isVSrcV2B16() const {
493     return isVSrcB16() || isLiteralImm(MVT::v2i16);
494   }
495 
496   bool isVSrcF32() const {
497     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
498   }
499 
500   bool isVSrcF64() const {
501     return isVCSrcF64() || isLiteralImm(MVT::f64);
502   }
503 
504   bool isVSrcF16() const {
505     return isVCSrcF16() || isLiteralImm(MVT::f16);
506   }
507 
508   bool isVSrcV2F16() const {
509     return isVSrcF16() || isLiteralImm(MVT::v2f16);
510   }
511 
512   bool isVISrcB32() const {
513     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
514   }
515 
516   bool isVISrcB16() const {
517     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
518   }
519 
520   bool isVISrcV2B16() const {
521     return isVISrcB16();
522   }
523 
524   bool isVISrcF32() const {
525     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
526   }
527 
528   bool isVISrcF16() const {
529     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
530   }
531 
532   bool isVISrcV2F16() const {
533     return isVISrcF16() || isVISrcB32();
534   }
535 
536   bool isAISrcB32() const {
537     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
538   }
539 
540   bool isAISrcB16() const {
541     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
542   }
543 
544   bool isAISrcV2B16() const {
545     return isAISrcB16();
546   }
547 
548   bool isAISrcF32() const {
549     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
550   }
551 
552   bool isAISrcF16() const {
553     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
554   }
555 
556   bool isAISrcV2F16() const {
557     return isAISrcF16() || isAISrcB32();
558   }
559 
560   bool isAISrc_128B32() const {
561     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
562   }
563 
564   bool isAISrc_128B16() const {
565     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
566   }
567 
568   bool isAISrc_128V2B16() const {
569     return isAISrc_128B16();
570   }
571 
572   bool isAISrc_128F32() const {
573     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
574   }
575 
576   bool isAISrc_128F16() const {
577     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
578   }
579 
580   bool isAISrc_128V2F16() const {
581     return isAISrc_128F16() || isAISrc_128B32();
582   }
583 
584   bool isAISrc_512B32() const {
585     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
586   }
587 
588   bool isAISrc_512B16() const {
589     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
590   }
591 
592   bool isAISrc_512V2B16() const {
593     return isAISrc_512B16();
594   }
595 
596   bool isAISrc_512F32() const {
597     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
598   }
599 
600   bool isAISrc_512F16() const {
601     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
602   }
603 
604   bool isAISrc_512V2F16() const {
605     return isAISrc_512F16() || isAISrc_512B32();
606   }
607 
608   bool isAISrc_1024B32() const {
609     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
610   }
611 
612   bool isAISrc_1024B16() const {
613     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
614   }
615 
616   bool isAISrc_1024V2B16() const {
617     return isAISrc_1024B16();
618   }
619 
620   bool isAISrc_1024F32() const {
621     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
622   }
623 
624   bool isAISrc_1024F16() const {
625     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
626   }
627 
628   bool isAISrc_1024V2F16() const {
629     return isAISrc_1024F16() || isAISrc_1024B32();
630   }
631 
632   bool isKImmFP32() const {
633     return isLiteralImm(MVT::f32);
634   }
635 
636   bool isKImmFP16() const {
637     return isLiteralImm(MVT::f16);
638   }
639 
640   bool isMem() const override {
641     return false;
642   }
643 
644   bool isExpr() const {
645     return Kind == Expression;
646   }
647 
648   bool isSoppBrTarget() const {
649     return isExpr() || isImm();
650   }
651 
652   bool isSWaitCnt() const;
653   bool isHwreg() const;
654   bool isSendMsg() const;
655   bool isSwizzle() const;
656   bool isSMRDOffset8() const;
657   bool isSMRDOffset20() const;
658   bool isSMRDLiteralOffset() const;
659   bool isDPP8() const;
660   bool isDPPCtrl() const;
661   bool isBLGP() const;
662   bool isCBSZ() const;
663   bool isABID() const;
664   bool isGPRIdxMode() const;
665   bool isS16Imm() const;
666   bool isU16Imm() const;
667   bool isEndpgm() const;
668 
669   StringRef getExpressionAsToken() const {
670     assert(isExpr());
671     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
672     return S->getSymbol().getName();
673   }
674 
675   StringRef getToken() const {
676     assert(isToken());
677 
678     if (Kind == Expression)
679       return getExpressionAsToken();
680 
681     return StringRef(Tok.Data, Tok.Length);
682   }
683 
684   int64_t getImm() const {
685     assert(isImm());
686     return Imm.Val;
687   }
688 
689   ImmTy getImmTy() const {
690     assert(isImm());
691     return Imm.Type;
692   }
693 
694   unsigned getReg() const override {
695     assert(isRegKind());
696     return Reg.RegNo;
697   }
698 
699   SMLoc getStartLoc() const override {
700     return StartLoc;
701   }
702 
703   SMLoc getEndLoc() const override {
704     return EndLoc;
705   }
706 
707   SMRange getLocRange() const {
708     return SMRange(StartLoc, EndLoc);
709   }
710 
711   Modifiers getModifiers() const {
712     assert(isRegKind() || isImmTy(ImmTyNone));
713     return isRegKind() ? Reg.Mods : Imm.Mods;
714   }
715 
716   void setModifiers(Modifiers Mods) {
717     assert(isRegKind() || isImmTy(ImmTyNone));
718     if (isRegKind())
719       Reg.Mods = Mods;
720     else
721       Imm.Mods = Mods;
722   }
723 
724   bool hasModifiers() const {
725     return getModifiers().hasModifiers();
726   }
727 
728   bool hasFPModifiers() const {
729     return getModifiers().hasFPModifiers();
730   }
731 
732   bool hasIntModifiers() const {
733     return getModifiers().hasIntModifiers();
734   }
735 
736   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
737 
738   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
739 
740   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
741 
742   template <unsigned Bitwidth>
743   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
744 
745   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
746     addKImmFPOperands<16>(Inst, N);
747   }
748 
749   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
750     addKImmFPOperands<32>(Inst, N);
751   }
752 
753   void addRegOperands(MCInst &Inst, unsigned N) const;
754 
755   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
756     addRegOperands(Inst, N);
757   }
758 
759   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
760     if (isRegKind())
761       addRegOperands(Inst, N);
762     else if (isExpr())
763       Inst.addOperand(MCOperand::createExpr(Expr));
764     else
765       addImmOperands(Inst, N);
766   }
767 
768   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
769     Modifiers Mods = getModifiers();
770     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
771     if (isRegKind()) {
772       addRegOperands(Inst, N);
773     } else {
774       addImmOperands(Inst, N, false);
775     }
776   }
777 
778   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
779     assert(!hasIntModifiers());
780     addRegOrImmWithInputModsOperands(Inst, N);
781   }
782 
783   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
784     assert(!hasFPModifiers());
785     addRegOrImmWithInputModsOperands(Inst, N);
786   }
787 
788   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
789     Modifiers Mods = getModifiers();
790     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
791     assert(isRegKind());
792     addRegOperands(Inst, N);
793   }
794 
795   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
796     assert(!hasIntModifiers());
797     addRegWithInputModsOperands(Inst, N);
798   }
799 
800   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
801     assert(!hasFPModifiers());
802     addRegWithInputModsOperands(Inst, N);
803   }
804 
805   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
806     if (isImm())
807       addImmOperands(Inst, N);
808     else {
809       assert(isExpr());
810       Inst.addOperand(MCOperand::createExpr(Expr));
811     }
812   }
813 
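  // Map an ImmTy to a human-readable name for debug printing.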
814   static void printImmTy(raw_ostream& OS, ImmTy Type) {
815     switch (Type) {
816     case ImmTyNone: OS << "None"; break;
817     case ImmTyGDS: OS << "GDS"; break;
818     case ImmTyLDS: OS << "LDS"; break;
819     case ImmTyOffen: OS << "Offen"; break;
820     case ImmTyIdxen: OS << "Idxen"; break;
821     case ImmTyAddr64: OS << "Addr64"; break;
822     case ImmTyOffset: OS << "Offset"; break;
823     case ImmTyInstOffset: OS << "InstOffset"; break;
824     case ImmTyOffset0: OS << "Offset0"; break;
825     case ImmTyOffset1: OS << "Offset1"; break;
826     case ImmTyDLC: OS << "DLC"; break;
827     case ImmTyGLC: OS << "GLC"; break;
828     case ImmTySLC: OS << "SLC"; break;
829     case ImmTySWZ: OS << "SWZ"; break;
830     case ImmTyTFE: OS << "TFE"; break;
831     case ImmTyD16: OS << "D16"; break;
832     case ImmTyFORMAT: OS << "FORMAT"; break;
833     case ImmTyClampSI: OS << "ClampSI"; break;
834     case ImmTyOModSI: OS << "OModSI"; break;
835     case ImmTyDPP8: OS << "DPP8"; break;
836     case ImmTyDppCtrl: OS << "DppCtrl"; break;
837     case ImmTyDppRowMask: OS << "DppRowMask"; break;
838     case ImmTyDppBankMask: OS << "DppBankMask"; break;
839     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
840     case ImmTyDppFi: OS << "FI"; break;
841     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
842     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
843     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
844     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
845     case ImmTyDMask: OS << "DMask"; break;
846     case ImmTyDim: OS << "Dim"; break;
847     case ImmTyUNorm: OS << "UNorm"; break;
848     case ImmTyDA: OS << "DA"; break;
849     case ImmTyR128A16: OS << "R128A16"; break;
850     case ImmTyLWE: OS << "LWE"; break;
851     case ImmTyOff: OS << "Off"; break;
852     case ImmTyExpTgt: OS << "ExpTgt"; break;
853     case ImmTyExpCompr: OS << "ExpCompr"; break;
854     case ImmTyExpVM: OS << "ExpVM"; break;
855     case ImmTyHwreg: OS << "Hwreg"; break;
856     case ImmTySendMsg: OS << "SendMsg"; break;
857     case ImmTyInterpSlot: OS << "InterpSlot"; break;
858     case ImmTyInterpAttr: OS << "InterpAttr"; break;
859     case ImmTyAttrChan: OS << "AttrChan"; break;
860     case ImmTyOpSel: OS << "OpSel"; break;
861     case ImmTyOpSelHi: OS << "OpSelHi"; break;
862     case ImmTyNegLo: OS << "NegLo"; break;
863     case ImmTyNegHi: OS << "NegHi"; break;
864     case ImmTySwizzle: OS << "Swizzle"; break;
865     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
866     case ImmTyHigh: OS << "High"; break;
867     case ImmTyBLGP: OS << "BLGP"; break;
868     case ImmTyCBSZ: OS << "CBSZ"; break;
869     case ImmTyABID: OS << "ABID"; break;
870     case ImmTyEndpgm: OS << "Endpgm"; break;
871     }
872   }
873 
874   void print(raw_ostream &OS) const override {
875     switch (Kind) {
876     case Register:
877       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
878       break;
879     case Immediate:
880       OS << '<' << getImm();
881       if (getImmTy() != ImmTyNone) {
882         OS << " type: "; printImmTy(OS, getImmTy());
883       }
884       OS << " mods: " << Imm.Mods << '>';
885       break;
886     case Token:
887       OS << '\'' << getToken() << '\'';
888       break;
889     case Expression:
890       OS << "<expr " << *Expr << '>';
891       break;
892     }
893   }
894 
895   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
896                                       int64_t Val, SMLoc Loc,
897                                       ImmTy Type = ImmTyNone,
898                                       bool IsFPImm = false) {
899     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
900     Op->Imm.Val = Val;
901     Op->Imm.IsFPImm = IsFPImm;
902     Op->Imm.Type = Type;
903     Op->Imm.Mods = Modifiers();
904     Op->StartLoc = Loc;
905     Op->EndLoc = Loc;
906     return Op;
907   }
908 
909   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
910                                         StringRef Str, SMLoc Loc,
911                                         bool HasExplicitEncodingSize = true) {
912     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
913     Res->Tok.Data = Str.data();
914     Res->Tok.Length = Str.size();
915     Res->StartLoc = Loc;
916     Res->EndLoc = Loc;
917     return Res;
918   }
919 
920   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
921                                       unsigned RegNo, SMLoc S,
922                                       SMLoc E) {
923     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
924     Op->Reg.RegNo = RegNo;
925     Op->Reg.Mods = Modifiers();
926     Op->StartLoc = S;
927     Op->EndLoc = E;
928     return Op;
929   }
930 
931   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
932                                        const class MCExpr *Expr, SMLoc S) {
933     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
934     Op->Expr = Expr;
935     Op->StartLoc = S;
936     Op->EndLoc = S;
937     return Op;
938   }
939 };
940 
941 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
942   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
943   return OS;
944 }
945 
946 //===----------------------------------------------------------------------===//
947 // AsmParser
948 //===----------------------------------------------------------------------===//
949 
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
953 class KernelScopeInfo {
954   int SgprIndexUnusedMin = -1;
955   int VgprIndexUnusedMin = -1;
956   MCContext *Ctx = nullptr;
957 
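  // Record a use of the SGPR with index i and update the .kernel.sgpr_count
  // symbol accordingly.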
958   void usesSgprAt(int i) {
959     if (i >= SgprIndexUnusedMin) {
960       SgprIndexUnusedMin = ++i;
961       if (Ctx) {
962         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
963         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
964       }
965     }
966   }
967 
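  // Record a use of the VGPR with index i and update the .kernel.vgpr_count
  // symbol accordingly.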
968   void usesVgprAt(int i) {
969     if (i >= VgprIndexUnusedMin) {
970       VgprIndexUnusedMin = ++i;
971       if (Ctx) {
972         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
973         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
974       }
975     }
976   }
977 
978 public:
979   KernelScopeInfo() = default;
980 
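  // Bind to the MC context and reset the register trackers, defining the
  // .kernel.sgpr_count and .kernel.vgpr_count symbols with an initial value of 0.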
981   void initialize(MCContext &Context) {
982     Ctx = &Context;
983     usesSgprAt(SgprIndexUnusedMin = -1);
984     usesVgprAt(VgprIndexUnusedMin = -1);
985   }
986 
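  // Update the appropriate gpr count for a parsed register reference covering
  // RegWidth dwords starting at DwordRegIndex.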
987   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
988     switch (RegKind) {
989       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
990       case IS_AGPR: // fall through
991       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
992       default: break;
993     }
994   }
995 };
996 
997 class AMDGPUAsmParser : public MCTargetAsmParser {
998   MCAsmParser &Parser;
999 
1000   // Number of extra operands parsed after the first optional operand.
1001   // This may be necessary to skip hardcoded mandatory operands.
1002   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1003 
1004   unsigned ForcedEncodingSize = 0;
1005   bool ForcedDPP = false;
1006   bool ForcedSDWA = false;
1007   KernelScopeInfo KernelScope;
1008 
1009   /// @name Auto-generated Match Functions
1010   /// {
1011 
1012 #define GET_ASSEMBLER_HEADER
1013 #include "AMDGPUGenAsmMatcher.inc"
1014 
1015   /// }
1016 
1017 private:
1018   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1019   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1022   ///
1023   /// \param Features [in] Target features, used for bug corrections.
1024   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1025   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1026   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1027   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1028   /// descriptor field, if valid.
1029   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1030   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1031   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1032   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1033   /// \param VGPRBlocks [out] Result VGPR block count.
1034   /// \param SGPRBlocks [out] Result SGPR block count.
1035   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1036                           bool FlatScrUsed, bool XNACKUsed,
1037                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1038                           SMRange VGPRRange, unsigned NextFreeSGPR,
1039                           SMRange SGPRRange, unsigned &VGPRBlocks,
1040                           unsigned &SGPRBlocks);
1041   bool ParseDirectiveAMDGCNTarget();
1042   bool ParseDirectiveAMDHSAKernel();
1043   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1044   bool ParseDirectiveHSACodeObjectVersion();
1045   bool ParseDirectiveHSACodeObjectISA();
1046   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1047   bool ParseDirectiveAMDKernelCodeT();
1048   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1049   bool ParseDirectiveAMDGPUHsaKernel();
1050 
1051   bool ParseDirectiveISAVersion();
1052   bool ParseDirectiveHSAMetadata();
1053   bool ParseDirectivePALMetadataBegin();
1054   bool ParseDirectivePALMetadata();
1055   bool ParseDirectiveAMDGPULDS();
1056 
1057   /// Common code to parse out a block of text (typically YAML) between start and
1058   /// end directives.
1059   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1060                            const char *AssemblerDirectiveEnd,
1061                            std::string &CollectString);
1062 
1063   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1064                              RegisterKind RegKind, unsigned Reg1);
1065   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1066                            unsigned& RegNum, unsigned& RegWidth);
1067   unsigned ParseRegularReg(RegisterKind &RegKind,
1068                            unsigned &RegNum,
1069                            unsigned &RegWidth);
1070   unsigned ParseSpecialReg(RegisterKind &RegKind,
1071                            unsigned &RegNum,
1072                            unsigned &RegWidth);
1073   unsigned ParseRegList(RegisterKind &RegKind,
1074                         unsigned &RegNum,
1075                         unsigned &RegWidth);
1076   bool ParseRegRange(unsigned& Num, unsigned& Width);
1077   unsigned getRegularReg(RegisterKind RegKind,
1078                          unsigned RegNum,
1079                          unsigned RegWidth);
1080 
1081   bool isRegister();
1082   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1083   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1084   void initializeGprCountSymbol(RegisterKind RegKind);
1085   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1086                              unsigned RegWidth);
1087   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1088                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1089   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1090                  bool IsGdsHardcoded);
1091 
1092 public:
1093   enum AMDGPUMatchResultTy {
1094     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1095   };
1096   enum OperandMode {
1097     OperandMode_Default,
1098     OperandMode_NSA,
1099   };
1100 
1101   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1102 
1103   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1104                const MCInstrInfo &MII,
1105                const MCTargetOptions &Options)
1106       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1107     MCAsmParserExtension::Initialize(Parser);
1108 
1109     if (getFeatureBits().none()) {
1110       // Set default features.
1111       copySTI().ToggleFeature("southern-islands");
1112     }
1113 
1114     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1115 
1116     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1121       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1122       MCContext &Ctx = getContext();
1123       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1124         MCSymbol *Sym =
1125             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1126         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1127         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1128         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1129         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1130         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1131       } else {
1132         MCSymbol *Sym =
1133             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1134         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1135         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1136         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1137         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1138         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1139       }
1140       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1141         initializeGprCountSymbol(IS_VGPR);
1142         initializeGprCountSymbol(IS_SGPR);
1143       } else
1144         KernelScope.initialize(getContext());
1145     }
1146   }
1147 
1148   bool hasXNACK() const {
1149     return AMDGPU::hasXNACK(getSTI());
1150   }
1151 
1152   bool hasMIMG_R128() const {
1153     return AMDGPU::hasMIMG_R128(getSTI());
1154   }
1155 
1156   bool hasPackedD16() const {
1157     return AMDGPU::hasPackedD16(getSTI());
1158   }
1159 
1160   bool isSI() const {
1161     return AMDGPU::isSI(getSTI());
1162   }
1163 
1164   bool isCI() const {
1165     return AMDGPU::isCI(getSTI());
1166   }
1167 
1168   bool isVI() const {
1169     return AMDGPU::isVI(getSTI());
1170   }
1171 
1172   bool isGFX9() const {
1173     return AMDGPU::isGFX9(getSTI());
1174   }
1175 
1176   bool isGFX10() const {
1177     return AMDGPU::isGFX10(getSTI());
1178   }
1179 
1180   bool hasInv2PiInlineImm() const {
1181     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1182   }
1183 
1184   bool hasFlatOffsets() const {
1185     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1186   }
1187 
1188   bool hasSGPR102_SGPR103() const {
1189     return !isVI() && !isGFX9();
1190   }
1191 
1192   bool hasSGPR104_SGPR105() const {
1193     return isGFX10();
1194   }
1195 
1196   bool hasIntClamp() const {
1197     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1198   }
1199 
1200   AMDGPUTargetStreamer &getTargetStreamer() {
1201     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1202     return static_cast<AMDGPUTargetStreamer &>(TS);
1203   }
1204 
1205   const MCRegisterInfo *getMRI() const {
1206     // We need this const_cast because for some reason getContext() is not const
1207     // in MCAsmParser.
1208     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1209   }
1210 
1211   const MCInstrInfo *getMII() const {
1212     return &MII;
1213   }
1214 
1215   const FeatureBitset &getFeatureBits() const {
1216     return getSTI().getFeatureBits();
1217   }
1218 
1219   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1220   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1221   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1222 
1223   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1224   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1225   bool isForcedDPP() const { return ForcedDPP; }
1226   bool isForcedSDWA() const { return ForcedSDWA; }
1227   ArrayRef<unsigned> getMatchedVariants() const;
1228 
1229   std::unique_ptr<AMDGPUOperand> parseRegister();
1230   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1231   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1232   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1233                                       unsigned Kind) override;
1234   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1235                                OperandVector &Operands, MCStreamer &Out,
1236                                uint64_t &ErrorInfo,
1237                                bool MatchingInlineAsm) override;
1238   bool ParseDirective(AsmToken DirectiveID) override;
1239   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1240                                     OperandMode Mode = OperandMode_Default);
1241   StringRef parseMnemonicSuffix(StringRef Name);
1242   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1243                         SMLoc NameLoc, OperandVector &Operands) override;
1244   //bool ProcessInstruction(MCInst &Inst);
1245 
1246   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1247 
1248   OperandMatchResultTy
1249   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1250                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1251                      bool (*ConvertResult)(int64_t &) = nullptr);
1252 
1253   OperandMatchResultTy
1254   parseOperandArrayWithPrefix(const char *Prefix,
1255                               OperandVector &Operands,
1256                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1257                               bool (*ConvertResult)(int64_t&) = nullptr);
1258 
1259   OperandMatchResultTy
1260   parseNamedBit(const char *Name, OperandVector &Operands,
1261                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1262   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1263                                              StringRef &Value);
1264 
1265   bool isModifier();
1266   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1267   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1268   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1269   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1270   bool parseSP3NegModifier();
1271   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1272   OperandMatchResultTy parseReg(OperandVector &Operands);
1273   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1274   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1275   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1276   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1277   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1278   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1279   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1280 
1281   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1282   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1283   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1284   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1285 
1286   bool parseCnt(int64_t &IntVal);
1287   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1288   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1289 
1290 private:
1291   struct OperandInfoTy {
1292     int64_t Id;
1293     bool IsSymbolic = false;
1294     bool IsDefined = false;
1295 
1296     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1297   };
1298 
1299   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1300   bool validateSendMsg(const OperandInfoTy &Msg,
1301                        const OperandInfoTy &Op,
1302                        const OperandInfoTy &Stream,
1303                        const SMLoc Loc);
1304 
1305   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1306   bool validateHwreg(const OperandInfoTy &HwReg,
1307                      const int64_t Offset,
1308                      const int64_t Width,
1309                      const SMLoc Loc);
1310 
1311   void errorExpTgt();
1312   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1313   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1314 
1315   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1316   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1317   bool validateSOPLiteral(const MCInst &Inst) const;
1318   bool validateConstantBusLimitations(const MCInst &Inst);
1319   bool validateEarlyClobberLimitations(const MCInst &Inst);
1320   bool validateIntClampSupported(const MCInst &Inst);
1321   bool validateMIMGAtomicDMask(const MCInst &Inst);
1322   bool validateMIMGGatherDMask(const MCInst &Inst);
1323   bool validateMIMGDataSize(const MCInst &Inst);
1324   bool validateMIMGAddrSize(const MCInst &Inst);
1325   bool validateMIMGD16(const MCInst &Inst);
1326   bool validateMIMGDim(const MCInst &Inst);
1327   bool validateLdsDirect(const MCInst &Inst);
1328   bool validateOpSel(const MCInst &Inst);
1329   bool validateVccOperand(unsigned Reg) const;
1330   bool validateVOP3Literal(const MCInst &Inst) const;
1331   unsigned getConstantBusLimit(unsigned Opcode) const;
1332   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1333   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1334   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1335 
1336   bool isId(const StringRef Id) const;
1337   bool isId(const AsmToken &Token, const StringRef Id) const;
1338   bool isToken(const AsmToken::TokenKind Kind) const;
1339   bool trySkipId(const StringRef Id);
1340   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1341   bool trySkipToken(const AsmToken::TokenKind Kind);
1342   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1343   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1344   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1345   AsmToken::TokenKind getTokenKind() const;
1346   bool parseExpr(int64_t &Imm);
1347   bool parseExpr(OperandVector &Operands);
1348   StringRef getTokenStr() const;
1349   AsmToken peekToken();
1350   AsmToken getToken() const;
1351   SMLoc getLoc() const;
1352   void lex();
1353 
1354 public:
1355   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1356   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1357 
1358   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1359   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1360   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1361   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1362   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1363   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1364 
1365   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1366                             const unsigned MinVal,
1367                             const unsigned MaxVal,
1368                             const StringRef ErrMsg);
1369   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1370   bool parseSwizzleOffset(int64_t &Imm);
1371   bool parseSwizzleMacro(int64_t &Imm);
1372   bool parseSwizzleQuadPerm(int64_t &Imm);
1373   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1374   bool parseSwizzleBroadcast(int64_t &Imm);
1375   bool parseSwizzleSwap(int64_t &Imm);
1376   bool parseSwizzleReverse(int64_t &Imm);
1377 
1378   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1379   int64_t parseGPRIdxMacro();
1380 
1381   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1382   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1383   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1384   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1385   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1386 
1387   AMDGPUOperand::Ptr defaultDLC() const;
1388   AMDGPUOperand::Ptr defaultGLC() const;
1389   AMDGPUOperand::Ptr defaultSLC() const;
1390 
1391   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1392   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1393   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1394   AMDGPUOperand::Ptr defaultFlatOffset() const;
1395 
1396   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1397 
1398   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1399                OptionalImmIndexMap &OptionalIdx);
1400   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1401   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1402   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1403 
1404   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1405 
1406   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1407                bool IsAtomic = false);
1408   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1409 
1410   OperandMatchResultTy parseDim(OperandVector &Operands);
1411   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1412   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1413   AMDGPUOperand::Ptr defaultRowMask() const;
1414   AMDGPUOperand::Ptr defaultBankMask() const;
1415   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1416   AMDGPUOperand::Ptr defaultFI() const;
1417   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1418   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1419 
1420   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1421                                     AMDGPUOperand::ImmTy Type);
1422   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1423   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1424   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1425   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1426   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1427   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1428                 uint64_t BasicInstType, bool skipVcc = false);
1429 
1430   AMDGPUOperand::Ptr defaultBLGP() const;
1431   AMDGPUOperand::Ptr defaultCBSZ() const;
1432   AMDGPUOperand::Ptr defaultABID() const;
1433 
1434   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1435   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1436 };
1437 
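// Describes an optional instruction operand: its assembly name, the immediate
// type it maps to, whether it is a single-bit flag, and an optional callback
// used to convert the parsed value.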
1438 struct OptionalOperand {
1439   const char *Name;
1440   AMDGPUOperand::ImmTy Type;
1441   bool IsBit;
1442   bool (*ConvertResult)(int64_t&);
1443 };
1444 
1445 } // end anonymous namespace
1446 
// May be called with an integer type of equivalent bitwidth.
1448 static const fltSemantics *getFltSemantics(unsigned Size) {
1449   switch (Size) {
1450   case 4:
1451     return &APFloat::IEEEsingle();
1452   case 8:
1453     return &APFloat::IEEEdouble();
1454   case 2:
1455     return &APFloat::IEEEhalf();
1456   default:
1457     llvm_unreachable("unsupported fp type");
1458   }
1459 }
1460 
1461 static const fltSemantics *getFltSemantics(MVT VT) {
1462   return getFltSemantics(VT.getSizeInBits() / 8);
1463 }
1464 
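// Return the fp semantics used to interpret an fp literal for the given
// AMDGPU operand type.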
1465 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1466   switch (OperandType) {
1467   case AMDGPU::OPERAND_REG_IMM_INT32:
1468   case AMDGPU::OPERAND_REG_IMM_FP32:
1469   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1470   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1471   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1472   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1473     return &APFloat::IEEEsingle();
1474   case AMDGPU::OPERAND_REG_IMM_INT64:
1475   case AMDGPU::OPERAND_REG_IMM_FP64:
1476   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1477   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1478     return &APFloat::IEEEdouble();
1479   case AMDGPU::OPERAND_REG_IMM_INT16:
1480   case AMDGPU::OPERAND_REG_IMM_FP16:
1481   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1482   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1483   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1484   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1485   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1486   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1487   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1488   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1489   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1490   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1491     return &APFloat::IEEEhalf();
1492   default:
1493     llvm_unreachable("unsupported fp type");
1494   }
1495 }
1496 
1497 //===----------------------------------------------------------------------===//
1498 // Operand
1499 //===----------------------------------------------------------------------===//
1500 
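// Check whether the fp literal can be converted to the type VT, allowing loss
// of precision but not overflow or underflow.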
1501 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1502   bool Lost;
1503 
  // Convert the literal to the semantics of the requested type.
1505   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1506                                                APFloat::rmNearestTiesToEven,
1507                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1509   if (Status != APFloat::opOK &&
1510       Lost &&
1511       ((Status & APFloat::opOverflow)  != 0 ||
1512        (Status & APFloat::opUnderflow) != 0)) {
1513     return false;
1514   }
1515 
1516   return true;
1517 }
1518 
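// Return true if Val fits in Size bits as either a signed or an unsigned
// integer, i.e. truncation to Size bits loses no information.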
1519 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1520   return isUIntN(Size, Val) || isIntN(Size, Val);
1521 }
1522 
1523 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1524 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1529   if (isInlineValue()) {
1530     return true;
1531   }
1532 
1533   if (!isImmTy(ImmTyNone)) {
1534     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1535     return false;
1536   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1540 
1541   APInt Literal(64, Imm.Val);
1542 
1543   if (Imm.IsFPImm) { // We got fp literal token
1544     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1545       return AMDGPU::isInlinableLiteral64(Imm.Val,
1546                                           AsmParser->hasInv2PiInlineImm());
1547     }
1548 
1549     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1550     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1551       return false;
1552 
1553     if (type.getScalarSizeInBits() == 16) {
1554       return AMDGPU::isInlinableLiteral16(
1555         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1556         AsmParser->hasInv2PiInlineImm());
1557     }
1558 
1559     // Check if single precision literal is inlinable
1560     return AMDGPU::isInlinableLiteral32(
1561       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1562       AsmParser->hasInv2PiInlineImm());
1563   }
1564 
1565   // We got int literal token.
1566   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1567     return AMDGPU::isInlinableLiteral64(Imm.Val,
1568                                         AsmParser->hasInv2PiInlineImm());
1569   }
1570 
1571   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1572     return false;
1573   }
1574 
1575   if (type.getScalarSizeInBits() == 16) {
1576     return AMDGPU::isInlinableLiteral16(
1577       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1578       AsmParser->hasInv2PiInlineImm());
1579   }
1580 
1581   return AMDGPU::isInlinableLiteral32(
1582     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1583     AsmParser->hasInv2PiInlineImm());
1584 }
1585 
1586 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1587   // Check that this immediate can be added as literal
1588   if (!isImmTy(ImmTyNone)) {
1589     return false;
1590   }
1591 
1592   if (!Imm.IsFPImm) {
1593     // We got int literal token.
1594 
1595     if (type == MVT::f64 && hasFPModifiers()) {
1596       // Cannot apply fp modifiers to int literals preserving the same semantics
1597       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1598       // disable these cases.
1599       return false;
1600     }
1601 
1602     unsigned Size = type.getSizeInBits();
1603     if (Size == 64)
1604       Size = 32;
1605 
1606     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1607     // types.
1608     return isSafeTruncation(Imm.Val, Size);
1609   }
1610 
1611   // We got fp literal token
1612   if (type == MVT::f64) { // Expected 64-bit fp operand
1613     // The low 32 bits of the literal will be set to zero, but such literals are accepted
1614     return true;
1615   }
1616 
1617   if (type == MVT::i64) { // Expected 64-bit int operand
1618     // We don't allow fp literals in 64-bit integer instructions. It is
1619     // unclear how we should encode them.
1620     return false;
1621   }
1622 
1623   // We allow fp literals with f16x2 operands assuming that the specified
1624   // literal goes into the lower half and the upper half is zero. We also
1625   // require that the literal can be losslessly converted to f16.
1626   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1627                      (type == MVT::v2i16)? MVT::i16 : type;
1628 
1629   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1630   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1631 }
1632 
1633 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1634   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1635 }
1636 
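// On VI an SDWA source must be a 32-bit VGPR; on GFX9/GFX10 it may also be an
// SGPR or an inlinable immediate of the given type.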
1637 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1638   if (AsmParser->isVI())
1639     return isVReg32();
1640   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1641     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1642   else
1643     return false;
1644 }
1645 
1646 bool AMDGPUOperand::isSDWAFP16Operand() const {
1647   return isSDWAOperand(MVT::f16);
1648 }
1649 
1650 bool AMDGPUOperand::isSDWAFP32Operand() const {
1651   return isSDWAOperand(MVT::f32);
1652 }
1653 
1654 bool AMDGPUOperand::isSDWAInt16Operand() const {
1655   return isSDWAOperand(MVT::i16);
1656 }
1657 
1658 bool AMDGPUOperand::isSDWAInt32Operand() const {
1659   return isSDWAOperand(MVT::i32);
1660 }
1661 
1662 bool AMDGPUOperand::isBoolReg() const {
1663   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1664          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1665 }
1666 
1667 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1668 {
1669   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1670   assert(Size == 2 || Size == 4 || Size == 8);
1671 
1672   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1673 
1674   if (Imm.Mods.Abs) {
1675     Val &= ~FpSignMask;
1676   }
1677   if (Imm.Mods.Neg) {
1678     Val ^= FpSignMask;
1679   }
1680 
1681   return Val;
1682 }
1683 
1684 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1685   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1686                              Inst.getNumOperands())) {
1687     addLiteralImmOperand(Inst, Imm.Val,
1688                          ApplyModifiers &
1689                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1690   } else {
1691     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1692     Inst.addOperand(MCOperand::createImm(Imm.Val));
1693   }
1694 }
1695 
1696 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1697   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1698   auto OpNum = Inst.getNumOperands();
1699   // Check that this operand accepts literals
1700   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1701 
1702   if (ApplyModifiers) {
1703     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1704     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1705     Val = applyInputFPModifiers(Val, Size);
1706   }
1707 
1708   APInt Literal(64, Val);
1709   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1710 
1711   if (Imm.IsFPImm) { // We got fp literal token
1712     switch (OpTy) {
1713     case AMDGPU::OPERAND_REG_IMM_INT64:
1714     case AMDGPU::OPERAND_REG_IMM_FP64:
1715     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1716     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1717       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1718                                        AsmParser->hasInv2PiInlineImm())) {
1719         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1720         return;
1721       }
1722 
1723       // Non-inlineable
1724       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1725         // For fp operands we check whether the low 32 bits are zero
1726         if (Literal.getLoBits(32) != 0) {
1727           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1728           "Can't encode literal as exact 64-bit floating-point operand. "
1729           "Low 32-bits will be set to zero");
1730         }
1731 
1732         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1733         return;
1734       }
1735 
1736       // We don't allow fp literals in 64-bit integer instructions. It is
1737       // unclear how we should encode them. This case should be checked earlier
1738       // in predicate methods (isLiteralImm())
1739       llvm_unreachable("fp literal in 64-bit integer instruction.");
1740 
1741     case AMDGPU::OPERAND_REG_IMM_INT32:
1742     case AMDGPU::OPERAND_REG_IMM_FP32:
1743     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1744     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1745     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1746     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1747     case AMDGPU::OPERAND_REG_IMM_INT16:
1748     case AMDGPU::OPERAND_REG_IMM_FP16:
1749     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1750     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1751     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1752     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1753     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1754     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1755     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1756     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1757     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1758     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1759       bool lost;
1760       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1761       // Convert literal to single precision
1762       FPLiteral.convert(*getOpFltSemantics(OpTy),
1763                         APFloat::rmNearestTiesToEven, &lost);
1764       // We allow precision loss but not overflow or underflow. This should be
1765       // checked earlier in isLiteralImm()
1766 
1767       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1768       Inst.addOperand(MCOperand::createImm(ImmVal));
1769       return;
1770     }
1771     default:
1772       llvm_unreachable("invalid operand size");
1773     }
1774 
1775     return;
1776   }
1777 
1778   // We got int literal token.
1779   // Only sign extend inline immediates.
1780   switch (OpTy) {
1781   case AMDGPU::OPERAND_REG_IMM_INT32:
1782   case AMDGPU::OPERAND_REG_IMM_FP32:
1783   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1784   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1785   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1786   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1787   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1788   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1789     if (isSafeTruncation(Val, 32) &&
1790         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1791                                      AsmParser->hasInv2PiInlineImm())) {
1792       Inst.addOperand(MCOperand::createImm(Val));
1793       return;
1794     }
1795 
1796     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1797     return;
1798 
1799   case AMDGPU::OPERAND_REG_IMM_INT64:
1800   case AMDGPU::OPERAND_REG_IMM_FP64:
1801   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1802   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1803     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1804       Inst.addOperand(MCOperand::createImm(Val));
1805       return;
1806     }
1807 
1808     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1809     return;
1810 
1811   case AMDGPU::OPERAND_REG_IMM_INT16:
1812   case AMDGPU::OPERAND_REG_IMM_FP16:
1813   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1814   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1815   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1816   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1817     if (isSafeTruncation(Val, 16) &&
1818         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1819                                      AsmParser->hasInv2PiInlineImm())) {
1820       Inst.addOperand(MCOperand::createImm(Val));
1821       return;
1822     }
1823 
1824     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1825     return;
1826 
1827   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1828   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1829   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1830   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1831     assert(isSafeTruncation(Val, 16));
1832     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1833                                         AsmParser->hasInv2PiInlineImm()));
1834 
1835     Inst.addOperand(MCOperand::createImm(Val));
1836     return;
1837   }
1838   default:
1839     llvm_unreachable("invalid operand size");
1840   }
1841 }
1842 
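// Add a fixed-width (KImm) immediate operand. Integer literals are truncated
// to Bitwidth; fp literals are first converted to the corresponding fp type.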
1843 template <unsigned Bitwidth>
1844 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1845   APInt Literal(64, Imm.Val);
1846 
1847   if (!Imm.IsFPImm) {
1848     // We got int literal token.
1849     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1850     return;
1851   }
1852 
1853   bool Lost;
1854   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1855   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1856                     APFloat::rmNearestTiesToEven, &Lost);
1857   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1858 }
1859 
1860 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1861   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1862 }
1863 
1864 static bool isInlineValue(unsigned Reg) {
1865   switch (Reg) {
1866   case AMDGPU::SRC_SHARED_BASE:
1867   case AMDGPU::SRC_SHARED_LIMIT:
1868   case AMDGPU::SRC_PRIVATE_BASE:
1869   case AMDGPU::SRC_PRIVATE_LIMIT:
1870   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1871     return true;
1872   case AMDGPU::SRC_VCCZ:
1873   case AMDGPU::SRC_EXECZ:
1874   case AMDGPU::SRC_SCC:
1875     return true;
1876   case AMDGPU::SGPR_NULL:
1877     return true;
1878   default:
1879     return false;
1880   }
1881 }
1882 
1883 bool AMDGPUOperand::isInlineValue() const {
1884   return isRegKind() && ::isInlineValue(getReg());
1885 }
1886 
1887 //===----------------------------------------------------------------------===//
1888 // AsmParser
1889 //===----------------------------------------------------------------------===//
1890 
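// Map a register kind and a width (in 32-bit registers) to the corresponding
// register class ID, or -1 if there is no such class.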
1891 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1892   if (Is == IS_VGPR) {
1893     switch (RegWidth) {
1894       default: return -1;
1895       case 1: return AMDGPU::VGPR_32RegClassID;
1896       case 2: return AMDGPU::VReg_64RegClassID;
1897       case 3: return AMDGPU::VReg_96RegClassID;
1898       case 4: return AMDGPU::VReg_128RegClassID;
1899       case 5: return AMDGPU::VReg_160RegClassID;
1900       case 8: return AMDGPU::VReg_256RegClassID;
1901       case 16: return AMDGPU::VReg_512RegClassID;
1902       case 32: return AMDGPU::VReg_1024RegClassID;
1903     }
1904   } else if (Is == IS_TTMP) {
1905     switch (RegWidth) {
1906       default: return -1;
1907       case 1: return AMDGPU::TTMP_32RegClassID;
1908       case 2: return AMDGPU::TTMP_64RegClassID;
1909       case 4: return AMDGPU::TTMP_128RegClassID;
1910       case 8: return AMDGPU::TTMP_256RegClassID;
1911       case 16: return AMDGPU::TTMP_512RegClassID;
1912     }
1913   } else if (Is == IS_SGPR) {
1914     switch (RegWidth) {
1915       default: return -1;
1916       case 1: return AMDGPU::SGPR_32RegClassID;
1917       case 2: return AMDGPU::SGPR_64RegClassID;
1918       case 4: return AMDGPU::SGPR_128RegClassID;
1919       case 8: return AMDGPU::SGPR_256RegClassID;
1920       case 16: return AMDGPU::SGPR_512RegClassID;
1921     }
1922   } else if (Is == IS_AGPR) {
1923     switch (RegWidth) {
1924       default: return -1;
1925       case 1: return AMDGPU::AGPR_32RegClassID;
1926       case 2: return AMDGPU::AReg_64RegClassID;
1927       case 4: return AMDGPU::AReg_128RegClassID;
1928       case 16: return AMDGPU::AReg_512RegClassID;
1929       case 32: return AMDGPU::AReg_1024RegClassID;
1930     }
1931   }
1932   return -1;
1933 }
1934 
1935 static unsigned getSpecialRegForName(StringRef RegName) {
1936   return StringSwitch<unsigned>(RegName)
1937     .Case("exec", AMDGPU::EXEC)
1938     .Case("vcc", AMDGPU::VCC)
1939     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1940     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1941     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1942     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1943     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1944     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1945     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1946     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1947     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1948     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1949     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1950     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1951     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1952     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1953     .Case("m0", AMDGPU::M0)
1954     .Case("vccz", AMDGPU::SRC_VCCZ)
1955     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1956     .Case("execz", AMDGPU::SRC_EXECZ)
1957     .Case("src_execz", AMDGPU::SRC_EXECZ)
1958     .Case("scc", AMDGPU::SRC_SCC)
1959     .Case("src_scc", AMDGPU::SRC_SCC)
1960     .Case("tba", AMDGPU::TBA)
1961     .Case("tma", AMDGPU::TMA)
1962     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1963     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1964     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1965     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1966     .Case("vcc_lo", AMDGPU::VCC_LO)
1967     .Case("vcc_hi", AMDGPU::VCC_HI)
1968     .Case("exec_lo", AMDGPU::EXEC_LO)
1969     .Case("exec_hi", AMDGPU::EXEC_HI)
1970     .Case("tma_lo", AMDGPU::TMA_LO)
1971     .Case("tma_hi", AMDGPU::TMA_HI)
1972     .Case("tba_lo", AMDGPU::TBA_LO)
1973     .Case("tba_hi", AMDGPU::TBA_HI)
1974     .Case("null", AMDGPU::SGPR_NULL)
1975     .Default(AMDGPU::NoRegister);
1976 }
1977 
1978 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1979                                     SMLoc &EndLoc) {
1980   auto R = parseRegister();
1981   if (!R) return true;
1982   assert(R->isReg());
1983   RegNo = R->getReg();
1984   StartLoc = R->getStartLoc();
1985   EndLoc = R->getEndLoc();
1986   return false;
1987 }
1988 
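// Try to append register Reg1 to the sequence described by Reg and RegWidth.
// Lo/hi halves of special registers are merged into the full register;
// regular registers must be consecutive.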
1989 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1990                                             RegisterKind RegKind, unsigned Reg1) {
1991   switch (RegKind) {
1992   case IS_SPECIAL:
1993     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1994       Reg = AMDGPU::EXEC;
1995       RegWidth = 2;
1996       return true;
1997     }
1998     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1999       Reg = AMDGPU::FLAT_SCR;
2000       RegWidth = 2;
2001       return true;
2002     }
2003     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2004       Reg = AMDGPU::XNACK_MASK;
2005       RegWidth = 2;
2006       return true;
2007     }
2008     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2009       Reg = AMDGPU::VCC;
2010       RegWidth = 2;
2011       return true;
2012     }
2013     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2014       Reg = AMDGPU::TBA;
2015       RegWidth = 2;
2016       return true;
2017     }
2018     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2019       Reg = AMDGPU::TMA;
2020       RegWidth = 2;
2021       return true;
2022     }
2023     return false;
2024   case IS_VGPR:
2025   case IS_SGPR:
2026   case IS_AGPR:
2027   case IS_TTMP:
2028     if (Reg1 != Reg + RegWidth) {
2029       return false;
2030     }
2031     RegWidth++;
2032     return true;
2033   default:
2034     llvm_unreachable("unexpected register kind");
2035   }
2036 }
2037 
2038 struct RegInfo {
2039   StringLiteral Name;
2040   RegisterKind Kind;
2041 };
2042 
2043 static constexpr RegInfo RegularRegisters[] = {
2044   {{"v"},    IS_VGPR},
2045   {{"s"},    IS_SGPR},
2046   {{"ttmp"}, IS_TTMP},
2047   {{"acc"},  IS_AGPR},
2048   {{"a"},    IS_AGPR},
2049 };
2050 
2051 static bool isRegularReg(RegisterKind Kind) {
2052   return Kind == IS_VGPR ||
2053          Kind == IS_SGPR ||
2054          Kind == IS_TTMP ||
2055          Kind == IS_AGPR;
2056 }
2057 
2058 static const RegInfo* getRegularRegInfo(StringRef Str) {
2059   for (const RegInfo &Reg : RegularRegisters)
2060     if (Str.startswith(Reg.Name))
2061       return &Reg;
2062   return nullptr;
2063 }
2064 
2065 static bool getRegNum(StringRef Str, unsigned& Num) {
2066   return !Str.getAsInteger(10, Num);
2067 }
2068 
2069 bool
2070 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2071                             const AsmToken &NextToken) const {
2072 
2073   // A list of consecutive registers: [s0,s1,s2,s3]
2074   if (Token.is(AsmToken::LBrac))
2075     return true;
2076 
2077   if (!Token.is(AsmToken::Identifier))
2078     return false;
2079 
2080   // A single register like s0 or a range of registers like s[0:1]
2081 
2082   StringRef Str = Token.getString();
2083   const RegInfo *Reg = getRegularRegInfo(Str);
2084   if (Reg) {
2085     StringRef RegName = Reg->Name;
2086     StringRef RegSuffix = Str.substr(RegName.size());
2087     if (!RegSuffix.empty()) {
2088       unsigned Num;
2089       // A single register with an index: rXX
2090       if (getRegNum(RegSuffix, Num))
2091         return true;
2092     } else {
2093       // A range of registers: r[XX:YY].
2094       if (NextToken.is(AsmToken::LBrac))
2095         return true;
2096     }
2097   }
2098 
2099   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2100 }
2101 
2102 bool
2103 AMDGPUAsmParser::isRegister()
2104 {
2105   return isRegister(getToken(), peekToken());
2106 }
2107 
2108 unsigned
2109 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2110                                unsigned RegNum,
2111                                unsigned RegWidth) {
2112 
2113   assert(isRegularReg(RegKind));
2114 
2115   unsigned AlignSize = 1;
2116   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2117     // SGPR and TTMP registers must be aligned.
2118     // Max required alignment is 4 dwords.
2119     AlignSize = std::min(RegWidth, 4u);
2120   }
2121 
2122   if (RegNum % AlignSize != 0)
2123     return AMDGPU::NoRegister;
2124 
2125   unsigned RegIdx = RegNum / AlignSize;
2126   int RCID = getRegClass(RegKind, RegWidth);
2127   if (RCID == -1)
2128     return AMDGPU::NoRegister;
2129 
2130   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2131   const MCRegisterClass RC = TRI->getRegClass(RCID);
2132   if (RegIdx >= RC.getNumRegs())
2133     return AMDGPU::NoRegister;
2134 
2135   return RC.getRegister(RegIdx);
2136 }
2137 
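// Parse a register index or range in brackets, e.g. "[0]" or "[0:3]",
// returning the first index in Num and the number of registers in Width.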
2138 bool
2139 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2140   int64_t RegLo, RegHi;
2141   if (!trySkipToken(AsmToken::LBrac))
2142     return false;
2143 
2144   if (!parseExpr(RegLo))
2145     return false;
2146 
2147   if (trySkipToken(AsmToken::Colon)) {
2148     if (!parseExpr(RegHi))
2149       return false;
2150   } else {
2151     RegHi = RegLo;
2152   }
2153 
2154   if (!trySkipToken(AsmToken::RBrac))
2155     return false;
2156 
2157   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2158     return false;
2159 
2160   Num = static_cast<unsigned>(RegLo);
2161   Width = (RegHi - RegLo) + 1;
2162   return true;
2163 }
2164 
2165 unsigned
2166 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2167                                  unsigned &RegNum,
2168                                  unsigned &RegWidth) {
2169   assert(isToken(AsmToken::Identifier));
2170   unsigned Reg = getSpecialRegForName(getTokenStr());
2171   if (Reg) {
2172     RegNum = 0;
2173     RegWidth = 1;
2174     RegKind = IS_SPECIAL;
2175     lex(); // skip register name
2176   }
2177   return Reg;
2178 }
2179 
2180 unsigned
2181 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2182                                  unsigned &RegNum,
2183                                  unsigned &RegWidth) {
2184   assert(isToken(AsmToken::Identifier));
2185   StringRef RegName = getTokenStr();
2186 
2187   const RegInfo *RI = getRegularRegInfo(RegName);
2188   if (!RI)
2189     return AMDGPU::NoRegister;
2190   lex(); // skip register name
2191 
2192   RegKind = RI->Kind;
2193   StringRef RegSuffix = RegName.substr(RI->Name.size());
2194   if (!RegSuffix.empty()) {
2195     // Single 32-bit register: vXX.
2196     if (!getRegNum(RegSuffix, RegNum))
2197       return AMDGPU::NoRegister;
2198     RegWidth = 1;
2199   } else {
2200     // Range of registers: v[XX:YY]. ":YY" is optional.
2201     if (!ParseRegRange(RegNum, RegWidth))
2202       return AMDGPU::NoRegister;
2203   }
2204 
2205   return getRegularReg(RegKind, RegNum, RegWidth);
2206 }
2207 
2208 unsigned
2209 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2210                               unsigned &RegNum,
2211                               unsigned &RegWidth) {
2212   unsigned Reg = AMDGPU::NoRegister;
2213 
2214   if (!trySkipToken(AsmToken::LBrac))
2215     return AMDGPU::NoRegister;
2216 
2217   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2218 
2219   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2220     return AMDGPU::NoRegister;
2221   if (RegWidth != 1)
2222     return AMDGPU::NoRegister;
2223 
2224   for (; trySkipToken(AsmToken::Comma); ) {
2225     RegisterKind NextRegKind;
2226     unsigned NextReg, NextRegNum, NextRegWidth;
2227 
2228     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2229       return AMDGPU::NoRegister;
2230     if (NextRegWidth != 1)
2231       return AMDGPU::NoRegister;
2232     if (NextRegKind != RegKind)
2233       return AMDGPU::NoRegister;
2234     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2235       return AMDGPU::NoRegister;
2236   }
2237 
2238   if (!trySkipToken(AsmToken::RBrac))
2239     return AMDGPU::NoRegister;
2240 
2241   if (isRegularReg(RegKind))
2242     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2243 
2244   return Reg;
2245 }
2246 
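// Parse a register reference in any supported form: a special register name,
// a regular register like v0 or s[0:1], or a register list like [s0,s1].
// Returns false if the register is invalid or unsupported on this subtarget.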
2247 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2248                                           unsigned &Reg,
2249                                           unsigned &RegNum,
2250                                           unsigned &RegWidth) {
2251   Reg = AMDGPU::NoRegister;
2252 
2253   if (isToken(AsmToken::Identifier)) {
2254     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2255     if (Reg == AMDGPU::NoRegister)
2256       Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2257   } else {
2258     Reg = ParseRegList(RegKind, RegNum, RegWidth);
2259   }
2260 
2261   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2262   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2263 }
2264 
2265 Optional<StringRef>
2266 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2267   switch (RegKind) {
2268   case IS_VGPR:
2269     return StringRef(".amdgcn.next_free_vgpr");
2270   case IS_SGPR:
2271     return StringRef(".amdgcn.next_free_sgpr");
2272   default:
2273     return None;
2274   }
2275 }
2276 
2277 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2278   auto SymbolName = getGprCountSymbolName(RegKind);
2279   assert(SymbolName && "initializing invalid register kind");
2280   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2281   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2282 }
2283 
2284 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2285                                             unsigned DwordRegIndex,
2286                                             unsigned RegWidth) {
2287   // Symbols are only defined for GCN targets
2288   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2289     return true;
2290 
2291   auto SymbolName = getGprCountSymbolName(RegKind);
2292   if (!SymbolName)
2293     return true;
2294   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2295 
2296   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2297   int64_t OldCount;
2298 
2299   if (!Sym->isVariable())
2300     return !Error(getParser().getTok().getLoc(),
2301                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2302   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2303     return !Error(
2304         getParser().getTok().getLoc(),
2305         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2306 
2307   if (OldCount <= NewMax)
2308     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2309 
2310   return true;
2311 }
2312 
2313 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2314   const auto &Tok = Parser.getTok();
2315   SMLoc StartLoc = Tok.getLoc();
2316   SMLoc EndLoc = Tok.getEndLoc();
2317   RegisterKind RegKind;
2318   unsigned Reg, RegNum, RegWidth;
2319 
2320   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2321     // FIXME: improve error messages (bug 41303).
2322     Error(StartLoc, "not a valid operand.");
2323     return nullptr;
2324   }
2325   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2326     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2327       return nullptr;
2328   } else
2329     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2330   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2331 }
2332 
2333 OperandMatchResultTy
2334 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2335   // TODO: add syntactic sugar for 1/(2*PI)
2336 
2337   assert(!isRegister());
2338   assert(!isModifier());
2339 
2340   const auto& Tok = getToken();
2341   const auto& NextTok = peekToken();
2342   bool IsReal = Tok.is(AsmToken::Real);
2343   SMLoc S = getLoc();
2344   bool Negate = false;
2345 
2346   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2347     lex();
2348     IsReal = true;
2349     Negate = true;
2350   }
2351 
2352   if (IsReal) {
2353     // Floating-point expressions are not supported;
2354     // only floating-point literals with an optional
2355     // sign are allowed.
2356 
2357     StringRef Num = getTokenStr();
2358     lex();
2359 
2360     APFloat RealVal(APFloat::IEEEdouble());
2361     auto roundMode = APFloat::rmNearestTiesToEven;
2362     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2363       return MatchOperand_ParseFail;
2364     }
2365     if (Negate)
2366       RealVal.changeSign();
2367 
2368     Operands.push_back(
2369       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2370                                AMDGPUOperand::ImmTyNone, true));
2371 
2372     return MatchOperand_Success;
2373 
2374   } else {
2375     int64_t IntVal;
2376     const MCExpr *Expr;
2377     SMLoc S = getLoc();
2378 
2379     if (HasSP3AbsModifier) {
2380       // This is a workaround for handling expressions
2381       // as arguments of SP3 'abs' modifier, for example:
2382       //     |1.0|
2383       //     |-1|
2384       //     |1+x|
2385       // This syntax is not compatible with syntax of standard
2386       // MC expressions (due to the trailing '|').
2387       SMLoc EndLoc;
2388       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2389         return MatchOperand_ParseFail;
2390     } else {
2391       if (Parser.parseExpression(Expr))
2392         return MatchOperand_ParseFail;
2393     }
2394 
2395     if (Expr->evaluateAsAbsolute(IntVal)) {
2396       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2397     } else {
2398       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2399     }
2400 
2401     return MatchOperand_Success;
2402   }
2403 
2404   return MatchOperand_NoMatch;
2405 }
2406 
2407 OperandMatchResultTy
2408 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2409   if (!isRegister())
2410     return MatchOperand_NoMatch;
2411 
2412   if (auto R = parseRegister()) {
2413     assert(R->isReg());
2414     Operands.push_back(std::move(R));
2415     return MatchOperand_Success;
2416   }
2417   return MatchOperand_ParseFail;
2418 }
2419 
2420 OperandMatchResultTy
2421 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2422   auto res = parseReg(Operands);
2423   if (res != MatchOperand_NoMatch) {
2424     return res;
2425   } else if (isModifier()) {
2426     return MatchOperand_NoMatch;
2427   } else {
2428     return parseImm(Operands, HasSP3AbsMod);
2429   }
2430 }
2431 
2432 bool
2433 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2434   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2435     const auto &str = Token.getString();
2436     return str == "abs" || str == "neg" || str == "sext";
2437   }
2438   return false;
2439 }
2440 
2441 bool
2442 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2443   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2444 }
2445 
2446 bool
2447 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2448   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2449 }
2450 
2451 bool
2452 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2453   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2454 }
2455 
2456 // Check if this is an operand modifier or an opcode modifier
2457 // which may look like an expression but is not. We should
2458 // avoid parsing these modifiers as expressions. Currently
2459 // recognized sequences are:
2460 //   |...|
2461 //   abs(...)
2462 //   neg(...)
2463 //   sext(...)
2464 //   -reg
2465 //   -|...|
2466 //   -abs(...)
2467 //   name:...
2468 // Note that simple opcode modifiers like 'gds' may be parsed as
2469 // expressions; this is a special case. See getExpressionAsToken.
2470 //
2471 bool
2472 AMDGPUAsmParser::isModifier() {
2473 
2474   AsmToken Tok = getToken();
2475   AsmToken NextToken[2];
2476   peekTokens(NextToken);
2477 
2478   return isOperandModifier(Tok, NextToken[0]) ||
2479          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2480          isOpcodeModifierWithVal(Tok, NextToken[0]);
2481 }
2482 
2483 // Check if the current token is an SP3 'neg' modifier.
2484 // Currently this modifier is allowed in the following context:
2485 //
2486 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2487 // 2. Before an 'abs' modifier: -abs(...)
2488 // 3. Before an SP3 'abs' modifier: -|...|
2489 //
2490 // In all other cases "-" is handled as a part
2491 // of an expression that follows the sign.
2492 //
2493 // Note: When "-" is followed by an integer literal,
2494 // this is interpreted as integer negation rather
2495 // than a floating-point NEG modifier applied to N.
2496 // Besides being counter-intuitive, such use of a floating-point
2497 // NEG modifier would have resulted in different meaning
2498 // of integer literals used with VOP1/2/C and VOP3,
2499 // for example:
2500 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2501 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2502 // Negative fp literals with a preceding "-" are
2503 // handled likewise for uniformity
2504 //
2505 bool
2506 AMDGPUAsmParser::parseSP3NegModifier() {
2507 
2508   AsmToken NextToken[2];
2509   peekTokens(NextToken);
2510 
2511   if (isToken(AsmToken::Minus) &&
2512       (isRegister(NextToken[0], NextToken[1]) ||
2513        NextToken[0].is(AsmToken::Pipe) ||
2514        isId(NextToken[0], "abs"))) {
2515     lex();
2516     return true;
2517   }
2518 
2519   return false;
2520 }
2521 
2522 OperandMatchResultTy
2523 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2524                                               bool AllowImm) {
2525   bool Neg, SP3Neg;
2526   bool Abs, SP3Abs;
2527   SMLoc Loc;
2528 
2529   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2530   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2531     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2532     return MatchOperand_ParseFail;
2533   }
2534 
2535   SP3Neg = parseSP3NegModifier();
2536 
2537   Loc = getLoc();
2538   Neg = trySkipId("neg");
2539   if (Neg && SP3Neg) {
2540     Error(Loc, "expected register or immediate");
2541     return MatchOperand_ParseFail;
2542   }
2543   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2544     return MatchOperand_ParseFail;
2545 
2546   Abs = trySkipId("abs");
2547   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2548     return MatchOperand_ParseFail;
2549 
2550   Loc = getLoc();
2551   SP3Abs = trySkipToken(AsmToken::Pipe);
2552   if (Abs && SP3Abs) {
2553     Error(Loc, "expected register or immediate");
2554     return MatchOperand_ParseFail;
2555   }
2556 
2557   OperandMatchResultTy Res;
2558   if (AllowImm) {
2559     Res = parseRegOrImm(Operands, SP3Abs);
2560   } else {
2561     Res = parseReg(Operands);
2562   }
2563   if (Res != MatchOperand_Success) {
2564     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2565   }
2566 
2567   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2568     return MatchOperand_ParseFail;
2569   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2570     return MatchOperand_ParseFail;
2571   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2572     return MatchOperand_ParseFail;
2573 
2574   AMDGPUOperand::Modifiers Mods;
2575   Mods.Abs = Abs || SP3Abs;
2576   Mods.Neg = Neg || SP3Neg;
2577 
2578   if (Mods.hasFPModifiers()) {
2579     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2580     if (Op.isExpr()) {
2581       Error(Op.getStartLoc(), "expected an absolute expression");
2582       return MatchOperand_ParseFail;
2583     }
2584     Op.setModifiers(Mods);
2585   }
2586   return MatchOperand_Success;
2587 }
2588 
2589 OperandMatchResultTy
2590 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2591                                                bool AllowImm) {
2592   bool Sext = trySkipId("sext");
2593   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2594     return MatchOperand_ParseFail;
2595 
2596   OperandMatchResultTy Res;
2597   if (AllowImm) {
2598     Res = parseRegOrImm(Operands);
2599   } else {
2600     Res = parseReg(Operands);
2601   }
2602   if (Res != MatchOperand_Success) {
2603     return Sext? MatchOperand_ParseFail : Res;
2604   }
2605 
2606   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2607     return MatchOperand_ParseFail;
2608 
2609   AMDGPUOperand::Modifiers Mods;
2610   Mods.Sext = Sext;
2611 
2612   if (Mods.hasIntModifiers()) {
2613     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2614     if (Op.isExpr()) {
2615       Error(Op.getStartLoc(), "expected an absolute expression");
2616       return MatchOperand_ParseFail;
2617     }
2618     Op.setModifiers(Mods);
2619   }
2620 
2621   return MatchOperand_Success;
2622 }
2623 
2624 OperandMatchResultTy
2625 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2626   return parseRegOrImmWithFPInputMods(Operands, false);
2627 }
2628 
2629 OperandMatchResultTy
2630 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2631   return parseRegOrImmWithIntInputMods(Operands, false);
2632 }
2633 
2634 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2635   auto Loc = getLoc();
2636   if (trySkipId("off")) {
2637     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2638                                                 AMDGPUOperand::ImmTyOff, false));
2639     return MatchOperand_Success;
2640   }
2641 
2642   if (!isRegister())
2643     return MatchOperand_NoMatch;
2644 
2645   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2646   if (Reg) {
2647     Operands.push_back(std::move(Reg));
2648     return MatchOperand_Success;
2649   }
2650 
2651   return MatchOperand_ParseFail;
2652 
2653 }
2654 
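// Reject matches that conflict with the encoding forced for this mnemonic
// (32-bit vs VOP3, DPP, SDWA) and enforce instruction-specific restrictions
// such as dst_sel for v_mac SDWA.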
2655 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2656   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2657 
2658   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2659       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2660       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2661       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2662     return Match_InvalidOperand;
2663 
2664   if ((TSFlags & SIInstrFlags::VOP3) &&
2665       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2666       getForcedEncodingSize() != 64)
2667     return Match_PreferE32;
2668 
2669   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2670       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2671     // v_mac_f32/16 allow only dst_sel == DWORD;
2672     auto OpNum =
2673         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2674     const auto &Op = Inst.getOperand(OpNum);
2675     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2676       return Match_InvalidOperand;
2677     }
2678   }
2679 
2680   return Match_Success;
2681 }
2682 
2683 // What asm variants we should check
2684 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2685   if (getForcedEncodingSize() == 32) {
2686     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2687     return makeArrayRef(Variants);
2688   }
2689 
2690   if (isForcedVOP3()) {
2691     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2692     return makeArrayRef(Variants);
2693   }
2694 
2695   if (isForcedSDWA()) {
2696     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2697                                         AMDGPUAsmVariants::SDWA9};
2698     return makeArrayRef(Variants);
2699   }
2700 
2701   if (isForcedDPP()) {
2702     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2703     return makeArrayRef(Variants);
2704   }
2705 
2706   static const unsigned Variants[] = {
2707     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2708     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2709   };
2710 
2711   return makeArrayRef(Variants);
2712 }
2713 
2714 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2715   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2716   const unsigned Num = Desc.getNumImplicitUses();
2717   for (unsigned i = 0; i < Num; ++i) {
2718     unsigned Reg = Desc.ImplicitUses[i];
2719     switch (Reg) {
2720     case AMDGPU::FLAT_SCR:
2721     case AMDGPU::VCC:
2722     case AMDGPU::VCC_LO:
2723     case AMDGPU::VCC_HI:
2724     case AMDGPU::M0:
2725       return Reg;
2726     default:
2727       break;
2728     }
2729   }
2730   return AMDGPU::NoRegister;
2731 }
2732 
2733 // NB: This code is correct only when used to check constant
2734 // bus limitations because GFX7 has no f16 inline constants.
2735 // Note that there are no cases when a GFX7 opcode violates
2736 // constant bus limitations due to the use of an f16 constant.
2737 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2738                                        unsigned OpIdx) const {
2739   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2740 
2741   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2742     return false;
2743   }
2744 
2745   const MCOperand &MO = Inst.getOperand(OpIdx);
2746 
2747   int64_t Val = MO.getImm();
2748   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2749 
2750   switch (OpSize) { // expected operand size
2751   case 8:
2752     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2753   case 4:
2754     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2755   case 2: {
2756     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2757     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2758         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2759         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2760         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2761         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2762         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2763       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2764     } else {
2765       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2766     }
2767   }
2768   default:
2769     llvm_unreachable("invalid operand size");
2770   }
2771 }
2772 
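// Return how many scalar values the instruction may read via the constant bus.
// GFX10 generally allows two; earlier targets and 64-bit shifts allow only one.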
2773 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2774   if (!isGFX10())
2775     return 1;
2776 
2777   switch (Opcode) {
2778   // 64-bit shift instructions can use only one scalar value input
2779   case AMDGPU::V_LSHLREV_B64:
2780   case AMDGPU::V_LSHLREV_B64_gfx10:
2781   case AMDGPU::V_LSHL_B64:
2782   case AMDGPU::V_LSHRREV_B64:
2783   case AMDGPU::V_LSHRREV_B64_gfx10:
2784   case AMDGPU::V_LSHR_B64:
2785   case AMDGPU::V_ASHRREV_I64:
2786   case AMDGPU::V_ASHRREV_I64_gfx10:
2787   case AMDGPU::V_ASHR_I64:
2788     return 1;
2789   default:
2790     return 2;
2791   }
2792 }
2793 
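// Return true if operand OpIdx reads the constant bus: a non-null SGPR, an
// expression, or a literal that is not an inline constant.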
2794 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2795   const MCOperand &MO = Inst.getOperand(OpIdx);
2796   if (MO.isImm()) {
2797     return !isInlineConstant(Inst, OpIdx);
2798   } else if (MO.isReg()) {
2799     auto Reg = MO.getReg();
2800     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2801     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2802   } else {
2803     return true;
2804   }
2805 }
2806 
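// Check that the instruction does not read more scalar values (SGPRs and
// literals) via the constant bus than the target allows.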
2807 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2808   const unsigned Opcode = Inst.getOpcode();
2809   const MCInstrDesc &Desc = MII.get(Opcode);
2810   unsigned ConstantBusUseCount = 0;
2811   unsigned NumLiterals = 0;
2812   unsigned LiteralSize;
2813 
2814   if (Desc.TSFlags &
2815       (SIInstrFlags::VOPC |
2816        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2817        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2818        SIInstrFlags::SDWA)) {
2819     // Check special imm operands (used by madmk, etc)
2820     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2821       ++ConstantBusUseCount;
2822     }
2823 
2824     SmallDenseSet<unsigned> SGPRsUsed;
2825     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2826     if (SGPRUsed != AMDGPU::NoRegister) {
2827       SGPRsUsed.insert(SGPRUsed);
2828       ++ConstantBusUseCount;
2829     }
2830 
2831     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2832     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2833     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2834 
2835     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2836 
2837     for (int OpIdx : OpIndices) {
2838       if (OpIdx == -1) break;
2839 
2840       const MCOperand &MO = Inst.getOperand(OpIdx);
2841       if (usesConstantBus(Inst, OpIdx)) {
2842         if (MO.isReg()) {
2843           const unsigned Reg = mc2PseudoReg(MO.getReg());
2844           // Pairs of registers with a partial intersection like these
2845           //   s0, s[0:1]
2846           //   flat_scratch_lo, flat_scratch
2847           //   flat_scratch_lo, flat_scratch_hi
2848           // are theoretically valid but they are disabled anyway.
2849           // Note that this code mimics SIInstrInfo::verifyInstruction
2850           if (!SGPRsUsed.count(Reg)) {
2851             SGPRsUsed.insert(Reg);
2852             ++ConstantBusUseCount;
2853           }
2854         } else { // Expression or a literal
2855 
2856           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2857             continue; // special operand like VINTERP attr_chan
2858 
2859           // An instruction may use only one literal.
2860           // This has been validated on the previous step.
2861           // See validateVOP3Literal.
2862           // This literal may be used as more than one operand.
2863           // If all these operands are of the same size,
2864           // this literal counts as one scalar value.
2865           // Otherwise it counts as 2 scalar values.
2866           // See "GFX10 Shader Programming", section 3.6.2.3.
2867 
2868           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2869           if (Size < 4) Size = 4;
2870 
2871           if (NumLiterals == 0) {
2872             NumLiterals = 1;
2873             LiteralSize = Size;
2874           } else if (LiteralSize != Size) {
2875             NumLiterals = 2;
2876           }
2877         }
2878       }
2879     }
2880   }
2881   ConstantBusUseCount += NumLiterals;
2882 
2883   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2884 }
2885 
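// For instructions whose destination is marked early-clobber, check that the
// destination register does not overlap any source register.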
2886 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2887   const unsigned Opcode = Inst.getOpcode();
2888   const MCInstrDesc &Desc = MII.get(Opcode);
2889 
2890   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2891   if (DstIdx == -1 ||
2892       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2893     return true;
2894   }
2895 
2896   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2897 
2898   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2899   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2900   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2901 
2902   assert(DstIdx != -1);
2903   const MCOperand &Dst = Inst.getOperand(DstIdx);
2904   assert(Dst.isReg());
2905   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2906 
2907   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2908 
2909   for (int SrcIdx : SrcIndices) {
2910     if (SrcIdx == -1) break;
2911     const MCOperand &Src = Inst.getOperand(SrcIdx);
2912     if (Src.isReg()) {
2913       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2914       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2915         return false;
2916       }
2917     }
2918   }
2919 
2920   return true;
2921 }
2922 
2923 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2924 
2925   const unsigned Opc = Inst.getOpcode();
2926   const MCInstrDesc &Desc = MII.get(Opc);
2927 
2928   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2929     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2930     assert(ClampIdx != -1);
2931     return Inst.getOperand(ClampIdx).getImm() == 0;
2932   }
2933 
2934   return true;
2935 }
2936 
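// Check that the vdata size of a MIMG instruction matches the number of
// enabled dmask channels, plus one dword if tfe is set and halved for
// packed d16.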
2937 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2938 
2939   const unsigned Opc = Inst.getOpcode();
2940   const MCInstrDesc &Desc = MII.get(Opc);
2941 
2942   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2943     return true;
2944 
2945   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2946   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2947   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2948 
2949   assert(VDataIdx != -1);
2950   assert(DMaskIdx != -1);
2951   assert(TFEIdx != -1);
2952 
2953   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2954   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2955   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2956   if (DMask == 0)
2957     DMask = 1;
2958 
2959   unsigned DataSize =
2960     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2961   if (hasPackedD16()) {
2962     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2963     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2964       DataSize = (DataSize + 1) / 2;
2965   }
2966 
2967   return (VDataSize / 4) == DataSize + TFESize;
2968 }
2969 
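// GFX10 only: check that the number of vaddr registers matches the address
// size implied by the dim operand and the base opcode (gradients, coordinates,
// lod/clamp/mip).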
2970 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2971   const unsigned Opc = Inst.getOpcode();
2972   const MCInstrDesc &Desc = MII.get(Opc);
2973 
2974   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2975     return true;
2976 
2977   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2978   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2979       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2980   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2981   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2982   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2983 
2984   assert(VAddr0Idx != -1);
2985   assert(SrsrcIdx != -1);
2986   assert(DimIdx != -1);
2987   assert(SrsrcIdx > VAddr0Idx);
2988 
2989   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2990   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2991   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2992   unsigned VAddrSize =
2993       IsNSA ? SrsrcIdx - VAddr0Idx
2994             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2995 
2996   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2997                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2998                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2999                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3000   if (!IsNSA) {
3001     if (AddrSize > 8)
3002       AddrSize = 16;
3003     else if (AddrSize > 4)
3004       AddrSize = 8;
3005   }
3006 
3007   return VAddrSize == AddrSize;
3008 }
3009 
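// Image atomics must use a dmask of 0x1, 0x3 or 0xf.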
3010 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3011 
3012   const unsigned Opc = Inst.getOpcode();
3013   const MCInstrDesc &Desc = MII.get(Opc);
3014 
3015   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3016     return true;
3017   if (!Desc.mayLoad() || !Desc.mayStore())
3018     return true; // Not atomic
3019 
3020   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3021   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3022 
3023   // This is an incomplete check because image_atomic_cmpswap
3024   // may only use 0x3 and 0xf while other atomic operations
3025   // may use 0x1 and 0x3. However these limitations are
3026   // verified when we check that dmask matches dst size.
3027   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3028 }
3029 
3030 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3031 
3032   const unsigned Opc = Inst.getOpcode();
3033   const MCInstrDesc &Desc = MII.get(Opc);
3034 
3035   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3036     return true;
3037 
3038   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3039   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3040 
3041   // GATHER4 instructions use dmask in a different fashion compared to
3042   // other MIMG instructions. The only useful DMASK values are
3043   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3044   // (red,red,red,red) etc.) The ISA document doesn't mention
3045   // this.
3046   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3047 }
3048 
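// d16 MIMG instructions are not supported on SI/CI.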
3049 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3050 
3051   const unsigned Opc = Inst.getOpcode();
3052   const MCInstrDesc &Desc = MII.get(Opc);
3053 
3054   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3055     return true;
3056 
3057   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3058   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3059     if (isCI() || isSI())
3060       return false;
3061   }
3062 
3063   return true;
3064 }
3065 
3066 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3067   const unsigned Opc = Inst.getOpcode();
3068   const MCInstrDesc &Desc = MII.get(Opc);
3069 
3070   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3071     return true;
3072 
3073   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3074   if (DimIdx < 0)
3075     return true;
3076 
3077   long Imm = Inst.getOperand(DimIdx).getImm();
3078   if (Imm < 0 || Imm >= 8)
3079     return false;
3080 
3081   return true;
3082 }
3083 
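// Return true for 'rev' opcode variants (v_subrev_*, v_lshlrev_*,
// v_ashrrev_*, ...) as listed below.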
3084 static bool IsRevOpcode(const unsigned Opcode)
3085 {
3086   switch (Opcode) {
3087   case AMDGPU::V_SUBREV_F32_e32:
3088   case AMDGPU::V_SUBREV_F32_e64:
3089   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3090   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3091   case AMDGPU::V_SUBREV_F32_e32_vi:
3092   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3093   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3094   case AMDGPU::V_SUBREV_F32_e64_vi:
3095 
3096   case AMDGPU::V_SUBREV_I32_e32:
3097   case AMDGPU::V_SUBREV_I32_e64:
3098   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3099   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3100 
3101   case AMDGPU::V_SUBBREV_U32_e32:
3102   case AMDGPU::V_SUBBREV_U32_e64:
3103   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3104   case AMDGPU::V_SUBBREV_U32_e32_vi:
3105   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3106   case AMDGPU::V_SUBBREV_U32_e64_vi:
3107 
3108   case AMDGPU::V_SUBREV_U32_e32:
3109   case AMDGPU::V_SUBREV_U32_e64:
3110   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3111   case AMDGPU::V_SUBREV_U32_e32_vi:
3112   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3113   case AMDGPU::V_SUBREV_U32_e64_vi:
3114 
3115   case AMDGPU::V_SUBREV_F16_e32:
3116   case AMDGPU::V_SUBREV_F16_e64:
3117   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3118   case AMDGPU::V_SUBREV_F16_e32_vi:
3119   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3120   case AMDGPU::V_SUBREV_F16_e64_vi:
3121 
3122   case AMDGPU::V_SUBREV_U16_e32:
3123   case AMDGPU::V_SUBREV_U16_e64:
3124   case AMDGPU::V_SUBREV_U16_e32_vi:
3125   case AMDGPU::V_SUBREV_U16_e64_vi:
3126 
3127   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3128   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3129   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3130 
3131   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3132   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3133 
3134   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3135   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3136 
3137   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3138   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3139 
3140   case AMDGPU::V_LSHRREV_B32_e32:
3141   case AMDGPU::V_LSHRREV_B32_e64:
3142   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3143   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3144   case AMDGPU::V_LSHRREV_B32_e32_vi:
3145   case AMDGPU::V_LSHRREV_B32_e64_vi:
3146   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3147   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3148 
3149   case AMDGPU::V_ASHRREV_I32_e32:
3150   case AMDGPU::V_ASHRREV_I32_e64:
3151   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3152   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3153   case AMDGPU::V_ASHRREV_I32_e32_vi:
3154   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3155   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3156   case AMDGPU::V_ASHRREV_I32_e64_vi:
3157 
3158   case AMDGPU::V_LSHLREV_B32_e32:
3159   case AMDGPU::V_LSHLREV_B32_e64:
3160   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3161   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3162   case AMDGPU::V_LSHLREV_B32_e32_vi:
3163   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3164   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3165   case AMDGPU::V_LSHLREV_B32_e64_vi:
3166 
3167   case AMDGPU::V_LSHLREV_B16_e32:
3168   case AMDGPU::V_LSHLREV_B16_e64:
3169   case AMDGPU::V_LSHLREV_B16_e32_vi:
3170   case AMDGPU::V_LSHLREV_B16_e64_vi:
3171   case AMDGPU::V_LSHLREV_B16_gfx10:
3172 
3173   case AMDGPU::V_LSHRREV_B16_e32:
3174   case AMDGPU::V_LSHRREV_B16_e64:
3175   case AMDGPU::V_LSHRREV_B16_e32_vi:
3176   case AMDGPU::V_LSHRREV_B16_e64_vi:
3177   case AMDGPU::V_LSHRREV_B16_gfx10:
3178 
3179   case AMDGPU::V_ASHRREV_I16_e32:
3180   case AMDGPU::V_ASHRREV_I16_e64:
3181   case AMDGPU::V_ASHRREV_I16_e32_vi:
3182   case AMDGPU::V_ASHRREV_I16_e64_vi:
3183   case AMDGPU::V_ASHRREV_I16_gfx10:
3184 
3185   case AMDGPU::V_LSHLREV_B64:
3186   case AMDGPU::V_LSHLREV_B64_gfx10:
3187   case AMDGPU::V_LSHLREV_B64_vi:
3188 
3189   case AMDGPU::V_LSHRREV_B64:
3190   case AMDGPU::V_LSHRREV_B64_gfx10:
3191   case AMDGPU::V_LSHRREV_B64_vi:
3192 
3193   case AMDGPU::V_ASHRREV_I64:
3194   case AMDGPU::V_ASHRREV_I64_gfx10:
3195   case AMDGPU::V_ASHRREV_I64_vi:
3196 
3197   case AMDGPU::V_PK_LSHLREV_B16:
3198   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3199   case AMDGPU::V_PK_LSHLREV_B16_vi:
3200 
3201   case AMDGPU::V_PK_LSHRREV_B16:
3202   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3203   case AMDGPU::V_PK_LSHRREV_B16_vi:
3204   case AMDGPU::V_PK_ASHRREV_I16:
3205   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3206   case AMDGPU::V_PK_ASHRREV_I16_vi:
3207     return true;
3208   default:
3209     return false;
3210   }
3211 }
3212 
3213 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3214 
3215   using namespace SIInstrFlags;
3216   const unsigned Opcode = Inst.getOpcode();
3217   const MCInstrDesc &Desc = MII.get(Opcode);
3218 
3219   // The lds_direct register is defined so that it can be used
3220   // only with 9-bit source operands. Ignore encodings that do not accept these.
3221   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3222     return true;
3223 
3224   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3225   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3226   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3227 
3228   const int SrcIndices[] = { Src1Idx, Src2Idx };
3229 
3230   // lds_direct cannot be specified as either src1 or src2.
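  // E.g. (illustrative) "v_add_f32 v0, v1, lds_direct" is rejected here, while
  // "v_mov_b32 v0, lds_direct" (lds_direct as src0) is checked further below.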
3231   for (int SrcIdx : SrcIndices) {
3232     if (SrcIdx == -1) break;
3233     const MCOperand &Src = Inst.getOperand(SrcIdx);
3234     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3235       return false;
3236     }
3237   }
3238 
3239   if (Src0Idx == -1)
3240     return true;
3241 
3242   const MCOperand &Src = Inst.getOperand(Src0Idx);
3243   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3244     return true;
3245 
3246   // lds_direct is specified as src0. Check additional limitations.
3247   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3248 }
3249 
3250 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3251   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3252     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3253     if (Op.isFlatOffset())
3254       return Op.getStartLoc();
3255   }
3256   return getLoc();
3257 }
3258 
3259 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3260                                          const OperandVector &Operands) {
3261   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3262   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3263     return true;
3264 
3265   auto Opcode = Inst.getOpcode();
3266   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3267   assert(OpNum != -1);
3268 
3269   const auto &Op = Inst.getOperand(OpNum);
3270   if (!hasFlatOffsets() && Op.getImm() != 0) {
3271     Error(getFlatOffsetLoc(Operands),
3272           "flat offset modifier is not supported on this GPU");
3273     return false;
3274   }
3275 
3276   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3277   // For FLAT segment the offset must be positive;
3278   // MSB is ignored and forced to zero.
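  // E.g. (illustrative):
  //   global_load_dword v0, v[1:2], off offset:-8   ; signed offset, non-FLAT segment
  //   flat_load_dword v0, v[1:2] offset:8            ; unsigned offset, FLAT segment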
3279   unsigned OffsetSize = isGFX9() ? 13 : 12;
3280   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3281     if (!isIntN(OffsetSize, Op.getImm())) {
3282       Error(getFlatOffsetLoc(Operands),
3283             isGFX9() ? "expected a 13-bit signed offset" :
3284                        "expected a 12-bit signed offset");
3285       return false;
3286     }
3287   } else {
3288     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3289       Error(getFlatOffsetLoc(Operands),
3290             isGFX9() ? "expected a 12-bit unsigned offset" :
3291                        "expected an 11-bit unsigned offset");
3292       return false;
3293     }
3294   }
3295 
3296   return true;
3297 }
3298 
3299 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3300   unsigned Opcode = Inst.getOpcode();
3301   const MCInstrDesc &Desc = MII.get(Opcode);
3302   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3303     return true;
3304 
3305   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3306   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3307 
3308   const int OpIndices[] = { Src0Idx, Src1Idx };
3309 
3310   unsigned NumExprs = 0;
3311   unsigned NumLiterals = 0;
3312   uint32_t LiteralValue;
3313 
3314   for (int OpIdx : OpIndices) {
3315     if (OpIdx == -1) break;
3316 
3317     const MCOperand &MO = Inst.getOperand(OpIdx);
3318     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3319     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3320       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3321         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3322         if (NumLiterals == 0 || LiteralValue != Value) {
3323           LiteralValue = Value;
3324           ++NumLiterals;
3325         }
3326       } else if (MO.isExpr()) {
3327         ++NumExprs;
3328       }
3329     }
3330   }
3331 
3332   return NumLiterals + NumExprs <= 1;
3333 }
3334 
3335 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3336   const unsigned Opc = Inst.getOpcode();
3337   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3338       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3339     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3340     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3341 
3342     if (OpSel & ~3)
3343       return false;
3344   }
3345   return true;
3346 }
3347 
3348 // Check if VCC register matches wavefront size
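// E.g. in wave32 mode carry operations must name vcc_lo; in wave64 mode, vcc.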
3349 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3350   auto FB = getFeatureBits();
3351   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3352     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3353 }
3354 
3355 // VOP3 literal is only allowed in GFX10+ and only one can be used
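// E.g. (illustrative) "v_add3_u32 v0, v1, 0x12345678, v2" requires a VOP3
// literal and is therefore rejected on targets without FeatureVOP3Literal.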
3356 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3357   unsigned Opcode = Inst.getOpcode();
3358   const MCInstrDesc &Desc = MII.get(Opcode);
3359   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3360     return true;
3361 
3362   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3363   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3364   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3365 
3366   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3367 
3368   unsigned NumExprs = 0;
3369   unsigned NumLiterals = 0;
3370   uint32_t LiteralValue;
3371 
3372   for (int OpIdx : OpIndices) {
3373     if (OpIdx == -1) break;
3374 
3375     const MCOperand &MO = Inst.getOperand(OpIdx);
3376     if (!MO.isImm() && !MO.isExpr())
3377       continue;
3378     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3379       continue;
3380 
3381     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3382         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3383       return false;
3384 
3385     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3386       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3387       if (NumLiterals == 0 || LiteralValue != Value) {
3388         LiteralValue = Value;
3389         ++NumLiterals;
3390       }
3391     } else if (MO.isExpr()) {
3392       ++NumExprs;
3393     }
3394   }
3395   NumLiterals += NumExprs;
3396 
3397   return !NumLiterals ||
3398          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3399 }
3400 
3401 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3402                                           const SMLoc &IDLoc,
3403                                           const OperandVector &Operands) {
3404   if (!validateLdsDirect(Inst)) {
3405     Error(IDLoc,
3406       "invalid use of lds_direct");
3407     return false;
3408   }
3409   if (!validateSOPLiteral(Inst)) {
3410     Error(IDLoc,
3411       "only one literal operand is allowed");
3412     return false;
3413   }
3414   if (!validateVOP3Literal(Inst)) {
3415     Error(IDLoc,
3416       "invalid literal operand");
3417     return false;
3418   }
3419   if (!validateConstantBusLimitations(Inst)) {
3420     Error(IDLoc,
3421       "invalid operand (violates constant bus restrictions)");
3422     return false;
3423   }
3424   if (!validateEarlyClobberLimitations(Inst)) {
3425     Error(IDLoc,
3426       "destination must be different than all sources");
3427     return false;
3428   }
3429   if (!validateIntClampSupported(Inst)) {
3430     Error(IDLoc,
3431       "integer clamping is not supported on this GPU");
3432     return false;
3433   }
3434   if (!validateOpSel(Inst)) {
3435     Error(IDLoc,
3436       "invalid op_sel operand");
3437     return false;
3438   }
3439   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3440   if (!validateMIMGD16(Inst)) {
3441     Error(IDLoc,
3442       "d16 modifier is not supported on this GPU");
3443     return false;
3444   }
3445   if (!validateMIMGDim(Inst)) {
3446     Error(IDLoc, "dim modifier is required on this GPU");
3447     return false;
3448   }
3449   if (!validateMIMGDataSize(Inst)) {
3450     Error(IDLoc,
3451       "image data size does not match dmask and tfe");
3452     return false;
3453   }
3454   if (!validateMIMGAddrSize(Inst)) {
3455     Error(IDLoc,
3456       "image address size does not match dim and a16");
3457     return false;
3458   }
3459   if (!validateMIMGAtomicDMask(Inst)) {
3460     Error(IDLoc,
3461       "invalid atomic image dmask");
3462     return false;
3463   }
3464   if (!validateMIMGGatherDMask(Inst)) {
3465     Error(IDLoc,
3466       "invalid image_gather dmask: only one bit must be set");
3467     return false;
3468   }
3469   if (!validateFlatOffset(Inst, Operands)) {
3470     return false;
3471   }
3472 
3473   return true;
3474 }
3475 
3476 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3477                                             const FeatureBitset &FBS,
3478                                             unsigned VariantID = 0);
3479 
3480 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3481                                               OperandVector &Operands,
3482                                               MCStreamer &Out,
3483                                               uint64_t &ErrorInfo,
3484                                               bool MatchingInlineAsm) {
3485   MCInst Inst;
3486   unsigned Result = Match_Success;
3487   for (auto Variant : getMatchedVariants()) {
3488     uint64_t EI;
3489     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3490                                   Variant);
3491     // Match statuses are ordered from least to most specific; the most
3492     // specific status seen across all variants is used as the result:
3493     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3494     if ((R == Match_Success) ||
3495         (R == Match_PreferE32) ||
3496         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3497         (R == Match_InvalidOperand && Result != Match_MissingFeature
3498                                    && Result != Match_PreferE32) ||
3499         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3500                                    && Result != Match_MissingFeature
3501                                    && Result != Match_PreferE32)) {
3502       Result = R;
3503       ErrorInfo = EI;
3504     }
3505     if (R == Match_Success)
3506       break;
3507   }
3508 
3509   switch (Result) {
3510   default: break;
3511   case Match_Success:
3512     if (!validateInstruction(Inst, IDLoc, Operands)) {
3513       return true;
3514     }
3515     Inst.setLoc(IDLoc);
3516     Out.EmitInstruction(Inst, getSTI());
3517     return false;
3518 
3519   case Match_MissingFeature:
3520     return Error(IDLoc, "instruction not supported on this GPU");
3521 
3522   case Match_MnemonicFail: {
3523     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3524     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3525         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3526     return Error(IDLoc, "invalid instruction" + Suggestion,
3527                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3528   }
3529 
3530   case Match_InvalidOperand: {
3531     SMLoc ErrorLoc = IDLoc;
3532     if (ErrorInfo != ~0ULL) {
3533       if (ErrorInfo >= Operands.size()) {
3534         return Error(IDLoc, "too few operands for instruction");
3535       }
3536       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3537       if (ErrorLoc == SMLoc())
3538         ErrorLoc = IDLoc;
3539     }
3540     return Error(ErrorLoc, "invalid operand for instruction");
3541   }
3542 
3543   case Match_PreferE32:
3544     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3545                         "should be encoded as e32");
3546   }
3547   llvm_unreachable("Implement any new match types added!");
3548 }
3549 
3550 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3551   int64_t Tmp = -1;
3552   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3553     return true;
3554   }
3555   if (getParser().parseAbsoluteExpression(Tmp)) {
3556     return true;
3557   }
3558   Ret = static_cast<uint32_t>(Tmp);
3559   return false;
3560 }
3561 
3562 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3563                                                uint32_t &Minor) {
3564   if (ParseAsAbsoluteExpression(Major))
3565     return TokError("invalid major version");
3566 
3567   if (getLexer().isNot(AsmToken::Comma))
3568     return TokError("minor version number required, comma expected");
3569   Lex();
3570 
3571   if (ParseAsAbsoluteExpression(Minor))
3572     return TokError("invalid minor version");
3573 
3574   return false;
3575 }
3576 
3577 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3578   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3579     return TokError("directive only supported for amdgcn architecture");
3580 
3581   std::string Target;
3582 
3583   SMLoc TargetStart = getTok().getLoc();
3584   if (getParser().parseEscapedString(Target))
3585     return true;
3586   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3587 
3588   std::string ExpectedTarget;
3589   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3590   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3591 
3592   if (Target != ExpectedTargetOS.str())
3593     return getParser().Error(TargetRange.Start, "target must match options",
3594                              TargetRange);
3595 
3596   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3597   return false;
3598 }
3599 
3600 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3601   return getParser().Error(Range.Start, "value out of range", Range);
3602 }
3603 
3604 bool AMDGPUAsmParser::calculateGPRBlocks(
3605     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3606     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3607     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3608     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3609   // TODO(scott.linder): These calculations are duplicated from
3610   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3611   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3612 
3613   unsigned NumVGPRs = NextFreeVGPR;
3614   unsigned NumSGPRs = NextFreeSGPR;
3615 
3616   if (Version.Major >= 10)
3617     NumSGPRs = 0;
3618   else {
3619     unsigned MaxAddressableNumSGPRs =
3620         IsaInfo::getAddressableNumSGPRs(&getSTI());
3621 
3622     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3623         NumSGPRs > MaxAddressableNumSGPRs)
3624       return OutOfRangeError(SGPRRange);
3625 
3626     NumSGPRs +=
3627         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3628 
3629     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3630         NumSGPRs > MaxAddressableNumSGPRs)
3631       return OutOfRangeError(SGPRRange);
3632 
3633     if (Features.test(FeatureSGPRInitBug))
3634       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3635   }
3636 
3637   VGPRBlocks =
3638       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3639   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3640 
3641   return false;
3642 }
3643 
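// Parse an .amdhsa_kernel ... .end_amdhsa_kernel block. Minimal illustrative
// example (most sub-directives are optional, but .amdhsa_next_free_vgpr and
// .amdhsa_next_free_sgpr are required, as enforced below):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel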
3644 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3645   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3646     return TokError("directive only supported for amdgcn architecture");
3647 
3648   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3649     return TokError("directive only supported for amdhsa OS");
3650 
3651   StringRef KernelName;
3652   if (getParser().parseIdentifier(KernelName))
3653     return true;
3654 
3655   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3656 
3657   StringSet<> Seen;
3658 
3659   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3660 
3661   SMRange VGPRRange;
3662   uint64_t NextFreeVGPR = 0;
3663   SMRange SGPRRange;
3664   uint64_t NextFreeSGPR = 0;
3665   unsigned UserSGPRCount = 0;
3666   bool ReserveVCC = true;
3667   bool ReserveFlatScr = true;
3668   bool ReserveXNACK = hasXNACK();
3669   Optional<bool> EnableWavefrontSize32;
3670 
3671   while (true) {
3672     while (getLexer().is(AsmToken::EndOfStatement))
3673       Lex();
3674 
3675     if (getLexer().isNot(AsmToken::Identifier))
3676       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3677 
3678     StringRef ID = getTok().getIdentifier();
3679     SMRange IDRange = getTok().getLocRange();
3680     Lex();
3681 
3682     if (ID == ".end_amdhsa_kernel")
3683       break;
3684 
3685     if (Seen.find(ID) != Seen.end())
3686       return TokError(".amdhsa_ directives cannot be repeated");
3687     Seen.insert(ID);
3688 
3689     SMLoc ValStart = getTok().getLoc();
3690     int64_t IVal;
3691     if (getParser().parseAbsoluteExpression(IVal))
3692       return true;
3693     SMLoc ValEnd = getTok().getLoc();
3694     SMRange ValRange = SMRange(ValStart, ValEnd);
3695 
3696     if (IVal < 0)
3697       return OutOfRangeError(ValRange);
3698 
3699     uint64_t Val = IVal;
3700 
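// Range-check VALUE against the width of kernel descriptor field ENTRY, then
// store it into FIELD. Used by the .amdhsa_ sub-directives handled below.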
3701 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3702   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3703     return OutOfRangeError(RANGE);                                             \
3704   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3705 
3706     if (ID == ".amdhsa_group_segment_fixed_size") {
3707       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3708         return OutOfRangeError(ValRange);
3709       KD.group_segment_fixed_size = Val;
3710     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3711       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3712         return OutOfRangeError(ValRange);
3713       KD.private_segment_fixed_size = Val;
3714     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3715       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3716                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3717                        Val, ValRange);
3718       if (Val)
3719         UserSGPRCount += 4;
3720     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3721       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3722                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3723                        ValRange);
3724       if (Val)
3725         UserSGPRCount += 2;
3726     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3727       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3728                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3729                        ValRange);
3730       if (Val)
3731         UserSGPRCount += 2;
3732     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3733       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3734                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3735                        Val, ValRange);
3736       if (Val)
3737         UserSGPRCount += 2;
3738     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3739       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3740                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3741                        ValRange);
3742       if (Val)
3743         UserSGPRCount += 2;
3744     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3745       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3746                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3747                        ValRange);
3748       if (Val)
3749         UserSGPRCount += 2;
3750     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3751       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3752                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3753                        Val, ValRange);
3754       if (Val)
3755         UserSGPRCount += 1;
3756     } else if (ID == ".amdhsa_wavefront_size32") {
3757       if (IVersion.Major < 10)
3758         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3759                                  IDRange);
3760       EnableWavefrontSize32 = Val;
3761       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3762                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3763                        Val, ValRange);
3764     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3765       PARSE_BITS_ENTRY(
3766           KD.compute_pgm_rsrc2,
3767           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3768           ValRange);
3769     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3772                        ValRange);
3773     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3776                        ValRange);
3777     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3778       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3779                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3780                        ValRange);
3781     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3782       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3783                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3784                        ValRange);
3785     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3786       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3787                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3788                        ValRange);
3789     } else if (ID == ".amdhsa_next_free_vgpr") {
3790       VGPRRange = ValRange;
3791       NextFreeVGPR = Val;
3792     } else if (ID == ".amdhsa_next_free_sgpr") {
3793       SGPRRange = ValRange;
3794       NextFreeSGPR = Val;
3795     } else if (ID == ".amdhsa_reserve_vcc") {
3796       if (!isUInt<1>(Val))
3797         return OutOfRangeError(ValRange);
3798       ReserveVCC = Val;
3799     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3800       if (IVersion.Major < 7)
3801         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3802                                  IDRange);
3803       if (!isUInt<1>(Val))
3804         return OutOfRangeError(ValRange);
3805       ReserveFlatScr = Val;
3806     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3807       if (IVersion.Major < 8)
3808         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3809                                  IDRange);
3810       if (!isUInt<1>(Val))
3811         return OutOfRangeError(ValRange);
3812       ReserveXNACK = Val;
3813     } else if (ID == ".amdhsa_float_round_mode_32") {
3814       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3815                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3816     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3817       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3818                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3819     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3821                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3822     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3823       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3824                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3825                        ValRange);
3826     } else if (ID == ".amdhsa_dx10_clamp") {
3827       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3828                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3829     } else if (ID == ".amdhsa_ieee_mode") {
3830       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3831                        Val, ValRange);
3832     } else if (ID == ".amdhsa_fp16_overflow") {
3833       if (IVersion.Major < 9)
3834         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3835                                  IDRange);
3836       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3837                        ValRange);
3838     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3839       if (IVersion.Major < 10)
3840         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3841                                  IDRange);
3842       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3843                        ValRange);
3844     } else if (ID == ".amdhsa_memory_ordered") {
3845       if (IVersion.Major < 10)
3846         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3847                                  IDRange);
3848       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3849                        ValRange);
3850     } else if (ID == ".amdhsa_forward_progress") {
3851       if (IVersion.Major < 10)
3852         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3853                                  IDRange);
3854       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3855                        ValRange);
3856     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3857       PARSE_BITS_ENTRY(
3858           KD.compute_pgm_rsrc2,
3859           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3860           ValRange);
3861     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3862       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3863                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3864                        Val, ValRange);
3865     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3866       PARSE_BITS_ENTRY(
3867           KD.compute_pgm_rsrc2,
3868           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3869           ValRange);
3870     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3871       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3872                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3873                        Val, ValRange);
3874     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3875       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3876                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3877                        Val, ValRange);
3878     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3879       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3880                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3881                        Val, ValRange);
3882     } else if (ID == ".amdhsa_exception_int_div_zero") {
3883       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3884                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3885                        Val, ValRange);
3886     } else {
3887       return getParser().Error(IDRange.Start,
3888                                "unknown .amdhsa_kernel directive", IDRange);
3889     }
3890 
3891 #undef PARSE_BITS_ENTRY
3892   }
3893 
3894   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3895     return TokError(".amdhsa_next_free_vgpr directive is required");
3896 
3897   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3898     return TokError(".amdhsa_next_free_sgpr directive is required");
3899 
3900   unsigned VGPRBlocks;
3901   unsigned SGPRBlocks;
3902   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3903                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3904                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3905                          SGPRBlocks))
3906     return true;
3907 
3908   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3909           VGPRBlocks))
3910     return OutOfRangeError(VGPRRange);
3911   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3912                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3913 
3914   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3915           SGPRBlocks))
3916     return OutOfRangeError(SGPRRange);
3917   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3918                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3919                   SGPRBlocks);
3920 
3921   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3922     return TokError("too many user SGPRs enabled");
3923   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3924                   UserSGPRCount);
3925 
3926   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3927       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3928       ReserveFlatScr, ReserveXNACK);
3929   return false;
3930 }
3931 
3932 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3933   uint32_t Major;
3934   uint32_t Minor;
3935 
3936   if (ParseDirectiveMajorMinor(Major, Minor))
3937     return true;
3938 
3939   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3940   return false;
3941 }
3942 
3943 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3944   uint32_t Major;
3945   uint32_t Minor;
3946   uint32_t Stepping;
3947   StringRef VendorName;
3948   StringRef ArchName;
3949 
3950   // If this directive has no arguments, then use the ISA version for the
3951   // targeted GPU.
3952   if (getLexer().is(AsmToken::EndOfStatement)) {
3953     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3954     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3955                                                       ISA.Stepping,
3956                                                       "AMD", "AMDGPU");
3957     return false;
3958   }
3959 
3960   if (ParseDirectiveMajorMinor(Major, Minor))
3961     return true;
3962 
3963   if (getLexer().isNot(AsmToken::Comma))
3964     return TokError("stepping version number required, comma expected");
3965   Lex();
3966 
3967   if (ParseAsAbsoluteExpression(Stepping))
3968     return TokError("invalid stepping version");
3969 
3970   if (getLexer().isNot(AsmToken::Comma))
3971     return TokError("vendor name required, comma expected");
3972   Lex();
3973 
3974   if (getLexer().isNot(AsmToken::String))
3975     return TokError("invalid vendor name");
3976 
3977   VendorName = getLexer().getTok().getStringContents();
3978   Lex();
3979 
3980   if (getLexer().isNot(AsmToken::Comma))
3981     return TokError("arch name required, comma expected");
3982   Lex();
3983 
3984   if (getLexer().isNot(AsmToken::String))
3985     return TokError("invalid arch name");
3986 
3987   ArchName = getLexer().getTok().getStringContents();
3988   Lex();
3989 
3990   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3991                                                     VendorName, ArchName);
3992   return false;
3993 }
3994 
3995 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3996                                                amd_kernel_code_t &Header) {
3997   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3998   // assembly for backwards compatibility.
3999   if (ID == "max_scratch_backing_memory_byte_size") {
4000     Parser.eatToEndOfStatement();
4001     return false;
4002   }
4003 
4004   SmallString<40> ErrStr;
4005   raw_svector_ostream Err(ErrStr);
4006   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4007     return TokError(Err.str());
4008   }
4009   Lex();
4010 
4011   if (ID == "enable_wavefront_size32") {
4012     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4013       if (!isGFX10())
4014         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4015       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4016         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4017     } else {
4018       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4019         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4020     }
4021   }
4022 
4023   if (ID == "wavefront_size") {
4024     if (Header.wavefront_size == 5) {
4025       if (!isGFX10())
4026         return TokError("wavefront_size=5 is only allowed on GFX10+");
4027       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4028         return TokError("wavefront_size=5 requires +WavefrontSize32");
4029     } else if (Header.wavefront_size == 6) {
4030       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4031         return TokError("wavefront_size=6 requires +WavefrontSize64");
4032     }
4033   }
4034 
4035   if (ID == "enable_wgp_mode") {
4036     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4037       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4038   }
4039 
4040   if (ID == "enable_mem_ordered") {
4041     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4042       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4043   }
4044 
4045   if (ID == "enable_fwd_progress") {
4046     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4047       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4048   }
4049 
4050   return false;
4051 }
4052 
4053 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4054   amd_kernel_code_t Header;
4055   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4056 
4057   while (true) {
4058     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4059     // will set the current token to EndOfStatement.
4060     while(getLexer().is(AsmToken::EndOfStatement))
4061       Lex();
4062 
4063     if (getLexer().isNot(AsmToken::Identifier))
4064       return TokError("expected value identifier or .end_amd_kernel_code_t");
4065 
4066     StringRef ID = getLexer().getTok().getIdentifier();
4067     Lex();
4068 
4069     if (ID == ".end_amd_kernel_code_t")
4070       break;
4071 
4072     if (ParseAMDKernelCodeTValue(ID, Header))
4073       return true;
4074   }
4075 
4076   getTargetStreamer().EmitAMDKernelCodeT(Header);
4077 
4078   return false;
4079 }
4080 
4081 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4082   if (getLexer().isNot(AsmToken::Identifier))
4083     return TokError("expected symbol name");
4084 
4085   StringRef KernelName = Parser.getTok().getString();
4086 
4087   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4088                                            ELF::STT_AMDGPU_HSA_KERNEL);
4089   Lex();
4090   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4091     KernelScope.initialize(getContext());
4092   return false;
4093 }
4094 
4095 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4096   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4097     return Error(getParser().getTok().getLoc(),
4098                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4099                  "architectures");
4100   }
4101 
4102   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4103 
4104   std::string ISAVersionStringFromSTI;
4105   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4106   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4107 
4108   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4109     return Error(getParser().getTok().getLoc(),
4110                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4111                  "arguments specified through the command line");
4112   }
4113 
4114   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4115   Lex();
4116 
4117   return false;
4118 }
4119 
4120 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4121   const char *AssemblerDirectiveBegin;
4122   const char *AssemblerDirectiveEnd;
4123   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4124       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4125           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4126                             HSAMD::V3::AssemblerDirectiveEnd)
4127           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4128                             HSAMD::AssemblerDirectiveEnd);
4129 
4130   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4131     return Error(getParser().getTok().getLoc(),
4132                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4133                  "not available on non-amdhsa OSes")).str());
4134   }
4135 
4136   std::string HSAMetadataString;
4137   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4138                           HSAMetadataString))
4139     return true;
4140 
4141   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4142     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4143       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4144   } else {
4145     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4146       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4147   }
4148 
4149   return false;
4150 }
4151 
4152 /// Common code to parse out a block of text (typically YAML) between start and
4153 /// end directives.
4154 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4155                                           const char *AssemblerDirectiveEnd,
4156                                           std::string &CollectString) {
4157 
4158   raw_string_ostream CollectStream(CollectString);
4159 
4160   getLexer().setSkipSpace(false);
4161 
4162   bool FoundEnd = false;
4163   while (!getLexer().is(AsmToken::Eof)) {
4164     while (getLexer().is(AsmToken::Space)) {
4165       CollectStream << getLexer().getTok().getString();
4166       Lex();
4167     }
4168 
4169     if (getLexer().is(AsmToken::Identifier)) {
4170       StringRef ID = getLexer().getTok().getIdentifier();
4171       if (ID == AssemblerDirectiveEnd) {
4172         Lex();
4173         FoundEnd = true;
4174         break;
4175       }
4176     }
4177 
4178     CollectStream << Parser.parseStringToEndOfStatement()
4179                   << getContext().getAsmInfo()->getSeparatorString();
4180 
4181     Parser.eatToEndOfStatement();
4182   }
4183 
4184   getLexer().setSkipSpace(true);
4185 
4186   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4187     return TokError(Twine("expected directive ") +
4188                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4189   }
4190 
4191   CollectStream.flush();
4192   return false;
4193 }
4194 
4195 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4196 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4197   std::string String;
4198   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4199                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4200     return true;
4201 
4202   auto PALMetadata = getTargetStreamer().getPALMetadata();
4203   if (!PALMetadata->setFromString(String))
4204     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4205   return false;
4206 }
4207 
4208 /// Parse the assembler directive for old linear-format PAL metadata.
4209 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4210   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4211     return Error(getParser().getTok().getLoc(),
4212                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4213                  "not available on non-amdpal OSes")).str());
4214   }
4215 
4216   auto PALMetadata = getTargetStreamer().getPALMetadata();
4217   PALMetadata->setLegacy();
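  // The directive body is a flat, comma-separated list of register/value
  // pairs: <reg>, <value>, <reg>, <value>, ...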
4218   for (;;) {
4219     uint32_t Key, Value;
4220     if (ParseAsAbsoluteExpression(Key)) {
4221       return TokError(Twine("invalid value in ") +
4222                       Twine(PALMD::AssemblerDirective));
4223     }
4224     if (getLexer().isNot(AsmToken::Comma)) {
4225       return TokError(Twine("expected an even number of values in ") +
4226                       Twine(PALMD::AssemblerDirective));
4227     }
4228     Lex();
4229     if (ParseAsAbsoluteExpression(Value)) {
4230       return TokError(Twine("invalid value in ") +
4231                       Twine(PALMD::AssemblerDirective));
4232     }
4233     PALMetadata->setRegister(Key, Value);
4234     if (getLexer().isNot(AsmToken::Comma))
4235       break;
4236     Lex();
4237   }
4238   return false;
4239 }
4240 
4241 /// ParseDirectiveAMDGPULDS
4242 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4243 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4244   if (getParser().checkForValidSection())
4245     return true;
4246 
4247   StringRef Name;
4248   SMLoc NameLoc = getLexer().getLoc();
4249   if (getParser().parseIdentifier(Name))
4250     return TokError("expected identifier in directive");
4251 
4252   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4253   if (parseToken(AsmToken::Comma, "expected ','"))
4254     return true;
4255 
4256   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4257 
4258   int64_t Size;
4259   SMLoc SizeLoc = getLexer().getLoc();
4260   if (getParser().parseAbsoluteExpression(Size))
4261     return true;
4262   if (Size < 0)
4263     return Error(SizeLoc, "size must be non-negative");
4264   if (Size > LocalMemorySize)
4265     return Error(SizeLoc, "size is too large");
4266 
4267   int64_t Align = 4;
4268   if (getLexer().is(AsmToken::Comma)) {
4269     Lex();
4270     SMLoc AlignLoc = getLexer().getLoc();
4271     if (getParser().parseAbsoluteExpression(Align))
4272       return true;
4273     if (Align < 0 || !isPowerOf2_64(Align))
4274       return Error(AlignLoc, "alignment must be a power of two");
4275 
4276     // Alignment larger than the size of LDS is possible in theory, as long
4277     // as the linker manages to place the symbol at address 0, but we do want
4278     // to make sure the alignment fits nicely into a 32-bit integer.
4279     if (Align >= 1u << 31)
4280       return Error(AlignLoc, "alignment is too large");
4281   }
4282 
4283   if (parseToken(AsmToken::EndOfStatement,
4284                  "unexpected token in '.amdgpu_lds' directive"))
4285     return true;
4286 
4287   Symbol->redefineIfPossible();
4288   if (!Symbol->isUndefined())
4289     return Error(NameLoc, "invalid symbol redefinition");
4290 
4291   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4292   return false;
4293 }
4294 
4295 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4296   StringRef IDVal = DirectiveID.getString();
4297 
4298   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4299     if (IDVal == ".amdgcn_target")
4300       return ParseDirectiveAMDGCNTarget();
4301 
4302     if (IDVal == ".amdhsa_kernel")
4303       return ParseDirectiveAMDHSAKernel();
4304 
4305     // TODO: Restructure/combine with PAL metadata directive.
4306     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4307       return ParseDirectiveHSAMetadata();
4308   } else {
4309     if (IDVal == ".hsa_code_object_version")
4310       return ParseDirectiveHSACodeObjectVersion();
4311 
4312     if (IDVal == ".hsa_code_object_isa")
4313       return ParseDirectiveHSACodeObjectISA();
4314 
4315     if (IDVal == ".amd_kernel_code_t")
4316       return ParseDirectiveAMDKernelCodeT();
4317 
4318     if (IDVal == ".amdgpu_hsa_kernel")
4319       return ParseDirectiveAMDGPUHsaKernel();
4320 
4321     if (IDVal == ".amd_amdgpu_isa")
4322       return ParseDirectiveISAVersion();
4323 
4324     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4325       return ParseDirectiveHSAMetadata();
4326   }
4327 
4328   if (IDVal == ".amdgpu_lds")
4329     return ParseDirectiveAMDGPULDS();
4330 
4331   if (IDVal == PALMD::AssemblerDirectiveBegin)
4332     return ParseDirectivePALMetadataBegin();
4333 
4334   if (IDVal == PALMD::AssemblerDirective)
4335     return ParseDirectivePALMetadata();
4336 
4337   return true;
4338 }
4339 
4340 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4341                                            unsigned RegNo) const {
4342 
4343   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4344        R.isValid(); ++R) {
4345     if (*R == RegNo)
4346       return isGFX9() || isGFX10();
4347   }
4348 
4349   // GFX10 has 2 more SGPRs 104 and 105.
4350   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4351        R.isValid(); ++R) {
4352     if (*R == RegNo)
4353       return hasSGPR104_SGPR105();
4354   }
4355 
4356   switch (RegNo) {
4357   case AMDGPU::SRC_SHARED_BASE:
4358   case AMDGPU::SRC_SHARED_LIMIT:
4359   case AMDGPU::SRC_PRIVATE_BASE:
4360   case AMDGPU::SRC_PRIVATE_LIMIT:
4361   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4362     return !isCI() && !isSI() && !isVI();
4363   case AMDGPU::TBA:
4364   case AMDGPU::TBA_LO:
4365   case AMDGPU::TBA_HI:
4366   case AMDGPU::TMA:
4367   case AMDGPU::TMA_LO:
4368   case AMDGPU::TMA_HI:
4369     return !isGFX9() && !isGFX10();
4370   case AMDGPU::XNACK_MASK:
4371   case AMDGPU::XNACK_MASK_LO:
4372   case AMDGPU::XNACK_MASK_HI:
4373     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4374   case AMDGPU::SGPR_NULL:
4375     return isGFX10();
4376   default:
4377     break;
4378   }
4379 
4380   if (isCI())
4381     return true;
4382 
4383   if (isSI() || isGFX10()) {
4384     // No flat_scr on SI.
4385     // On GFX10 flat scratch is not a valid register operand and can only be
4386     // accessed with s_setreg/s_getreg.
4387     switch (RegNo) {
4388     case AMDGPU::FLAT_SCR:
4389     case AMDGPU::FLAT_SCR_LO:
4390     case AMDGPU::FLAT_SCR_HI:
4391       return false;
4392     default:
4393       return true;
4394     }
4395   }
4396 
4397   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4398   // SI/CI have.
4399   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4400        R.isValid(); ++R) {
4401     if (*R == RegNo)
4402       return hasSGPR102_SGPR103();
4403   }
4404 
4405   return true;
4406 }
4407 
4408 OperandMatchResultTy
4409 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4410                               OperandMode Mode) {
4411   // Try to parse with a custom parser
4412   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4413 
4414   // If we successfully parsed the operand or if there was an error parsing,
4415   // we are done.
4416   //
4417   // If we are parsing after we reach EndOfStatement then this means we
4418   // are appending default values to the Operands list.  This is only done
4419   // by custom parsers, so we shouldn't continue on to the generic parsing.
4420   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4421       getLexer().is(AsmToken::EndOfStatement))
4422     return ResTy;
4423 
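  // In NSA (non-sequential address) form, MIMG address operands are written as
  // a bracketed list of individual VGPRs, e.g. (illustrative):
  //   image_sample v[0:3], [v4, v6, v9], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D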
4424   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4425     unsigned Prefix = Operands.size();
4426     SMLoc LBraceLoc = getTok().getLoc();
4427     Parser.Lex(); // eat the '['
4428 
4429     for (;;) {
4430       ResTy = parseReg(Operands);
4431       if (ResTy != MatchOperand_Success)
4432         return ResTy;
4433 
4434       if (getLexer().is(AsmToken::RBrac))
4435         break;
4436 
4437       if (getLexer().isNot(AsmToken::Comma))
4438         return MatchOperand_ParseFail;
4439       Parser.Lex();
4440     }
4441 
4442     if (Operands.size() - Prefix > 1) {
4443       Operands.insert(Operands.begin() + Prefix,
4444                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4445       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4446                                                     getTok().getLoc()));
4447     }
4448 
4449     Parser.Lex(); // eat the ']'
4450     return MatchOperand_Success;
4451   }
4452 
4453   return parseRegOrImm(Operands);
4454 }
4455 
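// Strip an optional encoding-forcing suffix from the mnemonic and remember it,
// e.g. "v_add_f32_e64" is matched as "v_add_f32" with the 64-bit encoding
// forced.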
4456 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4457   // Clear any forced encodings from the previous instruction.
4458   setForcedEncodingSize(0);
4459   setForcedDPP(false);
4460   setForcedSDWA(false);
4461 
4462   if (Name.endswith("_e64")) {
4463     setForcedEncodingSize(64);
4464     return Name.substr(0, Name.size() - 4);
4465   } else if (Name.endswith("_e32")) {
4466     setForcedEncodingSize(32);
4467     return Name.substr(0, Name.size() - 4);
4468   } else if (Name.endswith("_dpp")) {
4469     setForcedDPP(true);
4470     return Name.substr(0, Name.size() - 4);
4471   } else if (Name.endswith("_sdwa")) {
4472     setForcedSDWA(true);
4473     return Name.substr(0, Name.size() - 5);
4474   }
4475   return Name;
4476 }
4477 
4478 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4479                                        StringRef Name,
4480                                        SMLoc NameLoc, OperandVector &Operands) {
4481   // Add the instruction mnemonic
4482   Name = parseMnemonicSuffix(Name);
4483   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4484 
4485   bool IsMIMG = Name.startswith("image_");
4486 
4487   while (!getLexer().is(AsmToken::EndOfStatement)) {
4488     OperandMode Mode = OperandMode_Default;
4489     if (IsMIMG && isGFX10() && Operands.size() == 2)
4490       Mode = OperandMode_NSA;
4491     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4492 
4493     // Eat the comma or space if there is one.
4494     if (getLexer().is(AsmToken::Comma))
4495       Parser.Lex();
4496 
4497     switch (Res) {
4498       case MatchOperand_Success: break;
4499       case MatchOperand_ParseFail:
4500         // FIXME: use real operand location rather than the current location.
4501         Error(getLexer().getLoc(), "failed parsing operand.");
4502         while (!getLexer().is(AsmToken::EndOfStatement)) {
4503           Parser.Lex();
4504         }
4505         return true;
4506       case MatchOperand_NoMatch:
4507         // FIXME: use real operand location rather than the current location.
4508         Error(getLexer().getLoc(), "not a valid operand.");
4509         while (!getLexer().is(AsmToken::EndOfStatement)) {
4510           Parser.Lex();
4511         }
4512         return true;
4513     }
4514   }
4515 
4516   return false;
4517 }
4518 
4519 //===----------------------------------------------------------------------===//
4520 // Utility functions
4521 //===----------------------------------------------------------------------===//
4522 
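// Parse an integer operand written as <prefix>:<expr>, e.g. (illustrative)
// "offset:16".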
4523 OperandMatchResultTy
4524 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4525 
4526   if (!trySkipId(Prefix, AsmToken::Colon))
4527     return MatchOperand_NoMatch;
4528 
4529   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4530 }
4531 
4532 OperandMatchResultTy
4533 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4534                                     AMDGPUOperand::ImmTy ImmTy,
4535                                     bool (*ConvertResult)(int64_t&)) {
4536   SMLoc S = getLoc();
4537   int64_t Value = 0;
4538 
4539   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4540   if (Res != MatchOperand_Success)
4541     return Res;
4542 
4543   if (ConvertResult && !ConvertResult(Value)) {
4544     Error(S, "invalid " + StringRef(Prefix) + " value.");
4545   }
4546 
4547   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4548   return MatchOperand_Success;
4549 }
4550 
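// Parse a bit-array operand written as <prefix>:[b0,b1,...], e.g.
// (illustrative) "op_sel:[0,1]". Each element must be 0 or 1, and at most four
// elements are accepted.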
4551 OperandMatchResultTy
4552 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4553                                              OperandVector &Operands,
4554                                              AMDGPUOperand::ImmTy ImmTy,
4555                                              bool (*ConvertResult)(int64_t&)) {
4556   SMLoc S = getLoc();
4557   if (!trySkipId(Prefix, AsmToken::Colon))
4558     return MatchOperand_NoMatch;
4559 
4560   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4561     return MatchOperand_ParseFail;
4562 
4563   unsigned Val = 0;
4564   const unsigned MaxSize = 4;
4565 
4566   // FIXME: How to verify the number of elements matches the number of src
4567   // operands?
4568   for (int I = 0; ; ++I) {
4569     int64_t Op;
4570     SMLoc Loc = getLoc();
4571     if (!parseExpr(Op))
4572       return MatchOperand_ParseFail;
4573 
4574     if (Op != 0 && Op != 1) {
4575       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4576       return MatchOperand_ParseFail;
4577     }
4578 
4579     Val |= (Op << I);
4580 
4581     if (trySkipToken(AsmToken::RBrac))
4582       break;
4583 
4584     if (I + 1 == MaxSize) {
4585       Error(getLoc(), "expected a closing square bracket");
4586       return MatchOperand_ParseFail;
4587     }
4588 
4589     if (!skipToken(AsmToken::Comma, "expected a comma"))
4590       return MatchOperand_ParseFail;
4591   }
4592 
4593   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4594   return MatchOperand_Success;
4595 }
4596 
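// Parse a named single-bit modifier. For example, with Name == "glc"
// (illustrative): "glc" yields 1, "noglc" yields 0, and any other identifier
// is not a match. At the end of the statement the default value 0 is used.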
4597 OperandMatchResultTy
4598 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4599                                AMDGPUOperand::ImmTy ImmTy) {
4600   int64_t Bit = 0;
4601   SMLoc S = Parser.getTok().getLoc();
4602 
  // If we are at the end of the statement, this is a default argument, so
  // keep the default value (Bit = 0).
4605   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4606     switch(getLexer().getKind()) {
4607       case AsmToken::Identifier: {
4608         StringRef Tok = Parser.getTok().getString();
4609         if (Tok == Name) {
4610           if (Tok == "r128" && isGFX9())
4611             Error(S, "r128 modifier is not supported on this GPU");
4612           if (Tok == "a16" && !isGFX9() && !isGFX10())
4613             Error(S, "a16 modifier is not supported on this GPU");
4614           Bit = 1;
4615           Parser.Lex();
4616         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4617           Bit = 0;
4618           Parser.Lex();
4619         } else {
4620           return MatchOperand_NoMatch;
4621         }
4622         break;
4623       }
4624       default:
4625         return MatchOperand_NoMatch;
4626     }
4627   }
4628 
4629   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4630     return MatchOperand_ParseFail;
4631 
4632   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4633   return MatchOperand_Success;
4634 }
4635 
4636 static void addOptionalImmOperand(
4637   MCInst& Inst, const OperandVector& Operands,
4638   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4639   AMDGPUOperand::ImmTy ImmT,
4640   int64_t Default = 0) {
4641   auto i = OptionalIdx.find(ImmT);
4642   if (i != OptionalIdx.end()) {
4643     unsigned Idx = i->second;
4644     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4645   } else {
4646     Inst.addOperand(MCOperand::createImm(Default));
4647   }
4648 }
4649 
4650 OperandMatchResultTy
4651 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4652   if (getLexer().isNot(AsmToken::Identifier)) {
4653     return MatchOperand_NoMatch;
4654   }
4655   StringRef Tok = Parser.getTok().getString();
4656   if (Tok != Prefix) {
4657     return MatchOperand_NoMatch;
4658   }
4659 
4660   Parser.Lex();
4661   if (getLexer().isNot(AsmToken::Colon)) {
4662     return MatchOperand_ParseFail;
4663   }
4664 
4665   Parser.Lex();
4666   if (getLexer().isNot(AsmToken::Identifier)) {
4667     return MatchOperand_ParseFail;
4668   }
4669 
4670   Value = Parser.getTok().getString();
4671   return MatchOperand_Success;
4672 }
4673 
4674 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4675 // values to live in a joint format operand in the MCInst encoding.
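// For example (illustrative syntax), "dfmt:4, nfmt:2" and "nfmt:2, dfmt:4"
// both produce Format = 4 | (2 << 4).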
4676 OperandMatchResultTy
4677 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4678   SMLoc S = Parser.getTok().getLoc();
4679   int64_t Dfmt = 0, Nfmt = 0;
4680   // dfmt and nfmt can appear in either order, and each is optional.
4681   bool GotDfmt = false, GotNfmt = false;
4682   while (!GotDfmt || !GotNfmt) {
4683     if (!GotDfmt) {
4684       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4685       if (Res != MatchOperand_NoMatch) {
4686         if (Res != MatchOperand_Success)
4687           return Res;
4688         if (Dfmt >= 16) {
4689           Error(Parser.getTok().getLoc(), "out of range dfmt");
4690           return MatchOperand_ParseFail;
4691         }
4692         GotDfmt = true;
4693         Parser.Lex();
4694         continue;
4695       }
4696     }
4697     if (!GotNfmt) {
4698       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4699       if (Res != MatchOperand_NoMatch) {
4700         if (Res != MatchOperand_Success)
4701           return Res;
4702         if (Nfmt >= 8) {
4703           Error(Parser.getTok().getLoc(), "out of range nfmt");
4704           return MatchOperand_ParseFail;
4705         }
4706         GotNfmt = true;
4707         Parser.Lex();
4708         continue;
4709       }
4710     }
4711     break;
4712   }
4713   if (!GotDfmt && !GotNfmt)
4714     return MatchOperand_NoMatch;
  auto Format = Dfmt | (Nfmt << 4);
4716   Operands.push_back(
4717       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4718   return MatchOperand_Success;
4719 }
4720 
4721 //===----------------------------------------------------------------------===//
4722 // ds
4723 //===----------------------------------------------------------------------===//
4724 
4725 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4726                                     const OperandVector &Operands) {
4727   OptionalImmIndexMap OptionalIdx;
4728 
4729   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4730     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4731 
4732     // Add the register arguments
4733     if (Op.isReg()) {
4734       Op.addRegOperands(Inst, 1);
4735       continue;
4736     }
4737 
4738     // Handle optional arguments
4739     OptionalIdx[Op.getImmTy()] = i;
4740   }
4741 
4742   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4743   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4744   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4745 
4746   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4747 }
4748 
4749 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4750                                 bool IsGdsHardcoded) {
4751   OptionalImmIndexMap OptionalIdx;
4752 
4753   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4754     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4755 
4756     // Add the register arguments
4757     if (Op.isReg()) {
4758       Op.addRegOperands(Inst, 1);
4759       continue;
4760     }
4761 
4762     if (Op.isToken() && Op.getToken() == "gds") {
4763       IsGdsHardcoded = true;
4764       continue;
4765     }
4766 
4767     // Handle optional arguments
4768     OptionalIdx[Op.getImmTy()] = i;
4769   }
4770 
4771   AMDGPUOperand::ImmTy OffsetType =
4772     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4773      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4774      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4775                                                       AMDGPUOperand::ImmTyOffset;
4776 
4777   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4778 
4779   if (!IsGdsHardcoded) {
4780     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4781   }
4782   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4783 }
4784 
4785 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4786   OptionalImmIndexMap OptionalIdx;
4787 
4788   unsigned OperandIdx[4];
4789   unsigned EnMask = 0;
4790   int SrcIdx = 0;
4791 
4792   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4793     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4794 
4795     // Add the register arguments
4796     if (Op.isReg()) {
4797       assert(SrcIdx < 4);
4798       OperandIdx[SrcIdx] = Inst.size();
4799       Op.addRegOperands(Inst, 1);
4800       ++SrcIdx;
4801       continue;
4802     }
4803 
4804     if (Op.isOff()) {
4805       assert(SrcIdx < 4);
4806       OperandIdx[SrcIdx] = Inst.size();
4807       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4808       ++SrcIdx;
4809       continue;
4810     }
4811 
4812     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4813       Op.addImmOperands(Inst, 1);
4814       continue;
4815     }
4816 
4817     if (Op.isToken() && Op.getToken() == "done")
4818       continue;
4819 
4820     // Handle optional arguments
4821     OptionalIdx[Op.getImmTy()] = i;
4822   }
4823 
4824   assert(SrcIdx == 4);
4825 
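  // With the 'compr' modifier each source register carries two packed 16-bit
  // values, so only the first two source slots are used and the enable mask
  // computed below is set per pair (0x3) rather than per source (0x1).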
4826   bool Compr = false;
4827   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4828     Compr = true;
4829     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4830     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4831     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4832   }
4833 
4834   for (auto i = 0; i < SrcIdx; ++i) {
4835     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4836       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4837     }
4838   }
4839 
4840   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4841   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4842 
4843   Inst.addOperand(MCOperand::createImm(EnMask));
4844 }
4845 
4846 //===----------------------------------------------------------------------===//
4847 // s_waitcnt
4848 //===----------------------------------------------------------------------===//
4849 
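// Encode CntVal into the corresponding field of IntVal. Returns true on
// failure: if the value does not survive an encode/decode round trip it is
// out of range, and is either clamped to the field maximum (Saturate) or
// reported as an error by the caller.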
4850 static bool
4851 encodeCnt(
4852   const AMDGPU::IsaVersion ISA,
4853   int64_t &IntVal,
4854   int64_t CntVal,
4855   bool Saturate,
4856   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4857   unsigned (*decode)(const IsaVersion &Version, unsigned))
4858 {
4859   bool Failed = false;
4860 
4861   IntVal = encode(ISA, IntVal, CntVal);
4862   if (CntVal != decode(ISA, IntVal)) {
4863     if (Saturate) {
4864       IntVal = encode(ISA, IntVal, -1);
4865     } else {
4866       Failed = true;
4867     }
4868   }
4869   return Failed;
4870 }
4871 
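// Parse a single counter specification, e.g. (illustrative) "vmcnt(0)",
// "expcnt(2)" or "lgkmcnt_sat(15)"; the "_sat" suffix clamps out-of-range
// values instead of reporting an error. Counters may be separated by '&' or
// ',' in the s_waitcnt operand.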
4872 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4873 
4874   SMLoc CntLoc = getLoc();
4875   StringRef CntName = getTokenStr();
4876 
4877   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4878       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4879     return false;
4880 
4881   int64_t CntVal;
4882   SMLoc ValLoc = getLoc();
4883   if (!parseExpr(CntVal))
4884     return false;
4885 
4886   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4887 
4888   bool Failed = true;
4889   bool Sat = CntName.endswith("_sat");
4890 
4891   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4892     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4893   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4894     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4895   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4896     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4897   } else {
4898     Error(CntLoc, "invalid counter name " + CntName);
4899     return false;
4900   }
4901 
4902   if (Failed) {
4903     Error(ValLoc, "too large value for " + CntName);
4904     return false;
4905   }
4906 
4907   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4908     return false;
4909 
4910   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4911     if (isToken(AsmToken::EndOfStatement)) {
4912       Error(getLoc(), "expected a counter name");
4913       return false;
4914     }
4915   }
4916 
4917   return true;
4918 }
4919 
4920 OperandMatchResultTy
4921 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4922   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4923   int64_t Waitcnt = getWaitcntBitMask(ISA);
4924   SMLoc S = getLoc();
4925 
4926   // If parse failed, do not return error code
4927   // to avoid excessive error messages.
4928   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4929     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4930   } else {
4931     parseExpr(Waitcnt);
4932   }
4933 
4934   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4935   return MatchOperand_Success;
4936 }
4937 
4938 bool
4939 AMDGPUOperand::isSWaitCnt() const {
4940   return isImm();
4941 }
4942 
4943 //===----------------------------------------------------------------------===//
4944 // hwreg
4945 //===----------------------------------------------------------------------===//
4946 
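// Accepted forms (illustrative): hwreg(HW_REG_TRAPSTS),
// hwreg(HW_REG_TRAPSTS, 8, 4), hwreg(3, 0, 32), or a bare 16-bit immediate.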
4947 bool
4948 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4949                                 int64_t &Offset,
4950                                 int64_t &Width) {
4951   using namespace llvm::AMDGPU::Hwreg;
4952 
4953   // The register may be specified by name or using a numeric code
4954   if (isToken(AsmToken::Identifier) &&
4955       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4956     HwReg.IsSymbolic = true;
    lex(); // skip register name
4958   } else if (!parseExpr(HwReg.Id)) {
4959     return false;
4960   }
4961 
4962   if (trySkipToken(AsmToken::RParen))
4963     return true;
4964 
4965   // parse optional params
4966   return
4967     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4968     parseExpr(Offset) &&
4969     skipToken(AsmToken::Comma, "expected a comma") &&
4970     parseExpr(Width) &&
4971     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4972 }
4973 
4974 bool
4975 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4976                                const int64_t Offset,
4977                                const int64_t Width,
4978                                const SMLoc Loc) {
4979 
4980   using namespace llvm::AMDGPU::Hwreg;
4981 
4982   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4983     Error(Loc, "specified hardware register is not supported on this GPU");
4984     return false;
4985   } else if (!isValidHwreg(HwReg.Id)) {
4986     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4987     return false;
4988   } else if (!isValidHwregOffset(Offset)) {
4989     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4990     return false;
4991   } else if (!isValidHwregWidth(Width)) {
4992     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4993     return false;
4994   }
4995   return true;
4996 }
4997 
4998 OperandMatchResultTy
4999 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5000   using namespace llvm::AMDGPU::Hwreg;
5001 
5002   int64_t ImmVal = 0;
5003   SMLoc Loc = getLoc();
5004 
5005   // If parse failed, do not return error code
5006   // to avoid excessive error messages.
5007   if (trySkipId("hwreg", AsmToken::LParen)) {
5008     OperandInfoTy HwReg(ID_UNKNOWN_);
5009     int64_t Offset = OFFSET_DEFAULT_;
5010     int64_t Width = WIDTH_DEFAULT_;
5011     if (parseHwregBody(HwReg, Offset, Width) &&
5012         validateHwreg(HwReg, Offset, Width, Loc)) {
5013       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5014     }
5015   } else if (parseExpr(ImmVal)) {
5016     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5017       Error(Loc, "invalid immediate: only 16-bit values are legal");
5018   }
5019 
5020   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5021   return MatchOperand_Success;
5022 }
5023 
5024 bool AMDGPUOperand::isHwreg() const {
5025   return isImmTy(ImmTyHwreg);
5026 }
5027 
5028 //===----------------------------------------------------------------------===//
5029 // sendmsg
5030 //===----------------------------------------------------------------------===//
5031 
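// Accepted forms (illustrative): sendmsg(MSG_INTERRUPT),
// sendmsg(MSG_GS, GS_OP_EMIT, 0), sendmsg(2, 1, 0), or a bare 16-bit
// immediate.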
5032 bool
5033 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5034                                   OperandInfoTy &Op,
5035                                   OperandInfoTy &Stream) {
5036   using namespace llvm::AMDGPU::SendMsg;
5037 
5038   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5039     Msg.IsSymbolic = true;
5040     lex(); // skip message name
5041   } else if (!parseExpr(Msg.Id)) {
5042     return false;
5043   }
5044 
5045   if (trySkipToken(AsmToken::Comma)) {
5046     Op.IsDefined = true;
5047     if (isToken(AsmToken::Identifier) &&
5048         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5049       lex(); // skip operation name
5050     } else if (!parseExpr(Op.Id)) {
5051       return false;
5052     }
5053 
5054     if (trySkipToken(AsmToken::Comma)) {
5055       Stream.IsDefined = true;
5056       if (!parseExpr(Stream.Id))
5057         return false;
5058     }
5059   }
5060 
5061   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5062 }
5063 
5064 bool
5065 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5066                                  const OperandInfoTy &Op,
5067                                  const OperandInfoTy &Stream,
5068                                  const SMLoc S) {
5069   using namespace llvm::AMDGPU::SendMsg;
5070 
  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the possibility of encoding is checked.
5074   bool Strict = Msg.IsSymbolic;
5075 
5076   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5077     Error(S, "invalid message id");
5078     return false;
5079   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5080     Error(S, Op.IsDefined ?
5081              "message does not support operations" :
5082              "missing message operation");
5083     return false;
5084   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5085     Error(S, "invalid operation id");
5086     return false;
5087   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5088     Error(S, "message operation does not support streams");
5089     return false;
5090   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5091     Error(S, "invalid message stream id");
5092     return false;
5093   }
5094   return true;
5095 }
5096 
5097 OperandMatchResultTy
5098 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5099   using namespace llvm::AMDGPU::SendMsg;
5100 
5101   int64_t ImmVal = 0;
5102   SMLoc Loc = getLoc();
5103 
5104   // If parse failed, do not return error code
5105   // to avoid excessive error messages.
5106   if (trySkipId("sendmsg", AsmToken::LParen)) {
5107     OperandInfoTy Msg(ID_UNKNOWN_);
5108     OperandInfoTy Op(OP_NONE_);
5109     OperandInfoTy Stream(STREAM_ID_NONE_);
5110     if (parseSendMsgBody(Msg, Op, Stream) &&
5111         validateSendMsg(Msg, Op, Stream, Loc)) {
5112       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5113     }
5114   } else if (parseExpr(ImmVal)) {
5115     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5116       Error(Loc, "invalid immediate: only 16-bit values are legal");
5117   }
5118 
5119   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5120   return MatchOperand_Success;
5121 }
5122 
5123 bool AMDGPUOperand::isSendMsg() const {
5124   return isImmTy(ImmTySendMsg);
5125 }
5126 
5127 //===----------------------------------------------------------------------===//
5128 // v_interp
5129 //===----------------------------------------------------------------------===//
5130 
5131 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5132   if (getLexer().getKind() != AsmToken::Identifier)
5133     return MatchOperand_NoMatch;
5134 
5135   StringRef Str = Parser.getTok().getString();
5136   int Slot = StringSwitch<int>(Str)
5137     .Case("p10", 0)
5138     .Case("p20", 1)
5139     .Case("p0", 2)
5140     .Default(-1);
5141 
5142   SMLoc S = Parser.getTok().getLoc();
5143   if (Slot == -1)
5144     return MatchOperand_ParseFail;
5145 
5146   Parser.Lex();
5147   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5148                                               AMDGPUOperand::ImmTyInterpSlot));
5149   return MatchOperand_Success;
5150 }
5151 
5152 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5153   if (getLexer().getKind() != AsmToken::Identifier)
5154     return MatchOperand_NoMatch;
5155 
5156   StringRef Str = Parser.getTok().getString();
5157   if (!Str.startswith("attr"))
5158     return MatchOperand_NoMatch;
5159 
5160   StringRef Chan = Str.take_back(2);
5161   int AttrChan = StringSwitch<int>(Chan)
5162     .Case(".x", 0)
5163     .Case(".y", 1)
5164     .Case(".z", 2)
5165     .Case(".w", 3)
5166     .Default(-1);
5167   if (AttrChan == -1)
5168     return MatchOperand_ParseFail;
5169 
5170   Str = Str.drop_back(2).drop_front(4);
5171 
5172   uint8_t Attr;
5173   if (Str.getAsInteger(10, Attr))
5174     return MatchOperand_ParseFail;
5175 
5176   SMLoc S = Parser.getTok().getLoc();
5177   Parser.Lex();
5178   if (Attr > 63) {
5179     Error(S, "out of bounds attr");
5180     return MatchOperand_Success;
5181   }
5182 
5183   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5184 
5185   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5186                                               AMDGPUOperand::ImmTyInterpAttr));
5187   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5188                                               AMDGPUOperand::ImmTyAttrChan));
5189   return MatchOperand_Success;
5190 }
5191 
5192 //===----------------------------------------------------------------------===//
5193 // exp
5194 //===----------------------------------------------------------------------===//
5195 
5196 void AMDGPUAsmParser::errorExpTgt() {
5197   Error(Parser.getTok().getLoc(), "invalid exp target");
5198 }
5199 
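// Export target encoding implemented below: mrt0..mrt7 -> 0..7, mrtz -> 8,
// null -> 9, pos0..pos4 -> 12..16 (pos4 only on GFX10), prim -> 20 (GFX10
// only), param0..param31 -> 32..63.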
5200 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5201                                                       uint8_t &Val) {
5202   if (Str == "null") {
5203     Val = 9;
5204     return MatchOperand_Success;
5205   }
5206 
5207   if (Str.startswith("mrt")) {
5208     Str = Str.drop_front(3);
5209     if (Str == "z") { // == mrtz
5210       Val = 8;
5211       return MatchOperand_Success;
5212     }
5213 
5214     if (Str.getAsInteger(10, Val))
5215       return MatchOperand_ParseFail;
5216 
5217     if (Val > 7)
5218       errorExpTgt();
5219 
5220     return MatchOperand_Success;
5221   }
5222 
5223   if (Str.startswith("pos")) {
5224     Str = Str.drop_front(3);
5225     if (Str.getAsInteger(10, Val))
5226       return MatchOperand_ParseFail;
5227 
5228     if (Val > 4 || (Val == 4 && !isGFX10()))
5229       errorExpTgt();
5230 
5231     Val += 12;
5232     return MatchOperand_Success;
5233   }
5234 
5235   if (isGFX10() && Str == "prim") {
5236     Val = 20;
5237     return MatchOperand_Success;
5238   }
5239 
5240   if (Str.startswith("param")) {
5241     Str = Str.drop_front(5);
5242     if (Str.getAsInteger(10, Val))
5243       return MatchOperand_ParseFail;
5244 
5245     if (Val >= 32)
5246       errorExpTgt();
5247 
5248     Val += 32;
5249     return MatchOperand_Success;
5250   }
5251 
5252   if (Str.startswith("invalid_target_")) {
5253     Str = Str.drop_front(15);
5254     if (Str.getAsInteger(10, Val))
5255       return MatchOperand_ParseFail;
5256 
5257     errorExpTgt();
5258     return MatchOperand_Success;
5259   }
5260 
5261   return MatchOperand_NoMatch;
5262 }
5263 
5264 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5265   uint8_t Val;
5266   StringRef Str = Parser.getTok().getString();
5267 
5268   auto Res = parseExpTgtImpl(Str, Val);
5269   if (Res != MatchOperand_Success)
5270     return Res;
5271 
5272   SMLoc S = Parser.getTok().getLoc();
5273   Parser.Lex();
5274 
5275   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5276                                               AMDGPUOperand::ImmTyExpTgt));
5277   return MatchOperand_Success;
5278 }
5279 
5280 //===----------------------------------------------------------------------===//
5281 // parser helpers
5282 //===----------------------------------------------------------------------===//
5283 
5284 bool
5285 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5286   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5287 }
5288 
5289 bool
5290 AMDGPUAsmParser::isId(const StringRef Id) const {
5291   return isId(getToken(), Id);
5292 }
5293 
5294 bool
5295 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5296   return getTokenKind() == Kind;
5297 }
5298 
5299 bool
5300 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5301   if (isId(Id)) {
5302     lex();
5303     return true;
5304   }
5305   return false;
5306 }
5307 
5308 bool
5309 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5310   if (isId(Id) && peekToken().is(Kind)) {
5311     lex();
5312     lex();
5313     return true;
5314   }
5315   return false;
5316 }
5317 
5318 bool
5319 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5320   if (isToken(Kind)) {
5321     lex();
5322     return true;
5323   }
5324   return false;
5325 }
5326 
5327 bool
5328 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5329                            const StringRef ErrMsg) {
5330   if (!trySkipToken(Kind)) {
5331     Error(getLoc(), ErrMsg);
5332     return false;
5333   }
5334   return true;
5335 }
5336 
5337 bool
5338 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5339   return !getParser().parseAbsoluteExpression(Imm);
5340 }
5341 
5342 bool
5343 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5344   SMLoc S = getLoc();
5345 
5346   const MCExpr *Expr;
5347   if (Parser.parseExpression(Expr))
5348     return false;
5349 
5350   int64_t IntVal;
5351   if (Expr->evaluateAsAbsolute(IntVal)) {
5352     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5353   } else {
5354     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5355   }
5356   return true;
5357 }
5358 
5359 bool
5360 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5361   if (isToken(AsmToken::String)) {
5362     Val = getToken().getStringContents();
5363     lex();
5364     return true;
5365   } else {
5366     Error(getLoc(), ErrMsg);
5367     return false;
5368   }
5369 }
5370 
5371 AsmToken
5372 AMDGPUAsmParser::getToken() const {
5373   return Parser.getTok();
5374 }
5375 
5376 AsmToken
5377 AMDGPUAsmParser::peekToken() {
5378   return getLexer().peekTok();
5379 }
5380 
5381 void
5382 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5383   auto TokCount = getLexer().peekTokens(Tokens);
5384 
5385   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5386     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5387 }
5388 
5389 AsmToken::TokenKind
5390 AMDGPUAsmParser::getTokenKind() const {
5391   return getLexer().getKind();
5392 }
5393 
5394 SMLoc
5395 AMDGPUAsmParser::getLoc() const {
5396   return getToken().getLoc();
5397 }
5398 
5399 StringRef
5400 AMDGPUAsmParser::getTokenStr() const {
5401   return getToken().getString();
5402 }
5403 
5404 void
5405 AMDGPUAsmParser::lex() {
5406   Parser.Lex();
5407 }
5408 
5409 //===----------------------------------------------------------------------===//
5410 // swizzle
5411 //===----------------------------------------------------------------------===//
5412 
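// In bitmask-perm mode the lane to read from is computed roughly as
//   ((lane_id & AndMask) | OrMask) ^ XorMask
// with the three masks packed into the swizzle offset by the helper below.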
5413 LLVM_READNONE
5414 static unsigned
5415 encodeBitmaskPerm(const unsigned AndMask,
5416                   const unsigned OrMask,
5417                   const unsigned XorMask) {
5418   using namespace llvm::AMDGPU::Swizzle;
5419 
5420   return BITMASK_PERM_ENC |
5421          (AndMask << BITMASK_AND_SHIFT) |
5422          (OrMask  << BITMASK_OR_SHIFT)  |
5423          (XorMask << BITMASK_XOR_SHIFT);
5424 }
5425 
5426 bool
5427 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5428                                       const unsigned MinVal,
5429                                       const unsigned MaxVal,
5430                                       const StringRef ErrMsg) {
5431   for (unsigned i = 0; i < OpNum; ++i) {
5432     if (!skipToken(AsmToken::Comma, "expected a comma")){
5433       return false;
5434     }
5435     SMLoc ExprLoc = Parser.getTok().getLoc();
5436     if (!parseExpr(Op[i])) {
5437       return false;
5438     }
5439     if (Op[i] < MinVal || Op[i] > MaxVal) {
5440       Error(ExprLoc, ErrMsg);
5441       return false;
5442     }
5443   }
5444 
5445   return true;
5446 }
5447 
5448 bool
5449 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5450   using namespace llvm::AMDGPU::Swizzle;
5451 
5452   int64_t Lane[LANE_NUM];
5453   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5454                            "expected a 2-bit lane id")) {
5455     Imm = QUAD_PERM_ENC;
5456     for (unsigned I = 0; I < LANE_NUM; ++I) {
5457       Imm |= Lane[I] << (LANE_SHIFT * I);
5458     }
5459     return true;
5460   }
5461   return false;
5462 }
5463 
5464 bool
5465 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5466   using namespace llvm::AMDGPU::Swizzle;
5467 
5468   SMLoc S = Parser.getTok().getLoc();
5469   int64_t GroupSize;
5470   int64_t LaneIdx;
5471 
5472   if (!parseSwizzleOperands(1, &GroupSize,
5473                             2, 32,
5474                             "group size must be in the interval [2,32]")) {
5475     return false;
5476   }
5477   if (!isPowerOf2_64(GroupSize)) {
5478     Error(S, "group size must be a power of two");
5479     return false;
5480   }
5481   if (parseSwizzleOperands(1, &LaneIdx,
5482                            0, GroupSize - 1,
5483                            "lane id must be in the interval [0,group size - 1]")) {
5484     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5485     return true;
5486   }
5487   return false;
5488 }
5489 
5490 bool
5491 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5492   using namespace llvm::AMDGPU::Swizzle;
5493 
5494   SMLoc S = Parser.getTok().getLoc();
5495   int64_t GroupSize;
5496 
5497   if (!parseSwizzleOperands(1, &GroupSize,
5498       2, 32, "group size must be in the interval [2,32]")) {
5499     return false;
5500   }
5501   if (!isPowerOf2_64(GroupSize)) {
5502     Error(S, "group size must be a power of two");
5503     return false;
5504   }
5505 
5506   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5507   return true;
5508 }
5509 
5510 bool
5511 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5512   using namespace llvm::AMDGPU::Swizzle;
5513 
5514   SMLoc S = Parser.getTok().getLoc();
5515   int64_t GroupSize;
5516 
5517   if (!parseSwizzleOperands(1, &GroupSize,
5518       1, 16, "group size must be in the interval [1,16]")) {
5519     return false;
5520   }
5521   if (!isPowerOf2_64(GroupSize)) {
5522     Error(S, "group size must be a power of two");
5523     return false;
5524   }
5525 
5526   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5527   return true;
5528 }
5529 
5530 bool
5531 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5532   using namespace llvm::AMDGPU::Swizzle;
5533 
5534   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5535     return false;
5536   }
5537 
5538   StringRef Ctl;
5539   SMLoc StrLoc = Parser.getTok().getLoc();
5540   if (!parseString(Ctl)) {
5541     return false;
5542   }
5543   if (Ctl.size() != BITMASK_WIDTH) {
5544     Error(StrLoc, "expected a 5-character mask");
5545     return false;
5546   }
5547 
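  // Each control character selects how the corresponding lane id bit is
  // produced: '0' forces it to 0, '1' forces it to 1, 'p' preserves it, and
  // 'i' inverts it.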
5548   unsigned AndMask = 0;
5549   unsigned OrMask = 0;
5550   unsigned XorMask = 0;
5551 
5552   for (size_t i = 0; i < Ctl.size(); ++i) {
5553     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5554     switch(Ctl[i]) {
5555     default:
5556       Error(StrLoc, "invalid mask");
5557       return false;
5558     case '0':
5559       break;
5560     case '1':
5561       OrMask |= Mask;
5562       break;
5563     case 'p':
5564       AndMask |= Mask;
5565       break;
5566     case 'i':
5567       AndMask |= Mask;
5568       XorMask |= Mask;
5569       break;
5570     }
5571   }
5572 
5573   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5574   return true;
5575 }
5576 
5577 bool
5578 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5579 
5580   SMLoc OffsetLoc = Parser.getTok().getLoc();
5581 
5582   if (!parseExpr(Imm)) {
5583     return false;
5584   }
5585   if (!isUInt<16>(Imm)) {
5586     Error(OffsetLoc, "expected a 16-bit offset");
5587     return false;
5588   }
5589   return true;
5590 }
5591 
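// Swizzle macro forms handled below (illustrative):
//   swizzle(QUAD_PERM, 0, 1, 2, 3)
//   swizzle(BITMASK_PERM, "01pi0")
//   swizzle(BROADCAST, 2, 0)
//   swizzle(SWAP, 1)
//   swizzle(REVERSE, 4)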
5592 bool
5593 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5594   using namespace llvm::AMDGPU::Swizzle;
5595 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5597 
5598     SMLoc ModeLoc = Parser.getTok().getLoc();
5599     bool Ok = false;
5600 
5601     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5602       Ok = parseSwizzleQuadPerm(Imm);
5603     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5604       Ok = parseSwizzleBitmaskPerm(Imm);
5605     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5606       Ok = parseSwizzleBroadcast(Imm);
5607     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5608       Ok = parseSwizzleSwap(Imm);
5609     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5610       Ok = parseSwizzleReverse(Imm);
5611     } else {
5612       Error(ModeLoc, "expected a swizzle mode");
5613     }
5614 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5616   }
5617 
5618   return false;
5619 }
5620 
5621 OperandMatchResultTy
5622 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5623   SMLoc S = Parser.getTok().getLoc();
5624   int64_t Imm = 0;
5625 
5626   if (trySkipId("offset")) {
5627 
5628     bool Ok = false;
5629     if (skipToken(AsmToken::Colon, "expected a colon")) {
5630       if (trySkipId("swizzle")) {
5631         Ok = parseSwizzleMacro(Imm);
5632       } else {
5633         Ok = parseSwizzleOffset(Imm);
5634       }
5635     }
5636 
5637     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5638 
5639     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5640   } else {
5641     // Swizzle "offset" operand is optional.
5642     // If it is omitted, try parsing other optional operands.
5643     return parseOptionalOpr(Operands);
5644   }
5645 }
5646 
5647 bool
5648 AMDGPUOperand::isSwizzle() const {
5649   return isImmTy(ImmTySwizzle);
5650 }
5651 
5652 //===----------------------------------------------------------------------===//
5653 // VGPR Index Mode
5654 //===----------------------------------------------------------------------===//
5655 
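// Parses the body of a "gpr_idx(...)" operand after the opening parenthesis,
// e.g. gpr_idx(SRC0, DST) (illustrative); the result is a bitmask of the
// listed index modes, or OFF for an empty list.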
5656 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5657 
5658   using namespace llvm::AMDGPU::VGPRIndexMode;
5659 
5660   if (trySkipToken(AsmToken::RParen)) {
5661     return OFF;
5662   }
5663 
5664   int64_t Imm = 0;
5665 
5666   while (true) {
5667     unsigned Mode = 0;
5668     SMLoc S = Parser.getTok().getLoc();
5669 
5670     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5671       if (trySkipId(IdSymbolic[ModeId])) {
5672         Mode = 1 << ModeId;
5673         break;
5674       }
5675     }
5676 
5677     if (Mode == 0) {
5678       Error(S, (Imm == 0)?
5679                "expected a VGPR index mode or a closing parenthesis" :
5680                "expected a VGPR index mode");
5681       break;
5682     }
5683 
5684     if (Imm & Mode) {
5685       Error(S, "duplicate VGPR index mode");
5686       break;
5687     }
5688     Imm |= Mode;
5689 
5690     if (trySkipToken(AsmToken::RParen))
5691       break;
5692     if (!skipToken(AsmToken::Comma,
5693                    "expected a comma or a closing parenthesis"))
5694       break;
5695   }
5696 
5697   return Imm;
5698 }
5699 
5700 OperandMatchResultTy
5701 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5702 
5703   int64_t Imm = 0;
5704   SMLoc S = Parser.getTok().getLoc();
5705 
5706   if (getLexer().getKind() == AsmToken::Identifier &&
5707       Parser.getTok().getString() == "gpr_idx" &&
5708       getLexer().peekTok().is(AsmToken::LParen)) {
5709 
5710     Parser.Lex();
5711     Parser.Lex();
5712 
5713     // If parse failed, trigger an error but do not return error code
5714     // to avoid excessive error messages.
5715     Imm = parseGPRIdxMacro();
5716 
5717   } else {
5718     if (getParser().parseAbsoluteExpression(Imm))
5719       return MatchOperand_NoMatch;
5720     if (Imm < 0 || !isUInt<4>(Imm)) {
5721       Error(S, "invalid immediate: only 4-bit values are legal");
5722     }
5723   }
5724 
5725   Operands.push_back(
5726       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5727   return MatchOperand_Success;
5728 }
5729 
5730 bool AMDGPUOperand::isGPRIdxMode() const {
5731   return isImmTy(ImmTyGprIdxMode);
5732 }
5733 
5734 //===----------------------------------------------------------------------===//
5735 // sopp branch targets
5736 //===----------------------------------------------------------------------===//
5737 
5738 OperandMatchResultTy
5739 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5740 
5741   // Make sure we are not parsing something
5742   // that looks like a label or an expression but is not.
5743   // This will improve error messages.
5744   if (isRegister() || isModifier())
5745     return MatchOperand_NoMatch;
5746 
5747   if (parseExpr(Operands)) {
5748 
5749     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5750     assert(Opr.isImm() || Opr.isExpr());
5751     SMLoc Loc = Opr.getStartLoc();
5752 
5753     // Currently we do not support arbitrary expressions as branch targets.
5754     // Only labels and absolute expressions are accepted.
5755     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5756       Error(Loc, "expected an absolute expression or a label");
5757     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5758       Error(Loc, "expected a 16-bit signed jump offset");
5759     }
5760   }
5761 
5762   return MatchOperand_Success; // avoid excessive error messages
5763 }
5764 
5765 //===----------------------------------------------------------------------===//
5766 // Boolean holding registers
5767 //===----------------------------------------------------------------------===//
5768 
5769 OperandMatchResultTy
5770 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5771   return parseReg(Operands);
5772 }
5773 
5774 //===----------------------------------------------------------------------===//
5775 // mubuf
5776 //===----------------------------------------------------------------------===//
5777 
5778 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5779   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5780 }
5781 
5782 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5783   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5784 }
5785 
5786 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5787   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5788 }
5789 
5790 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5791                                const OperandVector &Operands,
5792                                bool IsAtomic,
5793                                bool IsAtomicReturn,
5794                                bool IsLds) {
5795   bool IsLdsOpcode = IsLds;
5796   bool HasLdsModifier = false;
5797   OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true); // IsAtomicReturn implies IsAtomic.
5799   unsigned FirstOperandIdx = 1;
5800 
5801   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5802     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5803 
5804     // Add the register arguments
5805     if (Op.isReg()) {
5806       Op.addRegOperands(Inst, 1);
      // Insert a tied src for the atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
5810       if (IsAtomicReturn && i == FirstOperandIdx)
5811         Op.addRegOperands(Inst, 1);
5812       continue;
5813     }
5814 
5815     // Handle the case where soffset is an immediate
5816     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5817       Op.addImmOperands(Inst, 1);
5818       continue;
5819     }
5820 
5821     HasLdsModifier |= Op.isLDS();
5822 
5823     // Handle tokens like 'offen' which are sometimes hard-coded into the
5824     // asm string.  There are no MCInst operands for these.
5825     if (Op.isToken()) {
5826       continue;
5827     }
5828     assert(Op.isImm());
5829 
5830     // Handle optional arguments
5831     OptionalIdx[Op.getImmTy()] = i;
5832   }
5833 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // the optional modifiers, and the llvm asm matcher regards 'lds'
  // as an optional modifier too. As a result, the lds version of an
  // opcode may be selected even if the source has no 'lds' modifier.
5841   if (IsLdsOpcode && !HasLdsModifier) {
5842     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5843     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5844       Inst.setOpcode(NoLdsOpcode);
5845       IsLdsOpcode = false;
5846     }
5847   }
5848 
5849   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5850   if (!IsAtomic) { // glc is hard-coded.
5851     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5852   }
5853   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5854 
5855   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5856     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5857   }
5858 
5859   if (isGFX10())
5860     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5861 }
5862 
5863 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5864   OptionalImmIndexMap OptionalIdx;
5865 
5866   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5867     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5868 
5869     // Add the register arguments
5870     if (Op.isReg()) {
5871       Op.addRegOperands(Inst, 1);
5872       continue;
5873     }
5874 
5875     // Handle the case where soffset is an immediate
5876     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5877       Op.addImmOperands(Inst, 1);
5878       continue;
5879     }
5880 
5881     // Handle tokens like 'offen' which are sometimes hard-coded into the
5882     // asm string.  There are no MCInst operands for these.
5883     if (Op.isToken()) {
5884       continue;
5885     }
5886     assert(Op.isImm());
5887 
5888     // Handle optional arguments
5889     OptionalIdx[Op.getImmTy()] = i;
5890   }
5891 
5892   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5893                         AMDGPUOperand::ImmTyOffset);
5894   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5895   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5896   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5897   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5898 
5899   if (isGFX10())
5900     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5901 }
5902 
5903 //===----------------------------------------------------------------------===//
5904 // mimg
5905 //===----------------------------------------------------------------------===//
5906 
5907 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5908                               bool IsAtomic) {
5909   unsigned I = 1;
5910   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5911   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5912     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5913   }
5914 
5915   if (IsAtomic) {
5916     // Add src, same as dst
5917     assert(Desc.getNumDefs() == 1);
5918     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5919   }
5920 
5921   OptionalImmIndexMap OptionalIdx;
5922 
5923   for (unsigned E = Operands.size(); I != E; ++I) {
5924     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5925 
5926     // Add the register arguments
5927     if (Op.isReg()) {
5928       Op.addRegOperands(Inst, 1);
5929     } else if (Op.isImmModifier()) {
5930       OptionalIdx[Op.getImmTy()] = I;
5931     } else if (!Op.isToken()) {
5932       llvm_unreachable("unexpected operand type");
5933     }
5934   }
5935 
5936   bool IsGFX10 = isGFX10();
5937 
5938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5939   if (IsGFX10)
5940     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5941   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5942   if (IsGFX10)
5943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5944   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5945   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5946   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5947   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5948   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5949   if (!IsGFX10)
5950     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5951   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5952 }
5953 
5954 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5955   cvtMIMG(Inst, Operands, true);
5956 }
5957 
5958 //===----------------------------------------------------------------------===//
5959 // smrd
5960 //===----------------------------------------------------------------------===//
5961 
5962 bool AMDGPUOperand::isSMRDOffset8() const {
5963   return isImm() && isUInt<8>(getImm());
5964 }
5965 
5966 bool AMDGPUOperand::isSMRDOffset20() const {
5967   return isImm() && isUInt<20>(getImm());
5968 }
5969 
5970 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
5973   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5974 }
5975 
5976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5977   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5978 }
5979 
5980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5981   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5982 }
5983 
5984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5985   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5986 }
5987 
5988 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5989   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5990 }
5991 
5992 //===----------------------------------------------------------------------===//
5993 // vop3
5994 //===----------------------------------------------------------------------===//
5995 
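// omod (output modifier) field encoding assumed by the conversions below:
// 0 = none, 1 = multiply by 2, 2 = multiply by 4, 3 = divide by 2.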
5996 static bool ConvertOmodMul(int64_t &Mul) {
5997   if (Mul != 1 && Mul != 2 && Mul != 4)
5998     return false;
5999 
6000   Mul >>= 1;
6001   return true;
6002 }
6003 
6004 static bool ConvertOmodDiv(int64_t &Div) {
6005   if (Div == 1) {
6006     Div = 0;
6007     return true;
6008   }
6009 
6010   if (Div == 2) {
6011     Div = 3;
6012     return true;
6013   }
6014 
6015   return false;
6016 }
6017 
6018 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6019   if (BoundCtrl == 0) {
6020     BoundCtrl = 1;
6021     return true;
6022   }
6023 
6024   if (BoundCtrl == -1) {
6025     BoundCtrl = 0;
6026     return true;
6027   }
6028 
6029   return false;
6030 }
6031 
6032 // Note: the order in this table matches the order of operands in AsmString.
6033 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6034   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6035   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6036   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6037   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6038   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6039   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6040   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6041   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6042   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6043   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6044   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6045   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6046   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6047   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6048   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6049   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6050   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6051   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6052   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6053   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6054   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6055   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6056   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6057   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6058   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6059   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6060   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6061   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6062   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6063   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6064   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6065   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6066   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6067   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6068   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6069   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6070   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6071   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6072   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6073   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6074   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6075   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6076   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6077   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6078 };
6079 
6080 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6081 
6082   OperandMatchResultTy res = parseOptionalOpr(Operands);
6083 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits the
  // hardcoded mandatory operands.
6094 
6095   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6096     if (res != MatchOperand_Success ||
6097         isToken(AsmToken::EndOfStatement))
6098       break;
6099 
6100     trySkipToken(AsmToken::Comma);
6101     res = parseOptionalOpr(Operands);
6102   }
6103 
6104   return res;
6105 }
6106 
6107 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6108   OperandMatchResultTy res;
6109   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6110     // try to parse any optional operand here
6111     if (Op.IsBit) {
6112       res = parseNamedBit(Op.Name, Operands, Op.Type);
6113     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6114       res = parseOModOperand(Operands);
6115     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6116                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6117                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6118       res = parseSDWASel(Operands, Op.Name, Op.Type);
6119     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6120       res = parseSDWADstUnused(Operands);
6121     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6122                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6123                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6124                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6125       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6126                                         Op.ConvertResult);
6127     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6128       res = parseDim(Operands);
6129     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6130       res = parseDfmtNfmt(Operands);
6131     } else {
6132       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6133     }
6134     if (res != MatchOperand_NoMatch) {
6135       return res;
6136     }
6137   }
6138   return MatchOperand_NoMatch;
6139 }
6140 
6141 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6142   StringRef Name = Parser.getTok().getString();
6143   if (Name == "mul") {
6144     return parseIntWithPrefix("mul", Operands,
6145                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6146   }
6147 
6148   if (Name == "div") {
6149     return parseIntWithPrefix("div", Operands,
6150                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6151   }
6152 
6153   return MatchOperand_NoMatch;
6154 }
6155 
6156 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6157   cvtVOP3P(Inst, Operands);
6158 
6159   int Opc = Inst.getOpcode();
6160 
6161   int SrcNum;
6162   const int Ops[] = { AMDGPU::OpName::src0,
6163                       AMDGPU::OpName::src1,
6164                       AMDGPU::OpName::src2 };
6165   for (SrcNum = 0;
6166        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6167        ++SrcNum);
6168   assert(SrcNum > 0);
6169 
6170   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6171   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6172 
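  // The op_sel bit just past the last source selects the high half of the
  // destination; the encoding keeps this flag in src0_modifiers, so propagate
  // it there as DST_OP_SEL.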
6173   if ((OpSel & (1 << SrcNum)) != 0) {
6174     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6175     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6176     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6177   }
6178 }
6179 
6180 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6189 }
6190 
6191 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6192 {
6193   OptionalImmIndexMap OptionalIdx;
6194   unsigned Opc = Inst.getOpcode();
6195 
6196   unsigned I = 1;
6197   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6198   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6199     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6200   }
6201 
6202   for (unsigned E = Operands.size(); I != E; ++I) {
6203     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6204     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6205       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6206     } else if (Op.isInterpSlot() ||
6207                Op.isInterpAttr() ||
6208                Op.isAttrChan()) {
6209       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6210     } else if (Op.isImmModifier()) {
6211       OptionalIdx[Op.getImmTy()] = I;
6212     } else {
6213       llvm_unreachable("unhandled operand type");
6214     }
6215   }
6216 
6217   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6218     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6219   }
6220 
6221   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6222     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6223   }
6224 
6225   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6226     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6227   }
6228 }
6229 
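// Common VOP3 conversion: add the destination register(s), then the source
// operands (with FP input modifiers when the instruction has src modifiers),
// recording the positions of optional immediate operands in OptionalIdx so
// that clamp/omod can be appended afterwards.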
6230 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6231                               OptionalImmIndexMap &OptionalIdx) {
6232   unsigned Opc = Inst.getOpcode();
6233 
6234   unsigned I = 1;
6235   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6236   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6237     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6238   }
6239 
6240   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6241     // This instruction has src modifiers
6242     for (unsigned E = Operands.size(); I != E; ++I) {
6243       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6244       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6245         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6246       } else if (Op.isImmModifier()) {
6247         OptionalIdx[Op.getImmTy()] = I;
6248       } else if (Op.isRegOrImm()) {
6249         Op.addRegOrImmOperands(Inst, 1);
6250       } else {
6251         llvm_unreachable("unhandled operand type");
6252       }
6253     }
6254   } else {
6255     // No src modifiers
6256     for (unsigned E = Operands.size(); I != E; ++I) {
6257       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6258       if (Op.isMod()) {
6259         OptionalIdx[Op.getImmTy()] = I;
6260       } else {
6261         Op.addRegOrImmOperands(Inst, 1);
6262       }
6263     }
6264   }
6265 
6266   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6267     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6268   }
6269 
6270   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6271     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6272   }
6273 
6274   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6275   // they have a src2 register operand that is tied to the dst operand.
6276   // The assembler does not allow modifiers for this operand, so
6277   // src2_modifiers must be 0.
6278   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6279       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6280       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6281       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6282       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6283       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6284       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6285     auto it = Inst.begin();
6286     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6287     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6288     ++it;
6289     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6290   }
6291 }
6292 
6293 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6294   OptionalImmIndexMap OptionalIdx;
6295   cvtVOP3(Inst, Operands, OptionalIdx);
6296 }
6297 
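// Convert a packed (VOP3P) instruction: after the common VOP3 conversion, the
// parsed op_sel/op_sel_hi/neg_lo/neg_hi bit masks are folded into the
// per-source src*_modifiers operands.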
6298 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6299                                const OperandVector &Operands) {
6300   OptionalImmIndexMap OptIdx;
6301   const int Opc = Inst.getOpcode();
6302   const MCInstrDesc &Desc = MII.get(Opc);
6303 
6304   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6305 
6306   cvtVOP3(Inst, Operands, OptIdx);
6307 
6308   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6309     assert(!IsPacked);
6310     Inst.addOperand(Inst.getOperand(0));
6311   }
6312 
6313   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
6314   // instruction, and then figure out where to actually put the modifiers.
6315 
6316   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6317 
6318   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6319   if (OpSelHiIdx != -1) {
6320     int DefaultVal = IsPacked ? -1 : 0;
6321     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6322                           DefaultVal);
6323   }
6324 
6325   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6326   if (NegLoIdx != -1) {
6327     assert(IsPacked);
6328     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6329     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6330   }
6331 
6332   const int Ops[] = { AMDGPU::OpName::src0,
6333                       AMDGPU::OpName::src1,
6334                       AMDGPU::OpName::src2 };
6335   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6336                          AMDGPU::OpName::src1_modifiers,
6337                          AMDGPU::OpName::src2_modifiers };
6338 
6339   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6340 
6341   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6342   unsigned OpSelHi = 0;
6343   unsigned NegLo = 0;
6344   unsigned NegHi = 0;
6345 
6346   if (OpSelHiIdx != -1) {
6347     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6348   }
6349 
6350   if (NegLoIdx != -1) {
6351     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6352     NegLo = Inst.getOperand(NegLoIdx).getImm();
6353     NegHi = Inst.getOperand(NegHiIdx).getImm();
6354   }
6355 
6356   for (int J = 0; J < 3; ++J) {
6357     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6358     if (OpIdx == -1)
6359       break;
6360 
6361     uint32_t ModVal = 0;
6362 
6363     if ((OpSel & (1 << J)) != 0)
6364       ModVal |= SISrcMods::OP_SEL_0;
6365 
6366     if ((OpSelHi & (1 << J)) != 0)
6367       ModVal |= SISrcMods::OP_SEL_1;
6368 
6369     if ((NegLo & (1 << J)) != 0)
6370       ModVal |= SISrcMods::NEG;
6371 
6372     if ((NegHi & (1 << J)) != 0)
6373       ModVal |= SISrcMods::NEG_HI;
6374 
6375     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6376 
6377     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6378   }
6379 }
6380 
6381 //===----------------------------------------------------------------------===//
6382 // dpp
6383 //===----------------------------------------------------------------------===//
6384 
6385 bool AMDGPUOperand::isDPP8() const {
6386   return isImmTy(ImmTyDPP8);
6387 }
6388 
6389 bool AMDGPUOperand::isDPPCtrl() const {
6390   using namespace AMDGPU::DPP;
6391 
6392   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6393   if (result) {
6394     int64_t Imm = getImm();
6395     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6396            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6397            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6398            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6399            (Imm == DppCtrl::WAVE_SHL1) ||
6400            (Imm == DppCtrl::WAVE_ROL1) ||
6401            (Imm == DppCtrl::WAVE_SHR1) ||
6402            (Imm == DppCtrl::WAVE_ROR1) ||
6403            (Imm == DppCtrl::ROW_MIRROR) ||
6404            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6405            (Imm == DppCtrl::BCAST15) ||
6406            (Imm == DppCtrl::BCAST31) ||
6407            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6408            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6409   }
6410   return false;
6411 }
6412 
6413 //===----------------------------------------------------------------------===//
6414 // mAI
6415 //===----------------------------------------------------------------------===//
6416 
6417 bool AMDGPUOperand::isBLGP() const {
6418   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6419 }
6420 
6421 bool AMDGPUOperand::isCBSZ() const {
6422   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6423 }
6424 
6425 bool AMDGPUOperand::isABID() const {
6426   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6427 }
6428 
6429 bool AMDGPUOperand::isS16Imm() const {
6430   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6431 }
6432 
6433 bool AMDGPUOperand::isU16Imm() const {
6434   return isImm() && isUInt<16>(getImm());
6435 }
6436 
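// Parse the MIMG dimension operand (GFX10 only), e.g. "dim:SQ_RSRC_IMG_2D" or
// the short form "dim:2D".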
6437 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6438   if (!isGFX10())
6439     return MatchOperand_NoMatch;
6440 
6441   SMLoc S = Parser.getTok().getLoc();
6442 
6443   if (getLexer().isNot(AsmToken::Identifier))
6444     return MatchOperand_NoMatch;
6445   if (getLexer().getTok().getString() != "dim")
6446     return MatchOperand_NoMatch;
6447 
6448   Parser.Lex();
6449   if (getLexer().isNot(AsmToken::Colon))
6450     return MatchOperand_ParseFail;
6451 
6452   Parser.Lex();
6453 
6454   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6455   // integer.
6456   std::string Token;
6457   if (getLexer().is(AsmToken::Integer)) {
6458     SMLoc Loc = getLexer().getTok().getEndLoc();
6459     Token = getLexer().getTok().getString();
6460     Parser.Lex();
6461     if (getLexer().getTok().getLoc() != Loc)
6462       return MatchOperand_ParseFail;
6463   }
6464   if (getLexer().isNot(AsmToken::Identifier))
6465     return MatchOperand_ParseFail;
6466   Token += getLexer().getTok().getString();
6467 
6468   StringRef DimId = Token;
6469   if (DimId.startswith("SQ_RSRC_IMG_"))
6470     DimId = DimId.substr(12);
6471 
6472   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6473   if (!DimInfo)
6474     return MatchOperand_ParseFail;
6475 
6476   Parser.Lex();
6477 
6478   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6479                                               AMDGPUOperand::ImmTyDim));
6480   return MatchOperand_Success;
6481 }
6482 
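// Parse a dpp8 operand, e.g. "dpp8:[0,1,2,3,4,5,6,7]" (GFX10 only). Each of
// the eight selects is a lane index in [0..7]; they are packed 3 bits apiece
// into a single immediate.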
6483 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6484   SMLoc S = Parser.getTok().getLoc();
6485   StringRef Prefix;
6486 
6487   if (getLexer().getKind() == AsmToken::Identifier) {
6488     Prefix = Parser.getTok().getString();
6489   } else {
6490     return MatchOperand_NoMatch;
6491   }
6492 
6493   if (Prefix != "dpp8")
6494     return parseDPPCtrl(Operands);
6495   if (!isGFX10())
6496     return MatchOperand_NoMatch;
6497 
6498   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6499 
6500   int64_t Sels[8];
6501 
6502   Parser.Lex();
6503   if (getLexer().isNot(AsmToken::Colon))
6504     return MatchOperand_ParseFail;
6505 
6506   Parser.Lex();
6507   if (getLexer().isNot(AsmToken::LBrac))
6508     return MatchOperand_ParseFail;
6509 
6510   Parser.Lex();
6511   if (getParser().parseAbsoluteExpression(Sels[0]))
6512     return MatchOperand_ParseFail;
6513   if (0 > Sels[0] || 7 < Sels[0])
6514     return MatchOperand_ParseFail;
6515 
6516   for (size_t i = 1; i < 8; ++i) {
6517     if (getLexer().isNot(AsmToken::Comma))
6518       return MatchOperand_ParseFail;
6519 
6520     Parser.Lex();
6521     if (getParser().parseAbsoluteExpression(Sels[i]))
6522       return MatchOperand_ParseFail;
6523     if (0 > Sels[i] || 7 < Sels[i])
6524       return MatchOperand_ParseFail;
6525   }
6526 
6527   if (getLexer().isNot(AsmToken::RBrac))
6528     return MatchOperand_ParseFail;
6529   Parser.Lex();
6530 
6531   unsigned DPP8 = 0;
6532   for (size_t i = 0; i < 8; ++i)
6533     DPP8 |= (Sels[i] << (i * 3));
6534 
6535   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6536   return MatchOperand_Success;
6537 }
6538 
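// Parse a dpp_ctrl operand, e.g. "quad_perm:[0,1,2,3]", "row_shl:1",
// "row_mirror" or, on GFX10, "row_share:0", and encode it as a DppCtrl
// immediate.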
6539 OperandMatchResultTy
6540 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6541   using namespace AMDGPU::DPP;
6542 
6543   SMLoc S = Parser.getTok().getLoc();
6544   StringRef Prefix;
6545   int64_t Int;
6546 
6547   if (getLexer().getKind() == AsmToken::Identifier) {
6548     Prefix = Parser.getTok().getString();
6549   } else {
6550     return MatchOperand_NoMatch;
6551   }
6552 
6553   if (Prefix == "row_mirror") {
6554     Int = DppCtrl::ROW_MIRROR;
6555     Parser.Lex();
6556   } else if (Prefix == "row_half_mirror") {
6557     Int = DppCtrl::ROW_HALF_MIRROR;
6558     Parser.Lex();
6559   } else {
6560     // Check to prevent parseDPPCtrl from eating invalid tokens
6561     if (Prefix != "quad_perm"
6562         && Prefix != "row_shl"
6563         && Prefix != "row_shr"
6564         && Prefix != "row_ror"
6565         && Prefix != "wave_shl"
6566         && Prefix != "wave_rol"
6567         && Prefix != "wave_shr"
6568         && Prefix != "wave_ror"
6569         && Prefix != "row_bcast"
6570         && Prefix != "row_share"
6571         && Prefix != "row_xmask") {
6572       return MatchOperand_NoMatch;
6573     }
6574 
6575     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6576       return MatchOperand_NoMatch;
6577 
6578     if (!isVI() && !isGFX9() &&
6579         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6580          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6581          Prefix == "row_bcast"))
6582       return MatchOperand_NoMatch;
6583 
6584     Parser.Lex();
6585     if (getLexer().isNot(AsmToken::Colon))
6586       return MatchOperand_ParseFail;
6587 
6588     if (Prefix == "quad_perm") {
6589       // quad_perm:[%d,%d,%d,%d]
6590       Parser.Lex();
6591       if (getLexer().isNot(AsmToken::LBrac))
6592         return MatchOperand_ParseFail;
6593       Parser.Lex();
6594 
6595       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6596         return MatchOperand_ParseFail;
6597 
6598       for (int i = 0; i < 3; ++i) {
6599         if (getLexer().isNot(AsmToken::Comma))
6600           return MatchOperand_ParseFail;
6601         Parser.Lex();
6602 
6603         int64_t Temp;
6604         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6605           return MatchOperand_ParseFail;
6606         const int shift = i*2 + 2;
6607         Int += (Temp << shift);
6608       }
6609 
6610       if (getLexer().isNot(AsmToken::RBrac))
6611         return MatchOperand_ParseFail;
6612       Parser.Lex();
6613     } else {
6614       // sel:%d
6615       Parser.Lex();
6616       if (getParser().parseAbsoluteExpression(Int))
6617         return MatchOperand_ParseFail;
6618 
6619       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6620         Int |= DppCtrl::ROW_SHL0;
6621       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6622         Int |= DppCtrl::ROW_SHR0;
6623       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6624         Int |= DppCtrl::ROW_ROR0;
6625       } else if (Prefix == "wave_shl" && 1 == Int) {
6626         Int = DppCtrl::WAVE_SHL1;
6627       } else if (Prefix == "wave_rol" && 1 == Int) {
6628         Int = DppCtrl::WAVE_ROL1;
6629       } else if (Prefix == "wave_shr" && 1 == Int) {
6630         Int = DppCtrl::WAVE_SHR1;
6631       } else if (Prefix == "wave_ror" && 1 == Int) {
6632         Int = DppCtrl::WAVE_ROR1;
6633       } else if (Prefix == "row_bcast") {
6634         if (Int == 15) {
6635           Int = DppCtrl::BCAST15;
6636         } else if (Int == 31) {
6637           Int = DppCtrl::BCAST31;
6638         } else {
6639           return MatchOperand_ParseFail;
6640         }
6641       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6642         Int |= DppCtrl::ROW_SHARE_FIRST;
6643       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6644         Int |= DppCtrl::ROW_XMASK_FIRST;
6645       } else {
6646         return MatchOperand_ParseFail;
6647       }
6648     }
6649   }
6650 
6651   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6652   return MatchOperand_Success;
6653 }
6654 
6655 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6656   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6657 }
6658 
6659 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6660   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6661 }
6662 
6663 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6664   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6665 }
6666 
6667 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6668   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6669 }
6670 
6671 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6672   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6673 }
6674 
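// Convert a parsed DPP/DPP8 instruction: re-add tied operands (old/src2 for
// MAC), add the register and modifier operands, then append the dpp controls
// (dpp8 plus fi, or dpp_ctrl/row_mask/bank_mask/bound_ctrl/fi).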
6675 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6676   OptionalImmIndexMap OptionalIdx;
6677 
6678   unsigned I = 1;
6679   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6680   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6681     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6682   }
6683 
6684   int Fi = 0;
6685   for (unsigned E = Operands.size(); I != E; ++I) {
6686     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6687                                             MCOI::TIED_TO);
6688     if (TiedTo != -1) {
6689       assert((unsigned)TiedTo < Inst.getNumOperands());
6690       // Handle the tied 'old' or src2 operand for MAC instructions.
6691       Inst.addOperand(Inst.getOperand(TiedTo));
6692     }
6693     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6694     // Add the register arguments
6695     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6696       // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
6697       // Skip it.
6698       continue;
6699     }
6700 
6701     if (IsDPP8) {
6702       if (Op.isDPP8()) {
6703         Op.addImmOperands(Inst, 1);
6704       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6705         Op.addRegWithFPInputModsOperands(Inst, 2);
6706       } else if (Op.isFI()) {
6707         Fi = Op.getImm();
6708       } else if (Op.isReg()) {
6709         Op.addRegOperands(Inst, 1);
6710       } else {
6711         llvm_unreachable("Invalid operand type");
6712       }
6713     } else {
6714       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6715         Op.addRegWithFPInputModsOperands(Inst, 2);
6716       } else if (Op.isDPPCtrl()) {
6717         Op.addImmOperands(Inst, 1);
6718       } else if (Op.isImm()) {
6719         // Handle optional arguments
6720         OptionalIdx[Op.getImmTy()] = I;
6721       } else {
6722         llvm_unreachable("Invalid operand type");
6723       }
6724     }
6725   }
6726 
6727   if (IsDPP8) {
6728     using namespace llvm::AMDGPU::DPP;
6729     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6730   } else {
6731     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6732     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6733     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6734     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6735       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6736     }
6737   }
6738 }
6739 
6740 //===----------------------------------------------------------------------===//
6741 // sdwa
6742 //===----------------------------------------------------------------------===//
6743 
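// Parse an SDWA select operand, e.g. "dst_sel:BYTE_0" or "src0_sel:WORD_1".
// Prefix names the operand being parsed and Type is the immediate kind to
// create for it.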
6744 OperandMatchResultTy
6745 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6746                               AMDGPUOperand::ImmTy Type) {
6747   using namespace llvm::AMDGPU::SDWA;
6748 
6749   SMLoc S = Parser.getTok().getLoc();
6750   StringRef Value;
6751   OperandMatchResultTy res;
6752 
6753   res = parseStringWithPrefix(Prefix, Value);
6754   if (res != MatchOperand_Success) {
6755     return res;
6756   }
6757 
6758   int64_t Int;
6759   Int = StringSwitch<int64_t>(Value)
6760         .Case("BYTE_0", SdwaSel::BYTE_0)
6761         .Case("BYTE_1", SdwaSel::BYTE_1)
6762         .Case("BYTE_2", SdwaSel::BYTE_2)
6763         .Case("BYTE_3", SdwaSel::BYTE_3)
6764         .Case("WORD_0", SdwaSel::WORD_0)
6765         .Case("WORD_1", SdwaSel::WORD_1)
6766         .Case("DWORD", SdwaSel::DWORD)
6767         .Default(0xffffffff);
6768   Parser.Lex(); // eat last token
6769 
6770   if (Int == 0xffffffff) {
6771     return MatchOperand_ParseFail;
6772   }
6773 
6774   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6775   return MatchOperand_Success;
6776 }
6777 
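// Parse the SDWA dst_unused operand, e.g. "dst_unused:UNUSED_PRESERVE".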
6778 OperandMatchResultTy
6779 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6780   using namespace llvm::AMDGPU::SDWA;
6781 
6782   SMLoc S = Parser.getTok().getLoc();
6783   StringRef Value;
6784   OperandMatchResultTy res;
6785 
6786   res = parseStringWithPrefix("dst_unused", Value);
6787   if (res != MatchOperand_Success) {
6788     return res;
6789   }
6790 
6791   int64_t Int;
6792   Int = StringSwitch<int64_t>(Value)
6793         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6794         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6795         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6796         .Default(0xffffffff);
6797   Parser.Lex(); // eat last token
6798 
6799   if (Int == 0xffffffff) {
6800     return MatchOperand_ParseFail;
6801   }
6802 
6803   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6804   return MatchOperand_Success;
6805 }
6806 
6807 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6808   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6809 }
6810 
6811 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6812   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6813 }
6814 
6815 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6816   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6817 }
6818 
6819 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6820   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6821 }
6822 
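// Common SDWA conversion. BasicInstType selects which optional sdwa operands
// (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel) are appended;
// skipVcc drops the textual "vcc" operand used by the VOP2b and VOPC forms.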
6823 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6824                               uint64_t BasicInstType, bool skipVcc) {
6825   using namespace llvm::AMDGPU::SDWA;
6826 
6827   OptionalImmIndexMap OptionalIdx;
6828   bool skippedVcc = false;
6829 
6830   unsigned I = 1;
6831   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6832   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6833     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6834   }
6835 
6836   for (unsigned E = Operands.size(); I != E; ++I) {
6837     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6838     if (skipVcc && !skippedVcc && Op.isReg() &&
6839         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6840       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
6841       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6842       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6843       // Skip VCC only if we didn't skip it on the previous iteration.
6844       if (BasicInstType == SIInstrFlags::VOP2 &&
6845           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6846         skippedVcc = true;
6847         continue;
6848       } else if (BasicInstType == SIInstrFlags::VOPC &&
6849                  Inst.getNumOperands() == 0) {
6850         skippedVcc = true;
6851         continue;
6852       }
6853     }
6854     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6855       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6856     } else if (Op.isImm()) {
6857       // Handle optional arguments
6858       OptionalIdx[Op.getImmTy()] = I;
6859     } else {
6860       llvm_unreachable("Invalid operand type");
6861     }
6862     skippedVcc = false;
6863   }
6864 
6865   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6866       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6867       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6868     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
6869     switch (BasicInstType) {
6870     case SIInstrFlags::VOP1:
6871       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6872       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6873         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6874       }
6875       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6876       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6877       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6878       break;
6879 
6880     case SIInstrFlags::VOP2:
6881       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6882       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6883         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6884       }
6885       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6886       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6887       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6888       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6889       break;
6890 
6891     case SIInstrFlags::VOPC:
6892       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6893         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6894       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6895       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6896       break;
6897 
6898     default:
6899       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6900     }
6901   }
6902 
6903   // Special case v_mac_{f16, f32}:
6904   // it has a src2 register operand that is tied to the dst operand.
6905   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6906       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6907     auto it = Inst.begin();
6908     std::advance(
6909       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6910     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6911   }
6912 }
6913 
6914 //===----------------------------------------------------------------------===//
6915 // mAI
6916 //===----------------------------------------------------------------------===//
6917 
6918 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6919   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6920 }
6921 
6922 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6923   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6924 }
6925 
6926 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6927   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6928 }
6929 
6930 /// Force static initialization.
6931 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6932   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6933   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6934 }
6935 
6936 #define GET_REGISTER_MATCHER
6937 #define GET_MATCHER_IMPLEMENTATION
6938 #define GET_MNEMONIC_SPELL_CHECKER
6939 #include "AMDGPUGenAsmMatcher.inc"
6940 
6941 // This function should be defined after the auto-generated include so that
6942 // we have the MatchClassKind enum defined.
6943 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6944                                                      unsigned Kind) {
6945   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6946   // But MatchInstructionImpl() expects to meet a token and fails to validate
6947   // the operand. This method checks if we were given an immediate operand but
6948   // expected to get the corresponding token.
6949   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6950   switch (Kind) {
6951   case MCK_addr64:
6952     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6953   case MCK_gds:
6954     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6955   case MCK_lds:
6956     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6957   case MCK_glc:
6958     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6959   case MCK_idxen:
6960     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6961   case MCK_offen:
6962     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6963   case MCK_SSrcB32:
6964     // When operands have expression values, they will return true for isToken,
6965     // because it is not possible to distinguish between a token and an
6966     // expression at parse time. MatchInstructionImpl() will always try to
6967     // match an operand as a token when isToken returns true, and when the
6968     // name of the expression is not a valid token, the match will fail,
6969     // so we need to handle it here.
6970     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6971   case MCK_SSrcF32:
6972     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6973   case MCK_SoppBrTarget:
6974     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6975   case MCK_VReg32OrOff:
6976     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6977   case MCK_InterpSlot:
6978     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6979   case MCK_Attr:
6980     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6981   case MCK_AttrChan:
6982     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6983   case MCK_SReg_64:
6984   case MCK_SReg_64_XEXEC:
6985     // Null is defined as a 32-bit register but
6986     // it should also be enabled with 64-bit operands.
6987     // The following code enables it for SReg_64 operands
6988     // used as source and destination. Remaining source
6989     // operands are handled in isInlinableImm.
6990     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
6991   default:
6992     return Match_InvalidOperand;
6993   }
6994 }
6995 
6996 //===----------------------------------------------------------------------===//
6997 // endpgm
6998 //===----------------------------------------------------------------------===//
6999 
7000 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7001   SMLoc S = Parser.getTok().getLoc();
7002   int64_t Imm = 0;
7003 
7004   if (!parseExpr(Imm)) {
7005     // The operand is optional; if not present, default to 0.
7006     Imm = 0;
7007   }
7008 
7009   if (!isUInt<16>(Imm)) {
7010     Error(S, "expected a 16-bit value");
7011     return MatchOperand_ParseFail;
7012   }
7013 
7014   Operands.push_back(
7015       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7016   return MatchOperand_Success;
7017 }
7018 
7019 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7020