//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
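
  // These flags correspond to the source-operand modifier syntax, e.g. "-v1"
  // (Neg), "|v1|" or "abs(v1)" (Abs), and "sext(v1)" (Sext, on integer
  // SDWA/VOP operands); for example (illustrative): v_add_f32 v0, -|v1|, v2.
  // The SISrcMods bit values used above are defined in SIDefines.h.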

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
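    // For example, a trailing 'gds' modifier may have been parsed here as a
    // symbol reference named "gds" rather than as the 'gds' token.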
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }


  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
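// For example (illustrative):
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, 0    // advances the .kernel.vgpr_count symbol
//     s_endpgm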
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);
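  // For example (illustrative), the HSA metadata directives bracket a YAML
  // block like:
  //   .amdgpu_metadata
  //     ... metadata text ...
  //   .end_amdgpu_metadata
  // and ParseToEndDirective collects everything in between into CollectString.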

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
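
  // The pre-defined version symbols set up above can be used by assembly
  // sources to guard target-specific code, e.g. (illustrative):
  //   .if .amdgcn.gfx_generation_number >= 10
  //   ...
  //   .endif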

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                          const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1676 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1677
1678 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1679 AsmToken::TokenKind getTokenKind() const;
1680 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1681 bool parseExpr(OperandVector &Operands);
1682 StringRef getTokenStr() const;
1683 AsmToken peekToken(bool ShouldSkipSpace = true);
1684 AsmToken getToken() const;
1685 SMLoc getLoc() const;
1686 void lex();
1687
1688 public:
1689 void onBeginOfFile() override;
1690
1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693
1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700
1701 bool parseSwizzleOperand(int64_t &Op,
1702 const unsigned MinVal,
1703 const unsigned MaxVal,
1704 const StringRef ErrMsg,
1705 SMLoc &Loc);
1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707 const unsigned MinVal,
1708 const unsigned MaxVal,
1709 const StringRef ErrMsg);
1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711 bool parseSwizzleOffset(int64_t &Imm);
1712 bool parseSwizzleMacro(int64_t &Imm);
1713 bool parseSwizzleQuadPerm(int64_t &Imm);
1714 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715 bool parseSwizzleBroadcast(int64_t &Imm);
1716 bool parseSwizzleSwap(int64_t &Imm);
1717 bool parseSwizzleReverse(int64_t &Imm);
1718
1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720 int64_t parseGPRIdxMacro();
1721
1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726
1727 AMDGPUOperand::Ptr defaultCPol() const;
1728
1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730 AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732 AMDGPUOperand::Ptr defaultFlatOffset() const;
1733
1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735
1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737 OptionalImmIndexMap &OptionalIdx);
1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1742 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1743 OptionalImmIndexMap &OptionalIdx);
1744 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1745 OptionalImmIndexMap &OptionalIdx);
1746
1747 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1748 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1749
1750 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1751 bool IsAtomic = false);
1752 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1753 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1754
1755 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1756
1757 bool parseDimId(unsigned &Encoding);
1758 OperandMatchResultTy parseDim(OperandVector &Operands);
1759 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1760 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1761 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1762 int64_t parseDPPCtrlSel(StringRef Ctrl);
1763 int64_t parseDPPCtrlPerm();
1764 AMDGPUOperand::Ptr defaultRowMask() const;
1765 AMDGPUOperand::Ptr defaultBankMask() const;
1766 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1767 AMDGPUOperand::Ptr defaultFI() const;
1768 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1769 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1770 cvtDPP(Inst, Operands, true);
1771 }
1772 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1773 bool IsDPP8 = false);
1774 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1775 cvtVOP3DPP(Inst, Operands, true);
1776 }
1777
1778 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1779 AMDGPUOperand::ImmTy Type);
1780 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1781 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1782 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1783 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1784 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1785 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1786 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1787 uint64_t BasicInstType,
1788 bool SkipDstVcc = false,
1789 bool SkipSrcVcc = false);
1790
1791 AMDGPUOperand::Ptr defaultBLGP() const;
1792 AMDGPUOperand::Ptr defaultCBSZ() const;
1793 AMDGPUOperand::Ptr defaultABID() const;
1794
1795 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1796 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1797
1798 AMDGPUOperand::Ptr defaultWaitVDST() const;
1799 AMDGPUOperand::Ptr defaultWaitEXP() const;
1800 OperandMatchResultTy parseVOPD(OperandVector &Operands);
1801 };
1802
1803 struct OptionalOperand {
1804 const char *Name;
1805 AMDGPUOperand::ImmTy Type;
1806 bool IsBit;
1807 bool (*ConvertResult)(int64_t&);
1808 };
1809
1810 } // end anonymous namespace
1811
1812 // May be called with an integer type of equivalent bit width.
1813 static const fltSemantics *getFltSemantics(unsigned Size) {
1814 switch (Size) {
1815 case 4:
1816 return &APFloat::IEEEsingle();
1817 case 8:
1818 return &APFloat::IEEEdouble();
1819 case 2:
1820 return &APFloat::IEEEhalf();
1821 default:
1822 llvm_unreachable("unsupported fp type");
1823 }
1824 }
1825
1826 static const fltSemantics *getFltSemantics(MVT VT) {
1827 return getFltSemantics(VT.getSizeInBits() / 8);
1828 }
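// For example, MVT::f16 has a size of 2 bytes and maps to IEEEhalf(), while
// MVT::f64 maps to IEEEdouble().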
1829
1830 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1831 switch (OperandType) {
1832 case AMDGPU::OPERAND_REG_IMM_INT32:
1833 case AMDGPU::OPERAND_REG_IMM_FP32:
1834 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1835 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1836 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1837 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1838 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1839 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1840 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1841 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1842 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1843 case AMDGPU::OPERAND_KIMM32:
1844 return &APFloat::IEEEsingle();
1845 case AMDGPU::OPERAND_REG_IMM_INT64:
1846 case AMDGPU::OPERAND_REG_IMM_FP64:
1847 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1848 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1849 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1850 return &APFloat::IEEEdouble();
1851 case AMDGPU::OPERAND_REG_IMM_INT16:
1852 case AMDGPU::OPERAND_REG_IMM_FP16:
1853 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1854 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1855 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1856 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1857 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1858 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1859 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1860 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1861 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1862 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1863 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1864 case AMDGPU::OPERAND_KIMM16:
1865 return &APFloat::IEEEhalf();
1866 default:
1867 llvm_unreachable("unsupported fp type");
1868 }
1869 }
1870
1871 //===----------------------------------------------------------------------===//
1872 // Operand
1873 //===----------------------------------------------------------------------===//
1874
1875 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1876 bool Lost;
1877
1878 // Convert literal to the floating-point semantics of the operand type
1879 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1880 APFloat::rmNearestTiesToEven,
1881 &Lost);
1882 // We allow precision loss but not overflow or underflow
1883 if (Status != APFloat::opOK &&
1884 Lost &&
1885 ((Status & APFloat::opOverflow) != 0 ||
1886 (Status & APFloat::opUnderflow) != 0)) {
1887 return false;
1888 }
1889
1890 return true;
1891 }
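// For example, converting 0.1 to f32 only loses precision (opInexact) and is
// accepted, while converting 1.0e40 to f32 overflows and is rejected.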
1892
1893 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1894 return isUIntN(Size, Val) || isIntN(Size, Val);
1895 }
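// For example, with Size == 16 both 0xFFFF (unsigned) and -1 (signed) are
// safe, while 0x12345 does not fit in 16 bits either way and is rejected.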
1896
1897 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1898 if (VT.getScalarType() == MVT::i16) {
1899 // FP immediate values are broken.
1900 return isInlinableIntLiteral(Val);
1901 }
1902
1903 // f16/v2f16 operands work correctly for all values.
1904 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1905 }
1906
1907 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1908
1909 // This is a hack to enable named inline values like
1910 // shared_base with both 32-bit and 64-bit operands.
1911 // Note that these values are defined as
1912 // 32-bit operands only.
1913 if (isInlineValue()) {
1914 return true;
1915 }
1916
1917 if (!isImmTy(ImmTyNone)) {
1918 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1919 return false;
1920 }
1921 // TODO: We should avoid using host floats here. It would be better to
1922 // check the float bit values, which is what a few other places do.
1923 // We've had bot failures before due to weird NaN support on MIPS hosts.
1924
1925 APInt Literal(64, Imm.Val);
1926
1927 if (Imm.IsFPImm) { // We got fp literal token
1928 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1929 return AMDGPU::isInlinableLiteral64(Imm.Val,
1930 AsmParser->hasInv2PiInlineImm());
1931 }
1932
1933 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1934 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1935 return false;
1936
1937 if (type.getScalarSizeInBits() == 16) {
1938 return isInlineableLiteralOp16(
1939 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1940 type, AsmParser->hasInv2PiInlineImm());
1941 }
1942
1943 // Check if single precision literal is inlinable
1944 return AMDGPU::isInlinableLiteral32(
1945 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1946 AsmParser->hasInv2PiInlineImm());
1947 }
1948
1949 // We got int literal token.
1950 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1951 return AMDGPU::isInlinableLiteral64(Imm.Val,
1952 AsmParser->hasInv2PiInlineImm());
1953 }
1954
1955 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1956 return false;
1957 }
1958
1959 if (type.getScalarSizeInBits() == 16) {
1960 return isInlineableLiteralOp16(
1961 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1962 type, AsmParser->hasInv2PiInlineImm());
1963 }
1964
1965 return AMDGPU::isInlinableLiteral32(
1966 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1967 AsmParser->hasInv2PiInlineImm());
1968 }
1969
1970 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1971 // Check that this immediate can be added as literal
1972 if (!isImmTy(ImmTyNone)) {
1973 return false;
1974 }
1975
1976 if (!Imm.IsFPImm) {
1977 // We got int literal token.
1978
1979 if (type == MVT::f64 && hasFPModifiers()) {
1980 // FP modifiers cannot be applied to integer literals while preserving the
1981 // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1982 // To avoid ambiguity, disable these cases.
1983 return false;
1984 }
1985
1986 unsigned Size = type.getSizeInBits();
1987 if (Size == 64)
1988 Size = 32;
1989
1990 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1991 // types.
1992 return isSafeTruncation(Imm.Val, Size);
1993 }
1994
1995 // We got fp literal token
1996 if (type == MVT::f64) { // Expected 64-bit fp operand
1997 // The low 32 bits of the literal will be set to zero, but we accept such literals
1998 return true;
1999 }
2000
2001 if (type == MVT::i64) { // Expected 64-bit int operand
2002 // We don't allow fp literals in 64-bit integer instructions. It is
2003 // unclear how we should encode them.
2004 return false;
2005 }
2006
2007 // We allow fp literals with f16x2 operands assuming that the specified
2008 // literal goes into the lower half and the upper half is zero. We also
2009 // require that the literal may be losslessly converted to f16.
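// For example, 1.5 converts to f16 exactly and is accepted for a v2f16
// operand, while 1.0e10 overflows f16 and is rejected.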
2010 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2011 (type == MVT::v2i16)? MVT::i16 :
2012 (type == MVT::v2f32)? MVT::f32 : type;
2013
2014 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2015 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2016 }
2017
2018 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2019 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2020 }
2021
2022 bool AMDGPUOperand::isVRegWithInputMods() const {
2023 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2024 // GFX90A allows DPP on 64-bit operands.
2025 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2026 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2027 }
2028
2029 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2030 if (AsmParser->isVI())
2031 return isVReg32();
2032 else if (AsmParser->isGFX9Plus())
2033 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2034 else
2035 return false;
2036 }
2037
2038 bool AMDGPUOperand::isSDWAFP16Operand() const {
2039 return isSDWAOperand(MVT::f16);
2040 }
2041
2042 bool AMDGPUOperand::isSDWAFP32Operand() const {
2043 return isSDWAOperand(MVT::f32);
2044 }
2045
2046 bool AMDGPUOperand::isSDWAInt16Operand() const {
2047 return isSDWAOperand(MVT::i16);
2048 }
2049
2050 bool AMDGPUOperand::isSDWAInt32Operand() const {
2051 return isSDWAOperand(MVT::i32);
2052 }
2053
2054 bool AMDGPUOperand::isBoolReg() const {
2055 auto FB = AsmParser->getFeatureBits();
2056 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2057 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2058 }
2059
2060 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2061 {
2062 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2063 assert(Size == 2 || Size == 4 || Size == 8);
2064
2065 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2066
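// For example, with Size == 4 the mask is 0x80000000: abs clears the sign bit
// (0xBF800000, i.e. -1.0f, becomes 0x3F800000, i.e. 1.0f) and neg flips it.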
2067 if (Imm.Mods.Abs) {
2068 Val &= ~FpSignMask;
2069 }
2070 if (Imm.Mods.Neg) {
2071 Val ^= FpSignMask;
2072 }
2073
2074 return Val;
2075 }
2076
2077 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2078 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2079 Inst.getNumOperands())) {
2080 addLiteralImmOperand(Inst, Imm.Val,
2081 ApplyModifiers &&
2082 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2083 } else {
2084 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2085 Inst.addOperand(MCOperand::createImm(Imm.Val));
2086 setImmKindNone();
2087 }
2088 }
2089
2090 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2091 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2092 auto OpNum = Inst.getNumOperands();
2093 // Check that this operand accepts literals
2094 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2095
2096 if (ApplyModifiers) {
2097 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2098 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2099 Val = applyInputFPModifiers(Val, Size);
2100 }
2101
2102 APInt Literal(64, Val);
2103 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2104
2105 if (Imm.IsFPImm) { // We got fp literal token
2106 switch (OpTy) {
2107 case AMDGPU::OPERAND_REG_IMM_INT64:
2108 case AMDGPU::OPERAND_REG_IMM_FP64:
2109 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2110 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2111 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2112 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2113 AsmParser->hasInv2PiInlineImm())) {
2114 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2115 setImmKindConst();
2116 return;
2117 }
2118
2119 // Non-inlineable
2120 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2121 // For fp operands we check whether the low 32 bits are zero
2122 if (Literal.getLoBits(32) != 0) {
2123 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2124 "Can't encode literal as exact 64-bit floating-point operand. "
2125 "Low 32-bits will be set to zero");
2126 }
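// For example, the fp literal 1/3 (0x3FD5555555555555) has nonzero low bits;
// only its high 32 bits (0x3FD55555) are encoded, so the operand effectively
// becomes 0x3FD5555500000000.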
2127
2128 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2129 setImmKindLiteral();
2130 return;
2131 }
2132
2133 // We don't allow fp literals in 64-bit integer instructions. It is
2134 // unclear how we should encode them. This case should be checked earlier
2135 // in predicate methods (isLiteralImm())
2136 llvm_unreachable("fp literal in 64-bit integer instruction.");
2137
2138 case AMDGPU::OPERAND_REG_IMM_INT32:
2139 case AMDGPU::OPERAND_REG_IMM_FP32:
2140 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2141 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2142 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2143 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2144 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2145 case AMDGPU::OPERAND_REG_IMM_INT16:
2146 case AMDGPU::OPERAND_REG_IMM_FP16:
2147 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2148 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2149 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2150 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2151 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2152 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2153 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2154 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2155 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2156 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2157 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2158 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2159 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2160 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2161 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2162 case AMDGPU::OPERAND_KIMM32:
2163 case AMDGPU::OPERAND_KIMM16: {
2164 bool lost;
2165 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2166 // Convert literal to the operand's floating-point semantics
2167 FPLiteral.convert(*getOpFltSemantics(OpTy),
2168 APFloat::rmNearestTiesToEven, &lost);
2169 // We allow precision loss but not overflow or underflow. This should be
2170 // checked earlier in isLiteralImm().
2171
2172 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2173 Inst.addOperand(MCOperand::createImm(ImmVal));
2174 setImmKindLiteral();
2175 return;
2176 }
2177 default:
2178 llvm_unreachable("invalid operand size");
2179 }
2180
2181 return;
2182 }
2183
2184 // We got int literal token.
2185 // Only sign extend inline immediates.
2186 switch (OpTy) {
2187 case AMDGPU::OPERAND_REG_IMM_INT32:
2188 case AMDGPU::OPERAND_REG_IMM_FP32:
2189 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2190 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2191 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2192 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2193 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2194 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2195 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2196 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2197 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2198 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2199 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2200 if (isSafeTruncation(Val, 32) &&
2201 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2202 AsmParser->hasInv2PiInlineImm())) {
2203 Inst.addOperand(MCOperand::createImm(Val));
2204 setImmKindConst();
2205 return;
2206 }
2207
2208 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2209 setImmKindLiteral();
2210 return;
2211
2212 case AMDGPU::OPERAND_REG_IMM_INT64:
2213 case AMDGPU::OPERAND_REG_IMM_FP64:
2214 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2215 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2216 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2217 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2218 Inst.addOperand(MCOperand::createImm(Val));
2219 setImmKindConst();
2220 return;
2221 }
2222
2223 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2224 setImmKindLiteral();
2225 return;
2226
2227 case AMDGPU::OPERAND_REG_IMM_INT16:
2228 case AMDGPU::OPERAND_REG_IMM_FP16:
2229 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2230 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2231 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2232 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2233 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2234 if (isSafeTruncation(Val, 16) &&
2235 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2236 AsmParser->hasInv2PiInlineImm())) {
2237 Inst.addOperand(MCOperand::createImm(Val));
2238 setImmKindConst();
2239 return;
2240 }
2241
2242 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2243 setImmKindLiteral();
2244 return;
2245
2246 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2247 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2248 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2249 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2250 assert(isSafeTruncation(Val, 16));
2251 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2252 AsmParser->hasInv2PiInlineImm()));
2253
2254 Inst.addOperand(MCOperand::createImm(Val));
2255 return;
2256 }
2257 case AMDGPU::OPERAND_KIMM32:
2258 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2259 setImmKindNone();
2260 return;
2261 case AMDGPU::OPERAND_KIMM16:
2262 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2263 setImmKindNone();
2264 return;
2265 default:
2266 llvm_unreachable("invalid operand size");
2267 }
2268 }
2269
2270 template <unsigned Bitwidth>
2271 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2272 APInt Literal(64, Imm.Val);
2273 setImmKindNone();
2274
2275 if (!Imm.IsFPImm) {
2276 // We got int literal token.
2277 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2278 return;
2279 }
2280
2281 bool Lost;
2282 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2283 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2284 APFloat::rmNearestTiesToEven, &Lost);
2285 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2286 }
2287
2288 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2289 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2290 }
2291
2292 static bool isInlineValue(unsigned Reg) {
2293 switch (Reg) {
2294 case AMDGPU::SRC_SHARED_BASE:
2295 case AMDGPU::SRC_SHARED_LIMIT:
2296 case AMDGPU::SRC_PRIVATE_BASE:
2297 case AMDGPU::SRC_PRIVATE_LIMIT:
2298 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2299 return true;
2300 case AMDGPU::SRC_VCCZ:
2301 case AMDGPU::SRC_EXECZ:
2302 case AMDGPU::SRC_SCC:
2303 return true;
2304 case AMDGPU::SGPR_NULL:
2305 return true;
2306 default:
2307 return false;
2308 }
2309 }
2310
2311 bool AMDGPUOperand::isInlineValue() const {
2312 return isRegKind() && ::isInlineValue(getReg());
2313 }
2314
2315 //===----------------------------------------------------------------------===//
2316 // AsmParser
2317 //===----------------------------------------------------------------------===//
2318
2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2320 if (Is == IS_VGPR) {
2321 switch (RegWidth) {
2322 default: return -1;
2323 case 32:
2324 return AMDGPU::VGPR_32RegClassID;
2325 case 64:
2326 return AMDGPU::VReg_64RegClassID;
2327 case 96:
2328 return AMDGPU::VReg_96RegClassID;
2329 case 128:
2330 return AMDGPU::VReg_128RegClassID;
2331 case 160:
2332 return AMDGPU::VReg_160RegClassID;
2333 case 192:
2334 return AMDGPU::VReg_192RegClassID;
2335 case 224:
2336 return AMDGPU::VReg_224RegClassID;
2337 case 256:
2338 return AMDGPU::VReg_256RegClassID;
2339 case 512:
2340 return AMDGPU::VReg_512RegClassID;
2341 case 1024:
2342 return AMDGPU::VReg_1024RegClassID;
2343 }
2344 } else if (Is == IS_TTMP) {
2345 switch (RegWidth) {
2346 default: return -1;
2347 case 32:
2348 return AMDGPU::TTMP_32RegClassID;
2349 case 64:
2350 return AMDGPU::TTMP_64RegClassID;
2351 case 128:
2352 return AMDGPU::TTMP_128RegClassID;
2353 case 256:
2354 return AMDGPU::TTMP_256RegClassID;
2355 case 512:
2356 return AMDGPU::TTMP_512RegClassID;
2357 }
2358 } else if (Is == IS_SGPR) {
2359 switch (RegWidth) {
2360 default: return -1;
2361 case 32:
2362 return AMDGPU::SGPR_32RegClassID;
2363 case 64:
2364 return AMDGPU::SGPR_64RegClassID;
2365 case 96:
2366 return AMDGPU::SGPR_96RegClassID;
2367 case 128:
2368 return AMDGPU::SGPR_128RegClassID;
2369 case 160:
2370 return AMDGPU::SGPR_160RegClassID;
2371 case 192:
2372 return AMDGPU::SGPR_192RegClassID;
2373 case 224:
2374 return AMDGPU::SGPR_224RegClassID;
2375 case 256:
2376 return AMDGPU::SGPR_256RegClassID;
2377 case 512:
2378 return AMDGPU::SGPR_512RegClassID;
2379 }
2380 } else if (Is == IS_AGPR) {
2381 switch (RegWidth) {
2382 default: return -1;
2383 case 32:
2384 return AMDGPU::AGPR_32RegClassID;
2385 case 64:
2386 return AMDGPU::AReg_64RegClassID;
2387 case 96:
2388 return AMDGPU::AReg_96RegClassID;
2389 case 128:
2390 return AMDGPU::AReg_128RegClassID;
2391 case 160:
2392 return AMDGPU::AReg_160RegClassID;
2393 case 192:
2394 return AMDGPU::AReg_192RegClassID;
2395 case 224:
2396 return AMDGPU::AReg_224RegClassID;
2397 case 256:
2398 return AMDGPU::AReg_256RegClassID;
2399 case 512:
2400 return AMDGPU::AReg_512RegClassID;
2401 case 1024:
2402 return AMDGPU::AReg_1024RegClassID;
2403 }
2404 }
2405 return -1;
2406 }
2407
2408 static unsigned getSpecialRegForName(StringRef RegName) {
2409 return StringSwitch<unsigned>(RegName)
2410 .Case("exec", AMDGPU::EXEC)
2411 .Case("vcc", AMDGPU::VCC)
2412 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2413 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2414 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2415 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2416 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2417 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2418 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2419 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2420 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2421 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2422 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2423 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2424 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2425 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2426 .Case("m0", AMDGPU::M0)
2427 .Case("vccz", AMDGPU::SRC_VCCZ)
2428 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2429 .Case("execz", AMDGPU::SRC_EXECZ)
2430 .Case("src_execz", AMDGPU::SRC_EXECZ)
2431 .Case("scc", AMDGPU::SRC_SCC)
2432 .Case("src_scc", AMDGPU::SRC_SCC)
2433 .Case("tba", AMDGPU::TBA)
2434 .Case("tma", AMDGPU::TMA)
2435 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2436 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2437 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2438 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2439 .Case("vcc_lo", AMDGPU::VCC_LO)
2440 .Case("vcc_hi", AMDGPU::VCC_HI)
2441 .Case("exec_lo", AMDGPU::EXEC_LO)
2442 .Case("exec_hi", AMDGPU::EXEC_HI)
2443 .Case("tma_lo", AMDGPU::TMA_LO)
2444 .Case("tma_hi", AMDGPU::TMA_HI)
2445 .Case("tba_lo", AMDGPU::TBA_LO)
2446 .Case("tba_hi", AMDGPU::TBA_HI)
2447 .Case("pc", AMDGPU::PC_REG)
2448 .Case("null", AMDGPU::SGPR_NULL)
2449 .Default(AMDGPU::NoRegister);
2450 }
2451
2452 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2453 SMLoc &EndLoc, bool RestoreOnFailure) {
2454 auto R = parseRegister();
2455 if (!R) return true;
2456 assert(R->isReg());
2457 RegNo = R->getReg();
2458 StartLoc = R->getStartLoc();
2459 EndLoc = R->getEndLoc();
2460 return false;
2461 }
2462
2463 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2464 SMLoc &EndLoc) {
2465 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2466 }
2467
2468 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2469 SMLoc &StartLoc,
2470 SMLoc &EndLoc) {
2471 bool Result =
2472 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2473 bool PendingErrors = getParser().hasPendingError();
2474 getParser().clearPendingErrors();
2475 if (PendingErrors)
2476 return MatchOperand_ParseFail;
2477 if (Result)
2478 return MatchOperand_NoMatch;
2479 return MatchOperand_Success;
2480 }
2481
2482 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2483 RegisterKind RegKind, unsigned Reg1,
2484 SMLoc Loc) {
2485 switch (RegKind) {
2486 case IS_SPECIAL:
2487 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2488 Reg = AMDGPU::EXEC;
2489 RegWidth = 64;
2490 return true;
2491 }
2492 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2493 Reg = AMDGPU::FLAT_SCR;
2494 RegWidth = 64;
2495 return true;
2496 }
2497 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2498 Reg = AMDGPU::XNACK_MASK;
2499 RegWidth = 64;
2500 return true;
2501 }
2502 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2503 Reg = AMDGPU::VCC;
2504 RegWidth = 64;
2505 return true;
2506 }
2507 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2508 Reg = AMDGPU::TBA;
2509 RegWidth = 64;
2510 return true;
2511 }
2512 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2513 Reg = AMDGPU::TMA;
2514 RegWidth = 64;
2515 return true;
2516 }
2517 Error(Loc, "register does not fit in the list");
2518 return false;
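// For regular registers, each subsequent element of the list must be the next
// consecutive 32-bit register, i.e. Reg + RegWidth / 32. For example, in
// [v4, v5, v6] the width grows 32 -> 64 -> 96 while Reg stays v4.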
2519 case IS_VGPR:
2520 case IS_SGPR:
2521 case IS_AGPR:
2522 case IS_TTMP:
2523 if (Reg1 != Reg + RegWidth / 32) {
2524 Error(Loc, "registers in a list must have consecutive indices");
2525 return false;
2526 }
2527 RegWidth += 32;
2528 return true;
2529 default:
2530 llvm_unreachable("unexpected register kind");
2531 }
2532 }
2533
2534 struct RegInfo {
2535 StringLiteral Name;
2536 RegisterKind Kind;
2537 };
2538
2539 static constexpr RegInfo RegularRegisters[] = {
2540 {{"v"}, IS_VGPR},
2541 {{"s"}, IS_SGPR},
2542 {{"ttmp"}, IS_TTMP},
2543 {{"acc"}, IS_AGPR},
2544 {{"a"}, IS_AGPR},
2545 };
2546
2547 static bool isRegularReg(RegisterKind Kind) {
2548 return Kind == IS_VGPR ||
2549 Kind == IS_SGPR ||
2550 Kind == IS_TTMP ||
2551 Kind == IS_AGPR;
2552 }
2553
2554 static const RegInfo* getRegularRegInfo(StringRef Str) {
2555 for (const RegInfo &Reg : RegularRegisters)
2556 if (Str.startswith(Reg.Name))
2557 return &Reg;
2558 return nullptr;
2559 }
2560
2561 static bool getRegNum(StringRef Str, unsigned& Num) {
2562 return !Str.getAsInteger(10, Num);
2563 }
2564
2565 bool
2566 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2567 const AsmToken &NextToken) const {
2568
2569 // A list of consecutive registers: [s0,s1,s2,s3]
2570 if (Token.is(AsmToken::LBrac))
2571 return true;
2572
2573 if (!Token.is(AsmToken::Identifier))
2574 return false;
2575
2576 // A single register like s0 or a range of registers like s[0:1]
2577
2578 StringRef Str = Token.getString();
2579 const RegInfo *Reg = getRegularRegInfo(Str);
2580 if (Reg) {
2581 StringRef RegName = Reg->Name;
2582 StringRef RegSuffix = Str.substr(RegName.size());
2583 if (!RegSuffix.empty()) {
2584 unsigned Num;
2585 // A single register with an index: rXX
2586 if (getRegNum(RegSuffix, Num))
2587 return true;
2588 } else {
2589 // A range of registers: r[XX:YY].
2590 if (NextToken.is(AsmToken::LBrac))
2591 return true;
2592 }
2593 }
2594
2595 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2596 }
2597
2598 bool
2599 AMDGPUAsmParser::isRegister()
2600 {
2601 return isRegister(getToken(), peekToken());
2602 }
2603
2604 unsigned
2605 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2606 unsigned RegNum,
2607 unsigned RegWidth,
2608 SMLoc Loc) {
2609
2610 assert(isRegularReg(RegKind));
2611
2612 unsigned AlignSize = 1;
2613 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2614 // SGPR and TTMP registers must be aligned.
2615 // Max required alignment is 4 dwords.
2616 AlignSize = std::min(RegWidth / 32, 4u);
2617 }
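// For example, s[2:5] has RegWidth == 128 and AlignSize == 4, so RegNum == 2
// fails the alignment check below, while s[4:7] is accepted and selects
// register index 1 of the 128-bit SGPR register class.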
2618
2619 if (RegNum % AlignSize != 0) {
2620 Error(Loc, "invalid register alignment");
2621 return AMDGPU::NoRegister;
2622 }
2623
2624 unsigned RegIdx = RegNum / AlignSize;
2625 int RCID = getRegClass(RegKind, RegWidth);
2626 if (RCID == -1) {
2627 Error(Loc, "invalid or unsupported register size");
2628 return AMDGPU::NoRegister;
2629 }
2630
2631 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2632 const MCRegisterClass RC = TRI->getRegClass(RCID);
2633 if (RegIdx >= RC.getNumRegs()) {
2634 Error(Loc, "register index is out of range");
2635 return AMDGPU::NoRegister;
2636 }
2637
2638 return RC.getRegister(RegIdx);
2639 }
2640
2641 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2642 int64_t RegLo, RegHi;
2643 if (!skipToken(AsmToken::LBrac, "missing register index"))
2644 return false;
2645
2646 SMLoc FirstIdxLoc = getLoc();
2647 SMLoc SecondIdxLoc;
2648
2649 if (!parseExpr(RegLo))
2650 return false;
2651
2652 if (trySkipToken(AsmToken::Colon)) {
2653 SecondIdxLoc = getLoc();
2654 if (!parseExpr(RegHi))
2655 return false;
2656 } else {
2657 RegHi = RegLo;
2658 }
2659
2660 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2661 return false;
2662
2663 if (!isUInt<32>(RegLo)) {
2664 Error(FirstIdxLoc, "invalid register index");
2665 return false;
2666 }
2667
2668 if (!isUInt<32>(RegHi)) {
2669 Error(SecondIdxLoc, "invalid register index");
2670 return false;
2671 }
2672
2673 if (RegLo > RegHi) {
2674 Error(FirstIdxLoc, "first register index should not exceed second index");
2675 return false;
2676 }
2677
2678 Num = static_cast<unsigned>(RegLo);
2679 RegWidth = 32 * ((RegHi - RegLo) + 1);
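// For example, "s[0:3]" yields Num == 0 and RegWidth == 128.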
2680 return true;
2681 }
2682
2683 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2684 unsigned &RegNum, unsigned &RegWidth,
2685 SmallVectorImpl<AsmToken> &Tokens) {
2686 assert(isToken(AsmToken::Identifier));
2687 unsigned Reg = getSpecialRegForName(getTokenStr());
2688 if (Reg) {
2689 RegNum = 0;
2690 RegWidth = 32;
2691 RegKind = IS_SPECIAL;
2692 Tokens.push_back(getToken());
2693 lex(); // skip register name
2694 }
2695 return Reg;
2696 }
2697
2698 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2699 unsigned &RegNum, unsigned &RegWidth,
2700 SmallVectorImpl<AsmToken> &Tokens) {
2701 assert(isToken(AsmToken::Identifier));
2702 StringRef RegName = getTokenStr();
2703 auto Loc = getLoc();
2704
2705 const RegInfo *RI = getRegularRegInfo(RegName);
2706 if (!RI) {
2707 Error(Loc, "invalid register name");
2708 return AMDGPU::NoRegister;
2709 }
2710
2711 Tokens.push_back(getToken());
2712 lex(); // skip register name
2713
2714 RegKind = RI->Kind;
2715 StringRef RegSuffix = RegName.substr(RI->Name.size());
2716 if (!RegSuffix.empty()) {
2717 // Single 32-bit register: vXX.
2718 if (!getRegNum(RegSuffix, RegNum)) {
2719 Error(Loc, "invalid register index");
2720 return AMDGPU::NoRegister;
2721 }
2722 RegWidth = 32;
2723 } else {
2724 // Range of registers: v[XX:YY]. ":YY" is optional.
2725 if (!ParseRegRange(RegNum, RegWidth))
2726 return AMDGPU::NoRegister;
2727 }
2728
2729 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2730 }
2731
2732 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2733 unsigned &RegWidth,
2734 SmallVectorImpl<AsmToken> &Tokens) {
2735 unsigned Reg = AMDGPU::NoRegister;
2736 auto ListLoc = getLoc();
2737
2738 if (!skipToken(AsmToken::LBrac,
2739 "expected a register or a list of registers")) {
2740 return AMDGPU::NoRegister;
2741 }
2742
2743 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2744
2745 auto Loc = getLoc();
2746 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2747 return AMDGPU::NoRegister;
2748 if (RegWidth != 32) {
2749 Error(Loc, "expected a single 32-bit register");
2750 return AMDGPU::NoRegister;
2751 }
2752
2753 for (; trySkipToken(AsmToken::Comma); ) {
2754 RegisterKind NextRegKind;
2755 unsigned NextReg, NextRegNum, NextRegWidth;
2756 Loc = getLoc();
2757
2758 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2759 NextRegNum, NextRegWidth,
2760 Tokens)) {
2761 return AMDGPU::NoRegister;
2762 }
2763 if (NextRegWidth != 32) {
2764 Error(Loc, "expected a single 32-bit register");
2765 return AMDGPU::NoRegister;
2766 }
2767 if (NextRegKind != RegKind) {
2768 Error(Loc, "registers in a list must be of the same kind");
2769 return AMDGPU::NoRegister;
2770 }
2771 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2772 return AMDGPU::NoRegister;
2773 }
2774
2775 if (!skipToken(AsmToken::RBrac,
2776 "expected a comma or a closing square bracket")) {
2777 return AMDGPU::NoRegister;
2778 }
2779
2780 if (isRegularReg(RegKind))
2781 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2782
2783 return Reg;
2784 }
2785
2786 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2787 unsigned &RegNum, unsigned &RegWidth,
2788 SmallVectorImpl<AsmToken> &Tokens) {
2789 auto Loc = getLoc();
2790 Reg = AMDGPU::NoRegister;
2791
2792 if (isToken(AsmToken::Identifier)) {
2793 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2794 if (Reg == AMDGPU::NoRegister)
2795 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2796 } else {
2797 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2798 }
2799
2800 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2801 if (Reg == AMDGPU::NoRegister) {
2802 assert(Parser.hasPendingError());
2803 return false;
2804 }
2805
2806 if (!subtargetHasRegister(*TRI, Reg)) {
2807 if (Reg == AMDGPU::SGPR_NULL) {
2808 Error(Loc, "'null' operand is not supported on this GPU");
2809 } else {
2810 Error(Loc, "register not available on this GPU");
2811 }
2812 return false;
2813 }
2814
2815 return true;
2816 }
2817
2818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2819 unsigned &RegNum, unsigned &RegWidth,
2820 bool RestoreOnFailure /*=false*/) {
2821 Reg = AMDGPU::NoRegister;
2822
2823 SmallVector<AsmToken, 1> Tokens;
2824 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2825 if (RestoreOnFailure) {
2826 while (!Tokens.empty()) {
2827 getLexer().UnLex(Tokens.pop_back_val());
2828 }
2829 }
2830 return true;
2831 }
2832 return false;
2833 }
2834
2835 Optional<StringRef>
2836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2837 switch (RegKind) {
2838 case IS_VGPR:
2839 return StringRef(".amdgcn.next_free_vgpr");
2840 case IS_SGPR:
2841 return StringRef(".amdgcn.next_free_sgpr");
2842 default:
2843 return None;
2844 }
2845 }
2846
2847 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2848 auto SymbolName = getGprCountSymbolName(RegKind);
2849 assert(SymbolName && "initializing invalid register kind");
2850 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2851 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2852 }
2853
2854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2855 unsigned DwordRegIndex,
2856 unsigned RegWidth) {
2857 // Symbols are only defined for GCN targets
2858 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2859 return true;
2860
2861 auto SymbolName = getGprCountSymbolName(RegKind);
2862 if (!SymbolName)
2863 return true;
2864 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2865
2866 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
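// For example, after parsing v[6:7] (DwordRegIndex == 6, RegWidth == 64),
// NewMax is 7 and .amdgcn.next_free_vgpr is raised to at least 8.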
2867 int64_t OldCount;
2868
2869 if (!Sym->isVariable())
2870 return !Error(getLoc(),
2871 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2872 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2873 return !Error(
2874 getLoc(),
2875 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2876
2877 if (OldCount <= NewMax)
2878 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2879
2880 return true;
2881 }
2882
2883 std::unique_ptr<AMDGPUOperand>
2884 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2885 const auto &Tok = getToken();
2886 SMLoc StartLoc = Tok.getLoc();
2887 SMLoc EndLoc = Tok.getEndLoc();
2888 RegisterKind RegKind;
2889 unsigned Reg, RegNum, RegWidth;
2890
2891 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2892 return nullptr;
2893 }
2894 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2895 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2896 return nullptr;
2897 } else
2898 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2899 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2900 }
2901
2902 OperandMatchResultTy
2903 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2904 // TODO: add syntactic sugar for 1/(2*PI)
2905
2906 if (isRegister())
2907 return MatchOperand_NoMatch;
2908 assert(!isModifier());
2909
2910 const auto& Tok = getToken();
2911 const auto& NextTok = peekToken();
2912 bool IsReal = Tok.is(AsmToken::Real);
2913 SMLoc S = getLoc();
2914 bool Negate = false;
2915
2916 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2917 lex();
2918 IsReal = true;
2919 Negate = true;
2920 }
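// For example, "-2.5" is lexed as Minus followed by Real; the sign is
// consumed here and applied below via changeSign().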
2921
2922 if (IsReal) {
2923 // Floating-point expressions are not supported;
2924 // only floating-point literals with an optional
2925 // sign are allowed.
2926
2927 StringRef Num = getTokenStr();
2928 lex();
2929
2930 APFloat RealVal(APFloat::IEEEdouble());
2931 auto roundMode = APFloat::rmNearestTiesToEven;
2932 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2933 return MatchOperand_ParseFail;
2934 }
2935 if (Negate)
2936 RealVal.changeSign();
2937
2938 Operands.push_back(
2939 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2940 AMDGPUOperand::ImmTyNone, true));
2941
2942 return MatchOperand_Success;
2943
2944 } else {
2945 int64_t IntVal;
2946 const MCExpr *Expr;
2947 SMLoc S = getLoc();
2948
2949 if (HasSP3AbsModifier) {
2950 // This is a workaround for handling expressions
2951 // as arguments of SP3 'abs' modifier, for example:
2952 // |1.0|
2953 // |-1|
2954 // |1+x|
2955 // This syntax is not compatible with syntax of standard
2956 // MC expressions (due to the trailing '|').
2957 SMLoc EndLoc;
2958 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2959 return MatchOperand_ParseFail;
2960 } else {
2961 if (Parser.parseExpression(Expr))
2962 return MatchOperand_ParseFail;
2963 }
2964
2965 if (Expr->evaluateAsAbsolute(IntVal)) {
2966 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2967 } else {
2968 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2969 }
2970
2971 return MatchOperand_Success;
2972 }
2973
2974 return MatchOperand_NoMatch;
2975 }
2976
2977 OperandMatchResultTy
2978 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2979 if (!isRegister())
2980 return MatchOperand_NoMatch;
2981
2982 if (auto R = parseRegister()) {
2983 assert(R->isReg());
2984 Operands.push_back(std::move(R));
2985 return MatchOperand_Success;
2986 }
2987 return MatchOperand_ParseFail;
2988 }
2989
2990 OperandMatchResultTy
2991 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2992 auto res = parseReg(Operands);
2993 if (res != MatchOperand_NoMatch) {
2994 return res;
2995 } else if (isModifier()) {
2996 return MatchOperand_NoMatch;
2997 } else {
2998 return parseImm(Operands, HasSP3AbsMod);
2999 }
3000 }
3001
3002 bool
3003 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3004 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3005 const auto &str = Token.getString();
3006 return str == "abs" || str == "neg" || str == "sext";
3007 }
3008 return false;
3009 }
3010
3011 bool
3012 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3013 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3014 }
3015
3016 bool
3017 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3018 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3019 }
3020
3021 bool
3022 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3023 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3024 }
3025
3026 // Check if this is an operand modifier or an opcode modifier
3027 // which may look like an expression but is not. We should
3028 // avoid parsing these modifiers as expressions. Currently
3029 // recognized sequences are:
3030 // |...|
3031 // abs(...)
3032 // neg(...)
3033 // sext(...)
3034 // -reg
3035 // -|...|
3036 // -abs(...)
3037 // name:...
3038 // Note that simple opcode modifiers like 'gds' may be parsed as
3039 // expressions; this is a special case. See getExpressionAsToken.
3040 //
3041 bool
3042 AMDGPUAsmParser::isModifier() {
3043
3044 AsmToken Tok = getToken();
3045 AsmToken NextToken[2];
3046 peekTokens(NextToken);
3047
3048 return isOperandModifier(Tok, NextToken[0]) ||
3049 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3050 isOpcodeModifierWithVal(Tok, NextToken[0]);
3051 }
3052
3053 // Check if the current token is an SP3 'neg' modifier.
3054 // Currently this modifier is allowed in the following context:
3055 //
3056 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3057 // 2. Before an 'abs' modifier: -abs(...)
3058 // 3. Before an SP3 'abs' modifier: -|...|
3059 //
3060 // In all other cases "-" is handled as part
3061 // of an expression that follows the sign.
3062 //
3063 // Note: When "-" is followed by an integer literal,
3064 // it is interpreted as integer negation rather
3065 // than a floating-point NEG modifier applied to the literal.
3066 // Besides being counter-intuitive, such use of the floating-point
3067 // NEG modifier would result in different meanings
3068 // of integer literals used with VOP1/2/C and VOP3,
3069 // for example:
3070 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3071 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3072 // Negative fp literals with a preceding "-" are
3073 // handled likewise, for uniformity.
3074 //
3075 bool
3076 AMDGPUAsmParser::parseSP3NegModifier() {
3077
3078 AsmToken NextToken[2];
3079 peekTokens(NextToken);
3080
3081 if (isToken(AsmToken::Minus) &&
3082 (isRegister(NextToken[0], NextToken[1]) ||
3083 NextToken[0].is(AsmToken::Pipe) ||
3084 isId(NextToken[0], "abs"))) {
3085 lex();
3086 return true;
3087 }
3088
3089 return false;
3090 }
3091
3092 OperandMatchResultTy
3093 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3094 bool AllowImm) {
3095 bool Neg, SP3Neg;
3096 bool Abs, SP3Abs;
3097 SMLoc Loc;
3098
3099 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3100 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3101 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3102 return MatchOperand_ParseFail;
3103 }
3104
3105 SP3Neg = parseSP3NegModifier();
3106
3107 Loc = getLoc();
3108 Neg = trySkipId("neg");
3109 if (Neg && SP3Neg) {
3110 Error(Loc, "expected register or immediate");
3111 return MatchOperand_ParseFail;
3112 }
3113 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3114 return MatchOperand_ParseFail;
3115
3116 Abs = trySkipId("abs");
3117 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3118 return MatchOperand_ParseFail;
3119
3120 Loc = getLoc();
3121 SP3Abs = trySkipToken(AsmToken::Pipe);
3122 if (Abs && SP3Abs) {
3123 Error(Loc, "expected register or immediate");
3124 return MatchOperand_ParseFail;
3125 }
3126
3127 OperandMatchResultTy Res;
3128 if (AllowImm) {
3129 Res = parseRegOrImm(Operands, SP3Abs);
3130 } else {
3131 Res = parseReg(Operands);
3132 }
3133 if (Res != MatchOperand_Success) {
3134 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3135 }
3136
3137 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3138 return MatchOperand_ParseFail;
3139 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3140 return MatchOperand_ParseFail;
3141 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142 return MatchOperand_ParseFail;
3143
3144 AMDGPUOperand::Modifiers Mods;
3145 Mods.Abs = Abs || SP3Abs;
3146 Mods.Neg = Neg || SP3Neg;
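// For example, "-abs(v0)" yields Neg == true and Abs == true, while "|v1|"
// sets only Abs.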
3147
3148 if (Mods.hasFPModifiers()) {
3149 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3150 if (Op.isExpr()) {
3151 Error(Op.getStartLoc(), "expected an absolute expression");
3152 return MatchOperand_ParseFail;
3153 }
3154 Op.setModifiers(Mods);
3155 }
3156 return MatchOperand_Success;
3157 }
3158
3159 OperandMatchResultTy
3160 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3161 bool AllowImm) {
3162 bool Sext = trySkipId("sext");
3163 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3164 return MatchOperand_ParseFail;
3165
3166 OperandMatchResultTy Res;
3167 if (AllowImm) {
3168 Res = parseRegOrImm(Operands);
3169 } else {
3170 Res = parseReg(Operands);
3171 }
3172 if (Res != MatchOperand_Success) {
3173 return Sext? MatchOperand_ParseFail : Res;
3174 }
3175
3176 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3177 return MatchOperand_ParseFail;
3178
3179 AMDGPUOperand::Modifiers Mods;
3180 Mods.Sext = Sext;
3181
3182 if (Mods.hasIntModifiers()) {
3183 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3184 if (Op.isExpr()) {
3185 Error(Op.getStartLoc(), "expected an absolute expression");
3186 return MatchOperand_ParseFail;
3187 }
3188 Op.setModifiers(Mods);
3189 }
3190
3191 return MatchOperand_Success;
3192 }
3193
3194 OperandMatchResultTy
3195 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3196 return parseRegOrImmWithFPInputMods(Operands, false);
3197 }
3198
3199 OperandMatchResultTy
3200 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3201 return parseRegOrImmWithIntInputMods(Operands, false);
3202 }
3203
3204 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3205 auto Loc = getLoc();
3206 if (trySkipId("off")) {
3207 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3208 AMDGPUOperand::ImmTyOff, false));
3209 return MatchOperand_Success;
3210 }
3211
3212 if (!isRegister())
3213 return MatchOperand_NoMatch;
3214
3215 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3216 if (Reg) {
3217 Operands.push_back(std::move(Reg));
3218 return MatchOperand_Success;
3219 }
3220
3221 return MatchOperand_ParseFail;
3222
3223 }
3224
3225 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3226 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3227
3228 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3229 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3230 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3231 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3232 return Match_InvalidOperand;
3233
3234 if ((TSFlags & SIInstrFlags::VOP3) &&
3235 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3236 getForcedEncodingSize() != 64)
3237 return Match_PreferE32;
3238
3239 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3240 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3241 // v_mac_f32/16 allow only dst_sel == DWORD;
3242 auto OpNum =
3243 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3244 const auto &Op = Inst.getOperand(OpNum);
3245 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3246 return Match_InvalidOperand;
3247 }
3248 }
3249
3250 return Match_Success;
3251 }
3252
3253 static ArrayRef<unsigned> getAllVariants() {
3254 static const unsigned Variants[] = {
3255 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3256 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3257 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3258 };
3259
3260 return makeArrayRef(Variants);
3261 }
3262
3263 // What asm variants we should check
3264 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3265 if (isForcedDPP() && isForcedVOP3()) {
3266 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3267 return makeArrayRef(Variants);
3268 }
3269 if (getForcedEncodingSize() == 32) {
3270 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3271 return makeArrayRef(Variants);
3272 }
3273
3274 if (isForcedVOP3()) {
3275 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3276 return makeArrayRef(Variants);
3277 }
3278
3279 if (isForcedSDWA()) {
3280 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3281 AMDGPUAsmVariants::SDWA9};
3282 return makeArrayRef(Variants);
3283 }
3284
3285 if (isForcedDPP()) {
3286 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3287 return makeArrayRef(Variants);
3288 }
3289
3290 return getAllVariants();
3291 }
3292
3293 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3294 if (isForcedDPP() && isForcedVOP3())
3295 return "e64_dpp";
3296
3297 if (getForcedEncodingSize() == 32)
3298 return "e32";
3299
3300 if (isForcedVOP3())
3301 return "e64";
3302
3303 if (isForcedSDWA())
3304 return "sdwa";
3305
3306 if (isForcedDPP())
3307 return "dpp";
3308
3309 return "";
3310 }
3311
3312 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3313 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3314 const unsigned Num = Desc.getNumImplicitUses();
3315 for (unsigned i = 0; i < Num; ++i) {
3316 unsigned Reg = Desc.ImplicitUses[i];
3317 switch (Reg) {
3318 case AMDGPU::FLAT_SCR:
3319 case AMDGPU::VCC:
3320 case AMDGPU::VCC_LO:
3321 case AMDGPU::VCC_HI:
3322 case AMDGPU::M0:
3323 return Reg;
3324 default:
3325 break;
3326 }
3327 }
3328 return AMDGPU::NoRegister;
3329 }
3330
3331 // NB: This code is correct only when used to check constant
3332 // bus limitations because GFX7 supports no f16 inline constants.
3333 // Note that there are no cases when a GFX7 opcode violates
3334 // constant bus limitations due to the use of an f16 constant.
3335 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3336 unsigned OpIdx) const {
3337 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3338
3339 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3340 return false;
3341 }
3342
3343 const MCOperand &MO = Inst.getOperand(OpIdx);
3344
3345 int64_t Val = MO.getImm();
3346 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3347
3348 switch (OpSize) { // expected operand size
3349 case 8:
3350 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3351 case 4:
3352 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3353 case 2: {
3354 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3355 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3356 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3357 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3358 return AMDGPU::isInlinableIntLiteral(Val);
3359
3360 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3361 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3362 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3363 return AMDGPU::isInlinableIntLiteralV216(Val);
3364
3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3368 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3369
3370 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3371 }
3372 default:
3373 llvm_unreachable("invalid operand size");
3374 }
3375 }
3376
3377 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3378 if (!isGFX10Plus())
3379 return 1;
3380
3381 switch (Opcode) {
3382 // 64-bit shift instructions can use only one scalar value input
3383 case AMDGPU::V_LSHLREV_B64_e64:
3384 case AMDGPU::V_LSHLREV_B64_gfx10:
3385 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3386 case AMDGPU::V_LSHRREV_B64_e64:
3387 case AMDGPU::V_LSHRREV_B64_gfx10:
3388 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3389 case AMDGPU::V_ASHRREV_I64_e64:
3390 case AMDGPU::V_ASHRREV_I64_gfx10:
3391 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3392 case AMDGPU::V_LSHL_B64_e64:
3393 case AMDGPU::V_LSHR_B64_e64:
3394 case AMDGPU::V_ASHR_I64_e64:
3395 return 1;
3396 default:
3397 return 2;
3398 }
3399 }
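// Illustrative example (assumed, not from the original source): on GFX10+
// most VALU instructions may read two scalar values, but 64-bit shifts keep
// the limit of one. So "v_lshlrev_b64 v[0:1], s0, s[2:3]" would exceed the
// limit even on GFX10, while "v_lshlrev_b64 v[0:1], s0, v[2:3]" would not.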
3400
3401 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3402 const MCOperand &MO = Inst.getOperand(OpIdx);
3403 if (MO.isImm()) {
3404 return !isInlineConstant(Inst, OpIdx);
3405 } else if (MO.isReg()) {
3406 auto Reg = MO.getReg();
3407 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3408 auto PReg = mc2PseudoReg(Reg);
3409 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3410 } else {
3411 return true;
3412 }
3413 }
3414
3415 bool
3416 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3417 const OperandVector &Operands) {
3418 const unsigned Opcode = Inst.getOpcode();
3419 const MCInstrDesc &Desc = MII.get(Opcode);
3420 unsigned LastSGPR = AMDGPU::NoRegister;
3421 unsigned ConstantBusUseCount = 0;
3422 unsigned NumLiterals = 0;
3423 unsigned LiteralSize;
3424
3425 if (Desc.TSFlags &
3426 (SIInstrFlags::VOPC |
3427 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3428 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3429 SIInstrFlags::SDWA)) {
3430 // Check special imm operands (used by madmk, etc)
3431 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3432 ++NumLiterals;
3433 LiteralSize = 4;
3434 }
3435
3436 SmallDenseSet<unsigned> SGPRsUsed;
3437 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3438 if (SGPRUsed != AMDGPU::NoRegister) {
3439 SGPRsUsed.insert(SGPRUsed);
3440 ++ConstantBusUseCount;
3441 }
3442
3443 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3444 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3445 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3446
3447 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3448
3449 for (int OpIdx : OpIndices) {
3450 if (OpIdx == -1) break;
3451
3452 const MCOperand &MO = Inst.getOperand(OpIdx);
3453 if (usesConstantBus(Inst, OpIdx)) {
3454 if (MO.isReg()) {
3455 LastSGPR = mc2PseudoReg(MO.getReg());
3456         // Pairs of registers with a partial intersection like these
3457 // s0, s[0:1]
3458 // flat_scratch_lo, flat_scratch
3459 // flat_scratch_lo, flat_scratch_hi
3460 // are theoretically valid but they are disabled anyway.
3461 // Note that this code mimics SIInstrInfo::verifyInstruction
3462 if (SGPRsUsed.insert(LastSGPR).second) {
3463 ++ConstantBusUseCount;
3464 }
3465 } else { // Expression or a literal
3466
3467 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3468 continue; // special operand like VINTERP attr_chan
3469
3470 // An instruction may use only one literal.
3471         // This has been validated in a previous step.
3472         // See validateVOPLiteral.
3473         // This literal may be used in more than one operand.
3474 // If all these operands are of the same size,
3475 // this literal counts as one scalar value.
3476 // Otherwise it counts as 2 scalar values.
3477 // See "GFX10 Shader Programming", section 3.6.2.3.
3478
3479 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3480 if (Size < 4) Size = 4;
3481
3482 if (NumLiterals == 0) {
3483 NumLiterals = 1;
3484 LiteralSize = Size;
3485 } else if (LiteralSize != Size) {
3486 NumLiterals = 2;
3487 }
3488 }
3489 }
3490 }
3491 }
3492 ConstantBusUseCount += NumLiterals;
3493
3494 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3495 return true;
3496
3497 SMLoc LitLoc = getLitLoc(Operands);
3498 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3499 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3500 Error(Loc, "invalid operand (violates constant bus restrictions)");
3501 return false;
3502 }
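// Illustrative example (assumed, not from the original source): the counter
// above deduplicates SGPRs, so "v_fma_f32 v0, s0, s0, v1" counts one scalar
// read, while "v_fma_f32 v0, s0, s1, v1" counts two and is rejected on
// targets whose constant bus limit is one. A literal reused in same-size
// source operands likewise counts as a single scalar value.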
3503
3504 bool
3505 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3506 const OperandVector &Operands) {
3507 const unsigned Opcode = Inst.getOpcode();
3508 const MCInstrDesc &Desc = MII.get(Opcode);
3509
3510 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3511 if (DstIdx == -1 ||
3512 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3513 return true;
3514 }
3515
3516 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3517
3518 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3519 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3520 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3521
3522 assert(DstIdx != -1);
3523 const MCOperand &Dst = Inst.getOperand(DstIdx);
3524 assert(Dst.isReg());
3525
3526 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3527
3528 for (int SrcIdx : SrcIndices) {
3529 if (SrcIdx == -1) break;
3530 const MCOperand &Src = Inst.getOperand(SrcIdx);
3531 if (Src.isReg()) {
3532 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3533 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3534 Error(getRegLoc(SrcReg, Operands),
3535 "destination must be different than all sources");
3536 return false;
3537 }
3538 }
3539 }
3540
3541 return true;
3542 }
3543
3544 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3545
3546 const unsigned Opc = Inst.getOpcode();
3547 const MCInstrDesc &Desc = MII.get(Opc);
3548
3549 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3550 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3551 assert(ClampIdx != -1);
3552 return Inst.getOperand(ClampIdx).getImm() == 0;
3553 }
3554
3555 return true;
3556 }
3557
3558 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3559
3560 const unsigned Opc = Inst.getOpcode();
3561 const MCInstrDesc &Desc = MII.get(Opc);
3562
3563 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3564 return None;
3565
3566 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3567 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3568 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3569
3570 assert(VDataIdx != -1);
3571
3572 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3573 return None;
3574
3575 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3576 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3577 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3578 if (DMask == 0)
3579 DMask = 1;
3580
3581 bool isPackedD16 = false;
3582 unsigned DataSize =
3583 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3584 if (hasPackedD16()) {
3585 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3586 isPackedD16 = D16Idx >= 0;
3587 if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3588 DataSize = (DataSize + 1) / 2;
3589 }
3590
3591 if ((VDataSize / 4) == DataSize + TFESize)
3592 return None;
3593
3594 return StringRef(isPackedD16
3595 ? "image data size does not match dmask, d16 and tfe"
3596 : "image data size does not match dmask and tfe");
3597 }
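// Illustrative example (assumed, not from the original source): with
// dmask:0x7 three channels are enabled, so vdata is expected to be a
// three-register tuple such as v[0:2]; setting tfe adds one more register,
// and packed d16 halves the channel count (rounded up).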
3598
3599 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3600 const unsigned Opc = Inst.getOpcode();
3601 const MCInstrDesc &Desc = MII.get(Opc);
3602
3603 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3604 return true;
3605
3606 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3607
3608 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3609 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3610 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3611 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3612 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3613 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3614
3615 assert(VAddr0Idx != -1);
3616 assert(SrsrcIdx != -1);
3617 assert(SrsrcIdx > VAddr0Idx);
3618
3619 if (DimIdx == -1)
3620 return true; // intersect_ray
3621
3622 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3623 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3624 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3625 unsigned ActualAddrSize =
3626 IsNSA ? SrsrcIdx - VAddr0Idx
3627 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3628 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3629
3630 unsigned ExpectedAddrSize =
3631 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3632
3633 if (!IsNSA) {
3634 if (ExpectedAddrSize > 8)
3635 ExpectedAddrSize = 16;
3636
3637 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3638 // This provides backward compatibility for assembly created
3639 // before 160b/192b/224b types were directly supported.
3640 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3641 return true;
3642 }
3643
3644 return ActualAddrSize == ExpectedAddrSize;
3645 }
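// Illustrative example (assumed, not from the original source): a plain 2D
// image operation needs two address registers, one per coordinate. In NSA
// form each address is a separate VGPR operand; otherwise a single tuple
// such as v[0:1] is expected, with the oversizing exceptions noted above.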
3646
3647 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3648
3649 const unsigned Opc = Inst.getOpcode();
3650 const MCInstrDesc &Desc = MII.get(Opc);
3651
3652 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3653 return true;
3654 if (!Desc.mayLoad() || !Desc.mayStore())
3655 return true; // Not atomic
3656
3657 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3658 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3659
3660 // This is an incomplete check because image_atomic_cmpswap
3661 // may only use 0x3 and 0xf while other atomic operations
3662 // may use 0x1 and 0x3. However these limitations are
3663 // verified when we check that dmask matches dst size.
3664 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3665 }
3666
3667 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3668
3669 const unsigned Opc = Inst.getOpcode();
3670 const MCInstrDesc &Desc = MII.get(Opc);
3671
3672 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3673 return true;
3674
3675 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3676 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3677
3678 // GATHER4 instructions use dmask in a different fashion compared to
3679 // other MIMG instructions. The only useful DMASK values are
3680 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3681 // (red,red,red,red) etc.) The ISA document doesn't mention
3682 // this.
3683 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3684 }
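// Illustrative example (assumed, not from the original source):
// "dmask:0x2" gathers the green channel into all four result components,
// while a value with more than one bit set, such as 0x3, is rejected here.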
3685
3686 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3687 const unsigned Opc = Inst.getOpcode();
3688 const MCInstrDesc &Desc = MII.get(Opc);
3689
3690 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3691 return true;
3692
3693 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3694 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3695 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3696
3697 if (!BaseOpcode->MSAA)
3698 return true;
3699
3700 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3701 assert(DimIdx != -1);
3702
3703 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3704 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3705
3706 return DimInfo->MSAA;
3707 }
3708
3709 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3710 {
3711 switch (Opcode) {
3712 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3713 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3714 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3715 return true;
3716 default:
3717 return false;
3718 }
3719 }
3720
3721 // movrels* opcodes should only allow VGPRs as src0.
3722 // This is specified in .td description for vop1/vop3,
3723 // but sdwa is handled differently. See isSDWAOperand.
3724 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3725 const OperandVector &Operands) {
3726
3727 const unsigned Opc = Inst.getOpcode();
3728 const MCInstrDesc &Desc = MII.get(Opc);
3729
3730 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3731 return true;
3732
3733 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3734 assert(Src0Idx != -1);
3735
3736 SMLoc ErrLoc;
3737 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3738 if (Src0.isReg()) {
3739 auto Reg = mc2PseudoReg(Src0.getReg());
3740 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3741 if (!isSGPR(Reg, TRI))
3742 return true;
3743 ErrLoc = getRegLoc(Reg, Operands);
3744 } else {
3745 ErrLoc = getConstLoc(Operands);
3746 }
3747
3748 Error(ErrLoc, "source operand must be a VGPR");
3749 return false;
3750 }
3751
3752 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3753 const OperandVector &Operands) {
3754
3755 const unsigned Opc = Inst.getOpcode();
3756
3757 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3758 return true;
3759
3760 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3761 assert(Src0Idx != -1);
3762
3763 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3764 if (!Src0.isReg())
3765 return true;
3766
3767 auto Reg = mc2PseudoReg(Src0.getReg());
3768 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3769 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3770 Error(getRegLoc(Reg, Operands),
3771 "source operand must be either a VGPR or an inline constant");
3772 return false;
3773 }
3774
3775 return true;
3776 }
3777
3778 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3779 const OperandVector &Operands) {
3780 const unsigned Opc = Inst.getOpcode();
3781 const MCInstrDesc &Desc = MII.get(Opc);
3782
3783 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3784 return true;
3785
3786 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3787 if (Src2Idx == -1)
3788 return true;
3789
3790 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3791 if (!Src2.isReg())
3792 return true;
3793
3794 MCRegister Src2Reg = Src2.getReg();
3795 MCRegister DstReg = Inst.getOperand(0).getReg();
3796 if (Src2Reg == DstReg)
3797 return true;
3798
3799 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3800 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3801 return true;
3802
3803 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3804 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3805 "source 2 operand must not partially overlap with dst");
3806 return false;
3807 }
3808
3809 return true;
3810 }
3811
3812 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3813 switch (Inst.getOpcode()) {
3814 default:
3815 return true;
3816 case V_DIV_SCALE_F32_gfx6_gfx7:
3817 case V_DIV_SCALE_F32_vi:
3818 case V_DIV_SCALE_F32_gfx10:
3819 case V_DIV_SCALE_F64_gfx6_gfx7:
3820 case V_DIV_SCALE_F64_vi:
3821 case V_DIV_SCALE_F64_gfx10:
3822 break;
3823 }
3824
3825 // TODO: Check that src0 = src1 or src2.
3826
3827   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3828                     AMDGPU::OpName::src1_modifiers,
3829                     AMDGPU::OpName::src2_modifiers}) {
3830 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3831 .getImm() &
3832 SISrcMods::ABS) {
3833 return false;
3834 }
3835 }
3836
3837 return true;
3838 }
3839
3840 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3841
3842 const unsigned Opc = Inst.getOpcode();
3843 const MCInstrDesc &Desc = MII.get(Opc);
3844
3845 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3846 return true;
3847
3848 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3849 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3850 if (isCI() || isSI())
3851 return false;
3852 }
3853
3854 return true;
3855 }
3856
3857 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3858 const unsigned Opc = Inst.getOpcode();
3859 const MCInstrDesc &Desc = MII.get(Opc);
3860
3861 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3862 return true;
3863
3864 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3865 if (DimIdx < 0)
3866 return true;
3867
3868 long Imm = Inst.getOperand(DimIdx).getImm();
3869 if (Imm < 0 || Imm >= 8)
3870 return false;
3871
3872 return true;
3873 }
3874
3875 static bool IsRevOpcode(const unsigned Opcode)
3876 {
3877 switch (Opcode) {
3878 case AMDGPU::V_SUBREV_F32_e32:
3879 case AMDGPU::V_SUBREV_F32_e64:
3880 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3881 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3882 case AMDGPU::V_SUBREV_F32_e32_vi:
3883 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3884 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3885 case AMDGPU::V_SUBREV_F32_e64_vi:
3886
3887 case AMDGPU::V_SUBREV_CO_U32_e32:
3888 case AMDGPU::V_SUBREV_CO_U32_e64:
3889 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3890 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3891
3892 case AMDGPU::V_SUBBREV_U32_e32:
3893 case AMDGPU::V_SUBBREV_U32_e64:
3894 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3895 case AMDGPU::V_SUBBREV_U32_e32_vi:
3896 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3897 case AMDGPU::V_SUBBREV_U32_e64_vi:
3898
3899 case AMDGPU::V_SUBREV_U32_e32:
3900 case AMDGPU::V_SUBREV_U32_e64:
3901 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3902 case AMDGPU::V_SUBREV_U32_e32_vi:
3903 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3904 case AMDGPU::V_SUBREV_U32_e64_vi:
3905
3906 case AMDGPU::V_SUBREV_F16_e32:
3907 case AMDGPU::V_SUBREV_F16_e64:
3908 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3909 case AMDGPU::V_SUBREV_F16_e32_vi:
3910 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3911 case AMDGPU::V_SUBREV_F16_e64_vi:
3912
3913 case AMDGPU::V_SUBREV_U16_e32:
3914 case AMDGPU::V_SUBREV_U16_e64:
3915 case AMDGPU::V_SUBREV_U16_e32_vi:
3916 case AMDGPU::V_SUBREV_U16_e64_vi:
3917
3918 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3919 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3920 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3921
3922 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3923 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3924
3925 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3926 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3927
3928 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3929 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3930
3931 case AMDGPU::V_LSHRREV_B32_e32:
3932 case AMDGPU::V_LSHRREV_B32_e64:
3933 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3934 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3935 case AMDGPU::V_LSHRREV_B32_e32_vi:
3936 case AMDGPU::V_LSHRREV_B32_e64_vi:
3937 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3938 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3939
3940 case AMDGPU::V_ASHRREV_I32_e32:
3941 case AMDGPU::V_ASHRREV_I32_e64:
3942 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3943 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3944 case AMDGPU::V_ASHRREV_I32_e32_vi:
3945 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3946 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3947 case AMDGPU::V_ASHRREV_I32_e64_vi:
3948
3949 case AMDGPU::V_LSHLREV_B32_e32:
3950 case AMDGPU::V_LSHLREV_B32_e64:
3951 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3952 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3953 case AMDGPU::V_LSHLREV_B32_e32_vi:
3954 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3955 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3956 case AMDGPU::V_LSHLREV_B32_e64_vi:
3957
3958 case AMDGPU::V_LSHLREV_B16_e32:
3959 case AMDGPU::V_LSHLREV_B16_e64:
3960 case AMDGPU::V_LSHLREV_B16_e32_vi:
3961 case AMDGPU::V_LSHLREV_B16_e64_vi:
3962 case AMDGPU::V_LSHLREV_B16_gfx10:
3963
3964 case AMDGPU::V_LSHRREV_B16_e32:
3965 case AMDGPU::V_LSHRREV_B16_e64:
3966 case AMDGPU::V_LSHRREV_B16_e32_vi:
3967 case AMDGPU::V_LSHRREV_B16_e64_vi:
3968 case AMDGPU::V_LSHRREV_B16_gfx10:
3969
3970 case AMDGPU::V_ASHRREV_I16_e32:
3971 case AMDGPU::V_ASHRREV_I16_e64:
3972 case AMDGPU::V_ASHRREV_I16_e32_vi:
3973 case AMDGPU::V_ASHRREV_I16_e64_vi:
3974 case AMDGPU::V_ASHRREV_I16_gfx10:
3975
3976 case AMDGPU::V_LSHLREV_B64_e64:
3977 case AMDGPU::V_LSHLREV_B64_gfx10:
3978 case AMDGPU::V_LSHLREV_B64_vi:
3979
3980 case AMDGPU::V_LSHRREV_B64_e64:
3981 case AMDGPU::V_LSHRREV_B64_gfx10:
3982 case AMDGPU::V_LSHRREV_B64_vi:
3983
3984 case AMDGPU::V_ASHRREV_I64_e64:
3985 case AMDGPU::V_ASHRREV_I64_gfx10:
3986 case AMDGPU::V_ASHRREV_I64_vi:
3987
3988 case AMDGPU::V_PK_LSHLREV_B16:
3989 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3990 case AMDGPU::V_PK_LSHLREV_B16_vi:
3991
3992 case AMDGPU::V_PK_LSHRREV_B16:
3993 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3994 case AMDGPU::V_PK_LSHRREV_B16_vi:
3995 case AMDGPU::V_PK_ASHRREV_I16:
3996 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3997 case AMDGPU::V_PK_ASHRREV_I16_vi:
3998 return true;
3999 default:
4000 return false;
4001 }
4002 }
4003
4004 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4005
4006 using namespace SIInstrFlags;
4007 const unsigned Opcode = Inst.getOpcode();
4008 const MCInstrDesc &Desc = MII.get(Opcode);
4009
4010 // lds_direct register is defined so that it can be used
4011 // with 9-bit operands only. Ignore encodings which do not accept these.
4012 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4013 if ((Desc.TSFlags & Enc) == 0)
4014 return None;
4015
4016 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4017 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4018 if (SrcIdx == -1)
4019 break;
4020 const auto &Src = Inst.getOperand(SrcIdx);
4021 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4022
4023 if (isGFX90A() || isGFX11Plus())
4024 return StringRef("lds_direct is not supported on this GPU");
4025
4026 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4027 return StringRef("lds_direct cannot be used with this instruction");
4028
4029 if (SrcName != OpName::src0)
4030 return StringRef("lds_direct may be used as src0 only");
4031 }
4032 }
4033
4034 return None;
4035 }
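// Illustrative example (assumed, not from the original source): something
// like "v_mov_b32 v0, lds_direct" passes this check on targets that support
// lds_direct, whereas using lds_direct as src1/src2 or together with *rev or
// SDWA opcodes is diagnosed above.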
4036
4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4038 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4039 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4040 if (Op.isFlatOffset())
4041 return Op.getStartLoc();
4042 }
4043 return getLoc();
4044 }
4045
4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4047 const OperandVector &Operands) {
4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4050 return true;
4051
4052 auto Opcode = Inst.getOpcode();
4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054 assert(OpNum != -1);
4055
4056 const auto &Op = Inst.getOperand(OpNum);
4057 if (!hasFlatOffsets() && Op.getImm() != 0) {
4058 Error(getFlatOffsetLoc(Operands),
4059 "flat offset modifier is not supported on this GPU");
4060 return false;
4061 }
4062
4063 // For FLAT segment the offset must be positive;
4064 // MSB is ignored and forced to zero.
4065 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4066 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4067 if (!isIntN(OffsetSize, Op.getImm())) {
4068 Error(getFlatOffsetLoc(Operands),
4069 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4070 return false;
4071 }
4072 } else {
4073 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4074 if (!isUIntN(OffsetSize, Op.getImm())) {
4075 Error(getFlatOffsetLoc(Operands),
4076 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4077 return false;
4078 }
4079 }
4080
4081 return true;
4082 }
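// Illustrative example (assumed, not from the original source): global and
// scratch forms take a signed offset, e.g.
// "global_load_dword v0, v[0:1], off offset:-16", while plain FLAT forms
// require an unsigned offset, so a negative value there is rejected; the
// accepted bit width depends on the subtarget.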
4083
4084 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4085 // Start with second operand because SMEM Offset cannot be dst or src0.
4086 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4088 if (Op.isSMEMOffset())
4089 return Op.getStartLoc();
4090 }
4091 return getLoc();
4092 }
4093
4094 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4095 const OperandVector &Operands) {
4096 if (isCI() || isSI())
4097 return true;
4098
4099 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4100 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4101 return true;
4102
4103 auto Opcode = Inst.getOpcode();
4104 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4105 if (OpNum == -1)
4106 return true;
4107
4108 const auto &Op = Inst.getOperand(OpNum);
4109 if (!Op.isImm())
4110 return true;
4111
4112 uint64_t Offset = Op.getImm();
4113 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4114 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4115 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4116 return true;
4117
4118 Error(getSMEMOffsetLoc(Operands),
4119 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4120 "expected a 21-bit signed offset");
4121
4122 return false;
4123 }
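// Illustrative example (assumed, not from the original source):
// "s_load_dword s4, s[0:1], -0x8" is accepted where a 21-bit signed offset
// is legal (non-buffer SMEM on GFX9+), but the same offset is rejected on VI
// and for buffer forms, which only take a 20-bit unsigned offset.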
4124
4125 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4126 unsigned Opcode = Inst.getOpcode();
4127 const MCInstrDesc &Desc = MII.get(Opcode);
4128 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4129 return true;
4130
4131 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4132 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4133
4134 const int OpIndices[] = { Src0Idx, Src1Idx };
4135
4136 unsigned NumExprs = 0;
4137 unsigned NumLiterals = 0;
4138 uint32_t LiteralValue;
4139
4140 for (int OpIdx : OpIndices) {
4141 if (OpIdx == -1) break;
4142
4143 const MCOperand &MO = Inst.getOperand(OpIdx);
4144 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4145 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4146 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4147 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4148 if (NumLiterals == 0 || LiteralValue != Value) {
4149 LiteralValue = Value;
4150 ++NumLiterals;
4151 }
4152 } else if (MO.isExpr()) {
4153 ++NumExprs;
4154 }
4155 }
4156 }
4157
4158 return NumLiterals + NumExprs <= 1;
4159 }
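// Illustrative example (assumed, not from the original source):
// "s_add_u32 s0, 0x12345, 0x12345" is accepted because both sources share
// one literal value, while "s_add_u32 s0, 0x12345, 0x54321" would need two
// distinct literals and fails this check.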
4160
4161 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4162 const unsigned Opc = Inst.getOpcode();
4163 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4164 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4165 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4166 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4167
4168 if (OpSel & ~3)
4169 return false;
4170 }
4171
4172 uint64_t TSFlags = MII.get(Opc).TSFlags;
4173
4174 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4175 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4176 if (OpSelIdx != -1) {
4177 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4178 return false;
4179 }
4180 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4181 if (OpSelHiIdx != -1) {
4182 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4183 return false;
4184 }
4185 }
4186
4187 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4188 if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
4189 !(TSFlags & SIInstrFlags::VOP3P)) {
4190 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4191 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4192 if (OpSel & 3)
4193 return false;
4194 }
4195
4196 return true;
4197 }
4198
4199 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4200 const OperandVector &Operands) {
4201 const unsigned Opc = Inst.getOpcode();
4202 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4203 if (DppCtrlIdx < 0)
4204 return true;
4205 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4206
4207 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4208 // DPP64 is supported for row_newbcast only.
4209 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4210 if (Src0Idx >= 0 &&
4211 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4212 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4213 Error(S, "64 bit dpp only supports row_newbcast");
4214 return false;
4215 }
4216 }
4217
4218 return true;
4219 }
4220
4221 // Check if VCC register matches wavefront size
4222 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4223 auto FB = getFeatureBits();
4224 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4225 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4226 }
4227
4228 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4229 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4230 const OperandVector &Operands) {
4231 unsigned Opcode = Inst.getOpcode();
4232 const MCInstrDesc &Desc = MII.get(Opcode);
4233 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4234 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4235 ImmIdx == -1)
4236 return true;
4237
4238 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4239 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4240 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4241
4242 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4243
4244 unsigned NumExprs = 0;
4245 unsigned NumLiterals = 0;
4246 uint32_t LiteralValue;
4247
4248 for (int OpIdx : OpIndices) {
4249 if (OpIdx == -1)
4250 continue;
4251
4252 const MCOperand &MO = Inst.getOperand(OpIdx);
4253 if (!MO.isImm() && !MO.isExpr())
4254 continue;
4255 if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4256 continue;
4257
4258 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4259 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4260 Error(getConstLoc(Operands),
4261 "inline constants are not allowed for this operand");
4262 return false;
4263 }
4264
4265 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4266 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4267 if (NumLiterals == 0 || LiteralValue != Value) {
4268 LiteralValue = Value;
4269 ++NumLiterals;
4270 }
4271 } else if (MO.isExpr()) {
4272 ++NumExprs;
4273 }
4274 }
4275 NumLiterals += NumExprs;
4276
4277 if (!NumLiterals)
4278 return true;
4279
4280 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4281 Error(getLitLoc(Operands), "literal operands are not supported");
4282 return false;
4283 }
4284
4285 if (NumLiterals > 1) {
4286 Error(getLitLoc(Operands), "only one literal operand is allowed");
4287 return false;
4288 }
4289
4290 return true;
4291 }
4292
4293 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4294 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4295 const MCRegisterInfo *MRI) {
4296 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4297 if (OpIdx < 0)
4298 return -1;
4299
4300 const MCOperand &Op = Inst.getOperand(OpIdx);
4301 if (!Op.isReg())
4302 return -1;
4303
4304 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4305 auto Reg = Sub ? Sub : Op.getReg();
4306 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4307 return AGPR32.contains(Reg) ? 1 : 0;
4308 }
4309
4310 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4311 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4312 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4313 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4314 SIInstrFlags::DS)) == 0)
4315 return true;
4316
4317 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4318 : AMDGPU::OpName::vdata;
4319
4320 const MCRegisterInfo *MRI = getMRI();
4321 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4322 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4323
4324 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4325 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4326 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4327 return false;
4328 }
4329
4330 auto FB = getFeatureBits();
4331 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4332 if (DataAreg < 0 || DstAreg < 0)
4333 return true;
4334 return DstAreg == DataAreg;
4335 }
4336
4337 return DstAreg < 1 && DataAreg < 1;
4338 }
4339
4340 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4341 auto FB = getFeatureBits();
4342 if (!FB[AMDGPU::FeatureGFX90AInsts])
4343 return true;
4344
4345 const MCRegisterInfo *MRI = getMRI();
4346 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4347 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4348 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4349 const MCOperand &Op = Inst.getOperand(I);
4350 if (!Op.isReg())
4351 continue;
4352
4353 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4354 if (!Sub)
4355 continue;
4356
4357 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4358 return false;
4359 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4360 return false;
4361 }
4362
4363 return true;
4364 }
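// Illustrative example (assumed, not from the original source): on gfx90a a
// 64-bit register tuple must start at an even register, so v[0:1] and
// a[2:3] are valid while v[1:2] or a[3:4] fail this check.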
4365
4366 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4367 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4368 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4369 if (Op.isBLGP())
4370 return Op.getStartLoc();
4371 }
4372 return SMLoc();
4373 }
4374
4375 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4376 const OperandVector &Operands) {
4377 unsigned Opc = Inst.getOpcode();
4378 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4379 if (BlgpIdx == -1)
4380 return true;
4381 SMLoc BLGPLoc = getBLGPLoc(Operands);
4382 if (!BLGPLoc.isValid())
4383 return true;
4384 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4385 auto FB = getFeatureBits();
4386 bool UsesNeg = false;
4387 if (FB[AMDGPU::FeatureGFX940Insts]) {
4388 switch (Opc) {
4389 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4390 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4391 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4392 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4393 UsesNeg = true;
4394 }
4395 }
4396
4397 if (IsNeg == UsesNeg)
4398 return true;
4399
4400 Error(BLGPLoc,
4401 UsesNeg ? "invalid modifier: blgp is not supported"
4402 : "invalid modifier: neg is not supported");
4403
4404 return false;
4405 }
4406
4407 // gfx90a has an undocumented limitation:
4408 // DS_GWS opcodes must use even aligned registers.
4409 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4410 const OperandVector &Operands) {
4411 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4412 return true;
4413
4414 int Opc = Inst.getOpcode();
4415 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4416 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4417 return true;
4418
4419 const MCRegisterInfo *MRI = getMRI();
4420 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4421 int Data0Pos =
4422 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4423 assert(Data0Pos != -1);
4424 auto Reg = Inst.getOperand(Data0Pos).getReg();
4425 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4426 if (RegIdx & 1) {
4427 SMLoc RegLoc = getRegLoc(Reg, Operands);
4428 Error(RegLoc, "vgpr must be even aligned");
4429 return false;
4430 }
4431
4432 return true;
4433 }
4434
4435 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4436 const OperandVector &Operands,
4437 const SMLoc &IDLoc) {
4438 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4439 AMDGPU::OpName::cpol);
4440 if (CPolPos == -1)
4441 return true;
4442
4443 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4444
4445 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4446 if (TSFlags & SIInstrFlags::SMRD) {
4447 if (CPol && (isSI() || isCI())) {
4448 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4449 Error(S, "cache policy is not supported for SMRD instructions");
4450 return false;
4451 }
4452 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4453 Error(IDLoc, "invalid cache policy for SMEM instruction");
4454 return false;
4455 }
4456 }
4457
4458 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4459 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4460 StringRef CStr(S.getPointer());
4461 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4462 Error(S, "scc is not supported on this GPU");
4463 return false;
4464 }
4465
4466 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4467 return true;
4468
4469 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4470 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4471 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4472 : "instruction must use glc");
4473 return false;
4474 }
4475 } else {
4476 if (CPol & CPol::GLC) {
4477 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4478 StringRef CStr(S.getPointer());
4479 S = SMLoc::getFromPointer(
4480 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4481 Error(S, isGFX940() ? "instruction must not use sc0"
4482 : "instruction must not use glc");
4483 return false;
4484 }
4485 }
4486
4487 return true;
4488 }
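// Illustrative example (assumed, not from the original source): a returning
// atomic such as "flat_atomic_swap v0, v[0:1], v2 glc" must carry glc (sc0
// on gfx940), while the non-returning form must omit it; SMEM instructions
// additionally reject cache-policy bits other than glc/dlc.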
4489
4490 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4491 const OperandVector &Operands,
4492 const SMLoc &IDLoc) {
4493 if (isGFX940())
4494 return true;
4495
4496 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4497 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4498 (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4499 return true;
4500 // This is FLAT LDS DMA.
4501
4502 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4503 StringRef CStr(S.getPointer());
4504 if (!CStr.startswith("lds")) {
4505     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4506     // The LDS version should have an 'lds' modifier, but it follows optional
4507     // operands, so its absence is ignored by the matcher.
4508 Error(IDLoc, "invalid operands for instruction");
4509 return false;
4510 }
4511
4512 return true;
4513 }
4514
4515 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4516 if (!isGFX11Plus())
4517 return true;
4518 for (auto &Operand : Operands) {
4519 if (!Operand->isReg())
4520 continue;
4521 unsigned Reg = Operand->getReg();
4522 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4523 Error(getRegLoc(Reg, Operands),
4524 "execz and vccz are not supported on this GPU");
4525 return false;
4526 }
4527 }
4528 return true;
4529 }
4530
4531 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4532 const SMLoc &IDLoc,
4533 const OperandVector &Operands) {
4534 if (auto ErrMsg = validateLdsDirect(Inst)) {
4535 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4536 return false;
4537 }
4538 if (!validateSOPLiteral(Inst)) {
4539 Error(getLitLoc(Operands),
4540 "only one literal operand is allowed");
4541 return false;
4542 }
4543 if (!validateVOPLiteral(Inst, Operands)) {
4544 return false;
4545 }
4546 if (!validateConstantBusLimitations(Inst, Operands)) {
4547 return false;
4548 }
4549 if (!validateEarlyClobberLimitations(Inst, Operands)) {
4550 return false;
4551 }
4552 if (!validateIntClampSupported(Inst)) {
4553 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4554 "integer clamping is not supported on this GPU");
4555 return false;
4556 }
4557 if (!validateOpSel(Inst)) {
4558 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4559 "invalid op_sel operand");
4560 return false;
4561 }
4562 if (!validateDPP(Inst, Operands)) {
4563 return false;
4564 }
4565 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4566 if (!validateMIMGD16(Inst)) {
4567 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4568 "d16 modifier is not supported on this GPU");
4569 return false;
4570 }
4571 if (!validateMIMGDim(Inst)) {
4572 Error(IDLoc, "dim modifier is required on this GPU");
4573 return false;
4574 }
4575 if (!validateMIMGMSAA(Inst)) {
4576 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4577 "invalid dim; must be MSAA type");
4578 return false;
4579 }
4580 if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4581 Error(IDLoc, *ErrMsg);
4582 return false;
4583 }
4584 if (!validateMIMGAddrSize(Inst)) {
4585 Error(IDLoc,
4586 "image address size does not match dim and a16");
4587 return false;
4588 }
4589 if (!validateMIMGAtomicDMask(Inst)) {
4590 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4591 "invalid atomic image dmask");
4592 return false;
4593 }
4594 if (!validateMIMGGatherDMask(Inst)) {
4595 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4596 "invalid image_gather dmask: only one bit must be set");
4597 return false;
4598 }
4599 if (!validateMovrels(Inst, Operands)) {
4600 return false;
4601 }
4602 if (!validateFlatOffset(Inst, Operands)) {
4603 return false;
4604 }
4605 if (!validateSMEMOffset(Inst, Operands)) {
4606 return false;
4607 }
4608 if (!validateMAIAccWrite(Inst, Operands)) {
4609 return false;
4610 }
4611 if (!validateMFMA(Inst, Operands)) {
4612 return false;
4613 }
4614 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4615 return false;
4616 }
4617
4618 if (!validateAGPRLdSt(Inst)) {
4619 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4620 ? "invalid register class: data and dst should be all VGPR or AGPR"
4621 : "invalid register class: agpr loads and stores not supported on this GPU"
4622 );
4623 return false;
4624 }
4625 if (!validateVGPRAlign(Inst)) {
4626 Error(IDLoc,
4627 "invalid register class: vgpr tuples must be 64 bit aligned");
4628 return false;
4629 }
4630 if (!validateGWS(Inst, Operands)) {
4631 return false;
4632 }
4633
4634 if (!validateBLGP(Inst, Operands)) {
4635 return false;
4636 }
4637
4638 if (!validateDivScale(Inst)) {
4639 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4640 return false;
4641 }
4642 if (!validateExeczVcczOperands(Operands)) {
4643 return false;
4644 }
4645
4646 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4647 return false;
4648 }
4649
4650 return true;
4651 }
4652
4653 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4654 const FeatureBitset &FBS,
4655 unsigned VariantID = 0);
4656
4657 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4658 const FeatureBitset &AvailableFeatures,
4659 unsigned VariantID);
4660
4661 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4662 const FeatureBitset &FBS) {
4663 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4664 }
4665
4666 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4667 const FeatureBitset &FBS,
4668 ArrayRef<unsigned> Variants) {
4669 for (auto Variant : Variants) {
4670 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4671 return true;
4672 }
4673
4674 return false;
4675 }
4676
4677 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4678 const SMLoc &IDLoc) {
4679 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4680
4681 // Check if requested instruction variant is supported.
4682 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4683 return false;
4684
4685 // This instruction is not supported.
4686 // Clear any other pending errors because they are no longer relevant.
4687 getParser().clearPendingErrors();
4688
4689 // Requested instruction variant is not supported.
4690 // Check if any other variants are supported.
4691 StringRef VariantName = getMatchedVariantName();
4692 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4693 return Error(IDLoc,
4694 Twine(VariantName,
4695 " variant of this instruction is not supported"));
4696 }
4697
4698 // Finally check if this instruction is supported on any other GPU.
4699 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4700 return Error(IDLoc, "instruction not supported on this GPU");
4701 }
4702
4703 // Instruction not supported on any GPU. Probably a typo.
4704 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4705 return Error(IDLoc, "invalid instruction" + Suggestion);
4706 }
4707
4708 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4709 OperandVector &Operands,
4710 MCStreamer &Out,
4711 uint64_t &ErrorInfo,
4712 bool MatchingInlineAsm) {
4713 MCInst Inst;
4714 unsigned Result = Match_Success;
4715 for (auto Variant : getMatchedVariants()) {
4716 uint64_t EI;
4717 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4718 Variant);
4719     // We order match statuses from least to most specific. We use the most
4720     // specific status as the result:
4721 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4722 if ((R == Match_Success) ||
4723 (R == Match_PreferE32) ||
4724 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4725 (R == Match_InvalidOperand && Result != Match_MissingFeature
4726 && Result != Match_PreferE32) ||
4727 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4728 && Result != Match_MissingFeature
4729 && Result != Match_PreferE32)) {
4730 Result = R;
4731 ErrorInfo = EI;
4732 }
4733 if (R == Match_Success)
4734 break;
4735 }
4736
4737 if (Result == Match_Success) {
4738 if (!validateInstruction(Inst, IDLoc, Operands)) {
4739 return true;
4740 }
4741 Inst.setLoc(IDLoc);
4742 Out.emitInstruction(Inst, getSTI());
4743 return false;
4744 }
4745
4746 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4747 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4748 return true;
4749 }
4750
4751 switch (Result) {
4752 default: break;
4753 case Match_MissingFeature:
4754 // It has been verified that the specified instruction
4755 // mnemonic is valid. A match was found but it requires
4756 // features which are not supported on this GPU.
4757 return Error(IDLoc, "operands are not valid for this GPU or mode");
4758
4759 case Match_InvalidOperand: {
4760 SMLoc ErrorLoc = IDLoc;
4761 if (ErrorInfo != ~0ULL) {
4762 if (ErrorInfo >= Operands.size()) {
4763 return Error(IDLoc, "too few operands for instruction");
4764 }
4765 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4766 if (ErrorLoc == SMLoc())
4767 ErrorLoc = IDLoc;
4768 }
4769 return Error(ErrorLoc, "invalid operand for instruction");
4770 }
4771
4772 case Match_PreferE32:
4773 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4774 "should be encoded as e32");
4775 case Match_MnemonicFail:
4776 llvm_unreachable("Invalid instructions should have been handled already");
4777 }
4778 llvm_unreachable("Implement any new match types added!");
4779 }
4780
4781 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4782 int64_t Tmp = -1;
4783 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4784 return true;
4785 }
4786 if (getParser().parseAbsoluteExpression(Tmp)) {
4787 return true;
4788 }
4789 Ret = static_cast<uint32_t>(Tmp);
4790 return false;
4791 }
4792
4793 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4794 uint32_t &Minor) {
4795 if (ParseAsAbsoluteExpression(Major))
4796 return TokError("invalid major version");
4797
4798 if (!trySkipToken(AsmToken::Comma))
4799 return TokError("minor version number required, comma expected");
4800
4801 if (ParseAsAbsoluteExpression(Minor))
4802 return TokError("invalid minor version");
4803
4804 return false;
4805 }
4806
4807 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4808 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4809 return TokError("directive only supported for amdgcn architecture");
4810
4811 std::string TargetIDDirective;
4812 SMLoc TargetStart = getTok().getLoc();
4813 if (getParser().parseEscapedString(TargetIDDirective))
4814 return true;
4815
4816 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4817 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4818 return getParser().Error(TargetRange.Start,
4819 (Twine(".amdgcn_target directive's target id ") +
4820 Twine(TargetIDDirective) +
4821 Twine(" does not match the specified target id ") +
4822 Twine(getTargetStreamer().getTargetID()->toString())).str());
4823
4824 return false;
4825 }
4826
4827 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4828 return Error(Range.Start, "value out of range", Range);
4829 }
4830
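// Translate raw VGPR/SGPR usage into the granulated "block" encodings stored
// in compute_pgm_rsrc1, mirroring the codegen-side calculation. Returns true
// (failure) if a register count does not fit the target's addressable range.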
4831 bool AMDGPUAsmParser::calculateGPRBlocks(
4832 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4833 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4834 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4835 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4836 // TODO(scott.linder): These calculations are duplicated from
4837 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4838 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4839
4840 unsigned NumVGPRs = NextFreeVGPR;
4841 unsigned NumSGPRs = NextFreeSGPR;
4842
4843 if (Version.Major >= 10)
4844 NumSGPRs = 0;
4845 else {
4846 unsigned MaxAddressableNumSGPRs =
4847 IsaInfo::getAddressableNumSGPRs(&getSTI());
4848
4849 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4850 NumSGPRs > MaxAddressableNumSGPRs)
4851 return OutOfRangeError(SGPRRange);
4852
4853 NumSGPRs +=
4854 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4855
4856 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4857 NumSGPRs > MaxAddressableNumSGPRs)
4858 return OutOfRangeError(SGPRRange);
4859
4860 if (Features.test(FeatureSGPRInitBug))
4861 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4862 }
4863
4864 VGPRBlocks =
4865 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4866 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4867
4868 return false;
4869 }
4870
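// Parses a full .amdhsa_kernel block. A minimal, purely illustrative example
// (the kernel name and directive values are placeholders):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Each inner .amdhsa_* directive may appear at most once; the two
// .amdhsa_next_free_* directives are mandatory.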
4871 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4872 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4873 return TokError("directive only supported for amdgcn architecture");
4874
4875 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4876 return TokError("directive only supported for amdhsa OS");
4877
4878 StringRef KernelName;
4879 if (getParser().parseIdentifier(KernelName))
4880 return true;
4881
4882 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4883
4884 StringSet<> Seen;
4885
4886 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4887
4888 SMRange VGPRRange;
4889 uint64_t NextFreeVGPR = 0;
4890 uint64_t AccumOffset = 0;
4891 uint64_t SharedVGPRCount = 0;
4892 SMRange SGPRRange;
4893 uint64_t NextFreeSGPR = 0;
4894
4895 // Count the number of user SGPRs implied from the enabled feature bits.
4896 unsigned ImpliedUserSGPRCount = 0;
4897
4898 // Track if the asm explicitly contains the directive for the user SGPR
4899 // count.
4900 Optional<unsigned> ExplicitUserSGPRCount;
4901 bool ReserveVCC = true;
4902 bool ReserveFlatScr = true;
4903 Optional<bool> EnableWavefrontSize32;
4904
4905 while (true) {
4906 while (trySkipToken(AsmToken::EndOfStatement));
4907
4908 StringRef ID;
4909 SMRange IDRange = getTok().getLocRange();
4910 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4911 return true;
4912
4913 if (ID == ".end_amdhsa_kernel")
4914 break;
4915
4916 if (!Seen.insert(ID).second)
4917 return TokError(".amdhsa_ directives cannot be repeated");
4918
4919 SMLoc ValStart = getLoc();
4920 int64_t IVal;
4921 if (getParser().parseAbsoluteExpression(IVal))
4922 return true;
4923 SMLoc ValEnd = getLoc();
4924 SMRange ValRange = SMRange(ValStart, ValEnd);
4925
4926 if (IVal < 0)
4927 return OutOfRangeError(ValRange);
4928
4929 uint64_t Val = IVal;
4930
4931 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4932 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4933 return OutOfRangeError(RANGE); \
4934 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4935
4936 if (ID == ".amdhsa_group_segment_fixed_size") {
4937 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4938 return OutOfRangeError(ValRange);
4939 KD.group_segment_fixed_size = Val;
4940 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4941 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4942 return OutOfRangeError(ValRange);
4943 KD.private_segment_fixed_size = Val;
4944 } else if (ID == ".amdhsa_kernarg_size") {
4945 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4946 return OutOfRangeError(ValRange);
4947 KD.kernarg_size = Val;
4948 } else if (ID == ".amdhsa_user_sgpr_count") {
4949 ExplicitUserSGPRCount = Val;
4950 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4951 if (hasArchitectedFlatScratch())
4952 return Error(IDRange.Start,
4953 "directive is not supported with architected flat scratch",
4954 IDRange);
4955 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4956 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4957 Val, ValRange);
4958 if (Val)
4959 ImpliedUserSGPRCount += 4;
4960 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4961 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4962 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4963 ValRange);
4964 if (Val)
4965 ImpliedUserSGPRCount += 2;
4966 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4967 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4968 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4969 ValRange);
4970 if (Val)
4971 ImpliedUserSGPRCount += 2;
4972 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4973 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4974 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4975 Val, ValRange);
4976 if (Val)
4977 ImpliedUserSGPRCount += 2;
4978 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4979 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4980 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4981 ValRange);
4982 if (Val)
4983 ImpliedUserSGPRCount += 2;
4984 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4985 if (hasArchitectedFlatScratch())
4986 return Error(IDRange.Start,
4987 "directive is not supported with architected flat scratch",
4988 IDRange);
4989 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4990 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4991 ValRange);
4992 if (Val)
4993 ImpliedUserSGPRCount += 2;
4994 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4995 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4996 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4997 Val, ValRange);
4998 if (Val)
4999 ImpliedUserSGPRCount += 1;
5000 } else if (ID == ".amdhsa_wavefront_size32") {
5001 if (IVersion.Major < 10)
5002 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5003 EnableWavefrontSize32 = Val;
5004 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5005 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5006 Val, ValRange);
5007 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5008 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5009 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5010 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5011 if (hasArchitectedFlatScratch())
5012 return Error(IDRange.Start,
5013 "directive is not supported with architected flat scratch",
5014 IDRange);
5015 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5016 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5017 } else if (ID == ".amdhsa_enable_private_segment") {
5018 if (!hasArchitectedFlatScratch())
5019 return Error(
5020 IDRange.Start,
5021 "directive is not supported without architected flat scratch",
5022 IDRange);
5023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5024 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5025 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5027 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5028 ValRange);
5029 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5031 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5032 ValRange);
5033 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5035 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5036 ValRange);
5037 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5039 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5040 ValRange);
5041 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5043 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5044 ValRange);
5045 } else if (ID == ".amdhsa_next_free_vgpr") {
5046 VGPRRange = ValRange;
5047 NextFreeVGPR = Val;
5048 } else if (ID == ".amdhsa_next_free_sgpr") {
5049 SGPRRange = ValRange;
5050 NextFreeSGPR = Val;
5051 } else if (ID == ".amdhsa_accum_offset") {
5052 if (!isGFX90A())
5053 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5054 AccumOffset = Val;
5055 } else if (ID == ".amdhsa_reserve_vcc") {
5056 if (!isUInt<1>(Val))
5057 return OutOfRangeError(ValRange);
5058 ReserveVCC = Val;
5059 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5060 if (IVersion.Major < 7)
5061 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5062 if (hasArchitectedFlatScratch())
5063 return Error(IDRange.Start,
5064 "directive is not supported with architected flat scratch",
5065 IDRange);
5066 if (!isUInt<1>(Val))
5067 return OutOfRangeError(ValRange);
5068 ReserveFlatScr = Val;
5069 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5070 if (IVersion.Major < 8)
5071 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5072 if (!isUInt<1>(Val))
5073 return OutOfRangeError(ValRange);
5074 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5075 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5076 IDRange);
5077 } else if (ID == ".amdhsa_float_round_mode_32") {
5078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5079 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5080 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5082 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5083 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5084 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5085 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5086 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5087 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5088 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5089 ValRange);
5090 } else if (ID == ".amdhsa_dx10_clamp") {
5091 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5092 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5093 } else if (ID == ".amdhsa_ieee_mode") {
5094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5095 Val, ValRange);
5096 } else if (ID == ".amdhsa_fp16_overflow") {
5097 if (IVersion.Major < 9)
5098 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5100 ValRange);
5101 } else if (ID == ".amdhsa_tg_split") {
5102 if (!isGFX90A())
5103 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5104 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5105 ValRange);
5106 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5107 if (IVersion.Major < 10)
5108 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5109 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5110 ValRange);
5111 } else if (ID == ".amdhsa_memory_ordered") {
5112 if (IVersion.Major < 10)
5113 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5115 ValRange);
5116 } else if (ID == ".amdhsa_forward_progress") {
5117 if (IVersion.Major < 10)
5118 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5119 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5120 ValRange);
5121 } else if (ID == ".amdhsa_shared_vgpr_count") {
5122 if (IVersion.Major < 10)
5123 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5124 SharedVGPRCount = Val;
5125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5126 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5127 ValRange);
5128 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5129 PARSE_BITS_ENTRY(
5130 KD.compute_pgm_rsrc2,
5131 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5132 ValRange);
5133 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5134 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5135 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5136 Val, ValRange);
5137 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5138 PARSE_BITS_ENTRY(
5139 KD.compute_pgm_rsrc2,
5140 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5141 ValRange);
5142 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5143 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5144 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5145 Val, ValRange);
5146 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5149 Val, ValRange);
5150 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5152 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5153 Val, ValRange);
5154 } else if (ID == ".amdhsa_exception_int_div_zero") {
5155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5156 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5157 Val, ValRange);
5158 } else {
5159 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5160 }
5161
5162 #undef PARSE_BITS_ENTRY
5163 }
5164
5165 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5166 return TokError(".amdhsa_next_free_vgpr directive is required");
5167
5168 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5169 return TokError(".amdhsa_next_free_sgpr directive is required");
5170
5171 unsigned VGPRBlocks;
5172 unsigned SGPRBlocks;
5173 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5174 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5175 EnableWavefrontSize32, NextFreeVGPR,
5176 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5177 SGPRBlocks))
5178 return true;
5179
5180 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5181 VGPRBlocks))
5182 return OutOfRangeError(VGPRRange);
5183 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5184 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5185
5186 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5187 SGPRBlocks))
5188 return OutOfRangeError(SGPRRange);
5189 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5190 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5191 SGPRBlocks);
5192
5193 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5194     return TokError("amdhsa_user_sgpr_count smaller than implied by "
5195 "enabled user SGPRs");
5196
5197 unsigned UserSGPRCount =
5198 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5199
5200 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5201 return TokError("too many user SGPRs enabled");
5202 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5203 UserSGPRCount);
5204
5205 if (isGFX90A()) {
5206 if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5207 return TokError(".amdhsa_accum_offset directive is required");
5208 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5209 return TokError("accum_offset should be in range [4..256] in "
5210 "increments of 4");
5211 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5212 return TokError("accum_offset exceeds total VGPR allocation");
5213 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5214 (AccumOffset / 4 - 1));
5215 }
5216
5217 if (IVersion.Major == 10) {
5218     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5219 if (SharedVGPRCount && EnableWavefrontSize32) {
5220 return TokError("shared_vgpr_count directive not valid on "
5221 "wavefront size 32");
5222 }
5223 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5224 return TokError("shared_vgpr_count*2 + "
5225 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5226                       "exceed 63");
5227 }
5228 }
5229
5230 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5231 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5232 ReserveFlatScr);
5233 return false;
5234 }
5235
5236 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5237 uint32_t Major;
5238 uint32_t Minor;
5239
5240 if (ParseDirectiveMajorMinor(Major, Minor))
5241 return true;
5242
5243 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5244 return false;
5245 }
5246
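// With no operands this directive emits the ISA version of the targeted GPU;
// otherwise it expects "major, minor, stepping, vendor, arch", e.g. (values
// shown are illustrative only):
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"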
5247 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5248 uint32_t Major;
5249 uint32_t Minor;
5250 uint32_t Stepping;
5251 StringRef VendorName;
5252 StringRef ArchName;
5253
5254 // If this directive has no arguments, then use the ISA version for the
5255 // targeted GPU.
5256 if (isToken(AsmToken::EndOfStatement)) {
5257 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5258 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5259 ISA.Stepping,
5260 "AMD", "AMDGPU");
5261 return false;
5262 }
5263
5264 if (ParseDirectiveMajorMinor(Major, Minor))
5265 return true;
5266
5267 if (!trySkipToken(AsmToken::Comma))
5268 return TokError("stepping version number required, comma expected");
5269
5270 if (ParseAsAbsoluteExpression(Stepping))
5271 return TokError("invalid stepping version");
5272
5273 if (!trySkipToken(AsmToken::Comma))
5274 return TokError("vendor name required, comma expected");
5275
5276 if (!parseString(VendorName, "invalid vendor name"))
5277 return true;
5278
5279 if (!trySkipToken(AsmToken::Comma))
5280 return TokError("arch name required, comma expected");
5281
5282 if (!parseString(ArchName, "invalid arch name"))
5283 return true;
5284
5285 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5286 VendorName, ArchName);
5287 return false;
5288 }
5289
5290 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5291 amd_kernel_code_t &Header) {
5292 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5293 // assembly for backwards compatibility.
5294 if (ID == "max_scratch_backing_memory_byte_size") {
5295 Parser.eatToEndOfStatement();
5296 return false;
5297 }
5298
5299 SmallString<40> ErrStr;
5300 raw_svector_ostream Err(ErrStr);
5301 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5302 return TokError(Err.str());
5303 }
5304 Lex();
5305
5306 if (ID == "enable_wavefront_size32") {
5307 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5308 if (!isGFX10Plus())
5309 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5310 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5311 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5312 } else {
5313 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5314 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5315 }
5316 }
5317
5318 if (ID == "wavefront_size") {
5319 if (Header.wavefront_size == 5) {
5320 if (!isGFX10Plus())
5321 return TokError("wavefront_size=5 is only allowed on GFX10+");
5322 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5323 return TokError("wavefront_size=5 requires +WavefrontSize32");
5324 } else if (Header.wavefront_size == 6) {
5325 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5326 return TokError("wavefront_size=6 requires +WavefrontSize64");
5327 }
5328 }
5329
5330 if (ID == "enable_wgp_mode") {
5331 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5332 !isGFX10Plus())
5333 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5334 }
5335
5336 if (ID == "enable_mem_ordered") {
5337 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5338 !isGFX10Plus())
5339 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5340 }
5341
5342 if (ID == "enable_fwd_progress") {
5343 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5344 !isGFX10Plus())
5345 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5346 }
5347
5348 return false;
5349 }
5350
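// Parses a legacy (code object v2) kernel header block of the form below;
// the field and value shown are illustrative, not exhaustive:
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t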
5351 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5352 amd_kernel_code_t Header;
5353 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5354
5355 while (true) {
5356 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5357 // will set the current token to EndOfStatement.
5358 while(trySkipToken(AsmToken::EndOfStatement));
5359
5360 StringRef ID;
5361 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5362 return true;
5363
5364 if (ID == ".end_amd_kernel_code_t")
5365 break;
5366
5367 if (ParseAMDKernelCodeTValue(ID, Header))
5368 return true;
5369 }
5370
5371 getTargetStreamer().EmitAMDKernelCodeT(Header);
5372
5373 return false;
5374 }
5375
5376 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5377 StringRef KernelName;
5378 if (!parseId(KernelName, "expected symbol name"))
5379 return true;
5380
5381 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5382 ELF::STT_AMDGPU_HSA_KERNEL);
5383
5384 KernelScope.initialize(getContext());
5385 return false;
5386 }
5387
5388 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5389 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5390 return Error(getLoc(),
5391 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5392 "architectures");
5393 }
5394
5395 auto TargetIDDirective = getLexer().getTok().getStringContents();
5396 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5397 return Error(getParser().getTok().getLoc(), "target id must match options");
5398
5399 getTargetStreamer().EmitISAVersion();
5400 Lex();
5401
5402 return false;
5403 }
5404
5405 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5406 const char *AssemblerDirectiveBegin;
5407 const char *AssemblerDirectiveEnd;
5408 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5409 isHsaAbiVersion3AndAbove(&getSTI())
5410 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5411 HSAMD::V3::AssemblerDirectiveEnd)
5412 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5413 HSAMD::AssemblerDirectiveEnd);
5414
5415 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5416 return Error(getLoc(),
5417 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5418 "not available on non-amdhsa OSes")).str());
5419 }
5420
5421 std::string HSAMetadataString;
5422 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5423 HSAMetadataString))
5424 return true;
5425
5426 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5427 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5428 return Error(getLoc(), "invalid HSA metadata");
5429 } else {
5430 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5431 return Error(getLoc(), "invalid HSA metadata");
5432 }
5433
5434 return false;
5435 }
5436
5437 /// Common code to parse out a block of text (typically YAML) between start and
5438 /// end directives.
5439 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5440 const char *AssemblerDirectiveEnd,
5441 std::string &CollectString) {
5442
5443 raw_string_ostream CollectStream(CollectString);
5444
5445 getLexer().setSkipSpace(false);
5446
5447 bool FoundEnd = false;
5448 while (!isToken(AsmToken::Eof)) {
5449 while (isToken(AsmToken::Space)) {
5450 CollectStream << getTokenStr();
5451 Lex();
5452 }
5453
5454 if (trySkipId(AssemblerDirectiveEnd)) {
5455 FoundEnd = true;
5456 break;
5457 }
5458
5459 CollectStream << Parser.parseStringToEndOfStatement()
5460 << getContext().getAsmInfo()->getSeparatorString();
5461
5462 Parser.eatToEndOfStatement();
5463 }
5464
5465 getLexer().setSkipSpace(true);
5466
5467 if (isToken(AsmToken::Eof) && !FoundEnd) {
5468 return TokError(Twine("expected directive ") +
5469 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5470 }
5471
5472 CollectStream.flush();
5473 return false;
5474 }
5475
5476 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5477 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5478 std::string String;
5479 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5480 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5481 return true;
5482
5483 auto PALMetadata = getTargetStreamer().getPALMetadata();
5484 if (!PALMetadata->setFromString(String))
5485 return Error(getLoc(), "invalid PAL metadata");
5486 return false;
5487 }
5488
5489 /// Parse the assembler directive for old linear-format PAL metadata.
5490 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5491 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5492 return Error(getLoc(),
5493 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5494 "not available on non-amdpal OSes")).str());
5495 }
5496
5497 auto PALMetadata = getTargetStreamer().getPALMetadata();
5498 PALMetadata->setLegacy();
5499 for (;;) {
5500 uint32_t Key, Value;
5501 if (ParseAsAbsoluteExpression(Key)) {
5502 return TokError(Twine("invalid value in ") +
5503 Twine(PALMD::AssemblerDirective));
5504 }
5505 if (!trySkipToken(AsmToken::Comma)) {
5506 return TokError(Twine("expected an even number of values in ") +
5507 Twine(PALMD::AssemblerDirective));
5508 }
5509 if (ParseAsAbsoluteExpression(Value)) {
5510 return TokError(Twine("invalid value in ") +
5511 Twine(PALMD::AssemblerDirective));
5512 }
5513 PALMetadata->setRegister(Key, Value);
5514 if (!trySkipToken(AsmToken::Comma))
5515 break;
5516 }
5517 return false;
5518 }
5519
5520 /// ParseDirectiveAMDGPULDS
5521 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
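///
/// For example (the symbol name and values are illustrative):
///   .amdgpu_lds lds_buffer, 512, 16
/// The size is in bytes and must not exceed the target's local memory size;
/// the alignment defaults to 4 and must be a power of two.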
5522 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5523 if (getParser().checkForValidSection())
5524 return true;
5525
5526 StringRef Name;
5527 SMLoc NameLoc = getLoc();
5528 if (getParser().parseIdentifier(Name))
5529 return TokError("expected identifier in directive");
5530
5531 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5532 if (parseToken(AsmToken::Comma, "expected ','"))
5533 return true;
5534
5535 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5536
5537 int64_t Size;
5538 SMLoc SizeLoc = getLoc();
5539 if (getParser().parseAbsoluteExpression(Size))
5540 return true;
5541 if (Size < 0)
5542 return Error(SizeLoc, "size must be non-negative");
5543 if (Size > LocalMemorySize)
5544 return Error(SizeLoc, "size is too large");
5545
5546 int64_t Alignment = 4;
5547 if (trySkipToken(AsmToken::Comma)) {
5548 SMLoc AlignLoc = getLoc();
5549 if (getParser().parseAbsoluteExpression(Alignment))
5550 return true;
5551 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5552 return Error(AlignLoc, "alignment must be a power of two");
5553
5554 // Alignment larger than the size of LDS is possible in theory, as long
5555     // as the linker manages to place the symbol at address 0, but we do want
5556 // to make sure the alignment fits nicely into a 32-bit integer.
5557 if (Alignment >= 1u << 31)
5558 return Error(AlignLoc, "alignment is too large");
5559 }
5560
5561 if (parseEOL())
5562 return true;
5563
5564 Symbol->redefineIfPossible();
5565 if (!Symbol->isUndefined())
5566 return Error(NameLoc, "invalid symbol redefinition");
5567
5568 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5569 return false;
5570 }
5571
5572 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5573 StringRef IDVal = DirectiveID.getString();
5574
5575 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5576 if (IDVal == ".amdhsa_kernel")
5577 return ParseDirectiveAMDHSAKernel();
5578
5579 // TODO: Restructure/combine with PAL metadata directive.
5580 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5581 return ParseDirectiveHSAMetadata();
5582 } else {
5583 if (IDVal == ".hsa_code_object_version")
5584 return ParseDirectiveHSACodeObjectVersion();
5585
5586 if (IDVal == ".hsa_code_object_isa")
5587 return ParseDirectiveHSACodeObjectISA();
5588
5589 if (IDVal == ".amd_kernel_code_t")
5590 return ParseDirectiveAMDKernelCodeT();
5591
5592 if (IDVal == ".amdgpu_hsa_kernel")
5593 return ParseDirectiveAMDGPUHsaKernel();
5594
5595 if (IDVal == ".amd_amdgpu_isa")
5596 return ParseDirectiveISAVersion();
5597
5598 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5599 return ParseDirectiveHSAMetadata();
5600 }
5601
5602 if (IDVal == ".amdgcn_target")
5603 return ParseDirectiveAMDGCNTarget();
5604
5605 if (IDVal == ".amdgpu_lds")
5606 return ParseDirectiveAMDGPULDS();
5607
5608 if (IDVal == PALMD::AssemblerDirectiveBegin)
5609 return ParseDirectivePALMetadataBegin();
5610
5611 if (IDVal == PALMD::AssemblerDirective)
5612 return ParseDirectivePALMetadata();
5613
5614 return true;
5615 }
5616
5617 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5618 unsigned RegNo) {
5619
5620 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5621 return isGFX9Plus();
5622
5623 // GFX10+ has 2 more SGPRs 104 and 105.
5624 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5625 return hasSGPR104_SGPR105();
5626
5627 switch (RegNo) {
5628 case AMDGPU::SRC_SHARED_BASE:
5629 case AMDGPU::SRC_SHARED_LIMIT:
5630 case AMDGPU::SRC_PRIVATE_BASE:
5631 case AMDGPU::SRC_PRIVATE_LIMIT:
5632 return isGFX9Plus();
5633 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5634 return isGFX9Plus() && !isGFX11Plus();
5635 case AMDGPU::TBA:
5636 case AMDGPU::TBA_LO:
5637 case AMDGPU::TBA_HI:
5638 case AMDGPU::TMA:
5639 case AMDGPU::TMA_LO:
5640 case AMDGPU::TMA_HI:
5641 return !isGFX9Plus();
5642 case AMDGPU::XNACK_MASK:
5643 case AMDGPU::XNACK_MASK_LO:
5644 case AMDGPU::XNACK_MASK_HI:
5645 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5646 case AMDGPU::SGPR_NULL:
5647 return isGFX10Plus();
5648 default:
5649 break;
5650 }
5651
5652 if (isCI())
5653 return true;
5654
5655 if (isSI() || isGFX10Plus()) {
5656 // No flat_scr on SI.
5657 // On GFX10Plus flat scratch is not a valid register operand and can only be
5658 // accessed with s_setreg/s_getreg.
5659 switch (RegNo) {
5660 case AMDGPU::FLAT_SCR:
5661 case AMDGPU::FLAT_SCR_LO:
5662 case AMDGPU::FLAT_SCR_HI:
5663 return false;
5664 default:
5665 return true;
5666 }
5667 }
5668
5669 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5670 // SI/CI have.
5671 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5672 return hasSGPR102_SGPR103();
5673
5674 return true;
5675 }
5676
5677 OperandMatchResultTy
5678 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5679 OperandMode Mode) {
5680 OperandMatchResultTy ResTy = parseVOPD(Operands);
5681 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5682 isToken(AsmToken::EndOfStatement))
5683 return ResTy;
5684
5685 // Try to parse with a custom parser
5686 ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5687
5688   // If we successfully parsed the operand or if there was an error parsing,
5689   // we are done.
5690   //
5691   // If we are parsing after we reach EndOfStatement then this means we
5692   // are appending default values to the Operands list. This is only done
5693   // by custom parsers, so we shouldn't continue on to the generic parsing.
5694 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5695 isToken(AsmToken::EndOfStatement))
5696 return ResTy;
5697
5698 SMLoc RBraceLoc;
5699 SMLoc LBraceLoc = getLoc();
5700 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5701 unsigned Prefix = Operands.size();
5702
5703 for (;;) {
5704 auto Loc = getLoc();
5705 ResTy = parseReg(Operands);
5706 if (ResTy == MatchOperand_NoMatch)
5707 Error(Loc, "expected a register");
5708 if (ResTy != MatchOperand_Success)
5709 return MatchOperand_ParseFail;
5710
5711 RBraceLoc = getLoc();
5712 if (trySkipToken(AsmToken::RBrac))
5713 break;
5714
5715 if (!skipToken(AsmToken::Comma,
5716 "expected a comma or a closing square bracket")) {
5717 return MatchOperand_ParseFail;
5718 }
5719 }
5720
5721 if (Operands.size() - Prefix > 1) {
5722 Operands.insert(Operands.begin() + Prefix,
5723 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5724 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5725 }
5726
5727 return MatchOperand_Success;
5728 }
5729
5730 return parseRegOrImm(Operands);
5731 }
5732
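// Strip a recognized encoding suffix from the mnemonic and record it as a
// forced encoding. For example, "v_add_f32_e64" forces the 64-bit (VOP3)
// encoding and is parsed as "v_add_f32"; "_dpp" and "_sdwa" behave similarly.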
5733 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5734 // Clear any forced encodings from the previous instruction.
5735 setForcedEncodingSize(0);
5736 setForcedDPP(false);
5737 setForcedSDWA(false);
5738
5739 if (Name.endswith("_e64_dpp")) {
5740 setForcedDPP(true);
5741 setForcedEncodingSize(64);
5742 return Name.substr(0, Name.size() - 8);
5743 } else if (Name.endswith("_e64")) {
5744 setForcedEncodingSize(64);
5745 return Name.substr(0, Name.size() - 4);
5746 } else if (Name.endswith("_e32")) {
5747 setForcedEncodingSize(32);
5748 return Name.substr(0, Name.size() - 4);
5749 } else if (Name.endswith("_dpp")) {
5750 setForcedDPP(true);
5751 return Name.substr(0, Name.size() - 4);
5752 } else if (Name.endswith("_sdwa")) {
5753 setForcedSDWA(true);
5754 return Name.substr(0, Name.size() - 5);
5755 }
5756 return Name;
5757 }
5758
5759 static void applyMnemonicAliases(StringRef &Mnemonic,
5760 const FeatureBitset &Features,
5761 unsigned VariantID);
5762
5763 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5764 StringRef Name,
5765 SMLoc NameLoc, OperandVector &Operands) {
5766 // Add the instruction mnemonic
5767 Name = parseMnemonicSuffix(Name);
5768
5769 // If the target architecture uses MnemonicAlias, call it here to parse
5770 // operands correctly.
5771 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5772
5773 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5774
5775 bool IsMIMG = Name.startswith("image_");
5776
5777 while (!trySkipToken(AsmToken::EndOfStatement)) {
5778 OperandMode Mode = OperandMode_Default;
5779 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5780 Mode = OperandMode_NSA;
5781 CPolSeen = 0;
5782 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5783
5784 if (Res != MatchOperand_Success) {
5785 checkUnsupportedInstruction(Name, NameLoc);
5786 if (!Parser.hasPendingError()) {
5787 // FIXME: use real operand location rather than the current location.
5788 StringRef Msg =
5789 (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5790 "not a valid operand.";
5791 Error(getLoc(), Msg);
5792 }
5793 while (!trySkipToken(AsmToken::EndOfStatement)) {
5794 lex();
5795 }
5796 return true;
5797 }
5798
5799 // Eat the comma or space if there is one.
5800 trySkipToken(AsmToken::Comma);
5801 }
5802
5803 return false;
5804 }
5805
5806 //===----------------------------------------------------------------------===//
5807 // Utility functions
5808 //===----------------------------------------------------------------------===//
5809
5810 OperandMatchResultTy
5811 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5812
5813 if (!trySkipId(Prefix, AsmToken::Colon))
5814 return MatchOperand_NoMatch;
5815
5816 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5817 }
5818
5819 OperandMatchResultTy
5820 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5821 AMDGPUOperand::ImmTy ImmTy,
5822 bool (*ConvertResult)(int64_t&)) {
5823 SMLoc S = getLoc();
5824 int64_t Value = 0;
5825
5826 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5827 if (Res != MatchOperand_Success)
5828 return Res;
5829
5830 if (ConvertResult && !ConvertResult(Value)) {
5831 Error(S, "invalid " + StringRef(Prefix) + " value.");
5832 }
5833
5834 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5835 return MatchOperand_Success;
5836 }
5837
5838 OperandMatchResultTy
5839 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5840 OperandVector &Operands,
5841 AMDGPUOperand::ImmTy ImmTy,
5842 bool (*ConvertResult)(int64_t&)) {
5843 SMLoc S = getLoc();
5844 if (!trySkipId(Prefix, AsmToken::Colon))
5845 return MatchOperand_NoMatch;
5846
5847 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5848 return MatchOperand_ParseFail;
5849
5850 unsigned Val = 0;
5851 const unsigned MaxSize = 4;
5852
5853 // FIXME: How to verify the number of elements matches the number of src
5854 // operands?
5855 for (int I = 0; ; ++I) {
5856 int64_t Op;
5857 SMLoc Loc = getLoc();
5858 if (!parseExpr(Op))
5859 return MatchOperand_ParseFail;
5860
5861 if (Op != 0 && Op != 1) {
5862 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5863 return MatchOperand_ParseFail;
5864 }
5865
5866 Val |= (Op << I);
5867
5868 if (trySkipToken(AsmToken::RBrac))
5869 break;
5870
5871 if (I + 1 == MaxSize) {
5872 Error(getLoc(), "expected a closing square bracket");
5873 return MatchOperand_ParseFail;
5874 }
5875
5876 if (!skipToken(AsmToken::Comma, "expected a comma"))
5877 return MatchOperand_ParseFail;
5878 }
5879
5880 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5881 return MatchOperand_Success;
5882 }
5883
5884 OperandMatchResultTy
5885 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5886 AMDGPUOperand::ImmTy ImmTy) {
5887 int64_t Bit;
5888 SMLoc S = getLoc();
5889
5890 if (trySkipId(Name)) {
5891 Bit = 1;
5892 } else if (trySkipId("no", Name)) {
5893 Bit = 0;
5894 } else {
5895 return MatchOperand_NoMatch;
5896 }
5897
5898 if (Name == "r128" && !hasMIMG_R128()) {
5899 Error(S, "r128 modifier is not supported on this GPU");
5900 return MatchOperand_ParseFail;
5901 }
5902 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5903 Error(S, "a16 modifier is not supported on this GPU");
5904 return MatchOperand_ParseFail;
5905 }
5906
5907 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5908 ImmTy = AMDGPUOperand::ImmTyR128A16;
5909
5910 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5911 return MatchOperand_Success;
5912 }
5913
5914 OperandMatchResultTy
5915 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5916 unsigned CPolOn = 0;
5917 unsigned CPolOff = 0;
5918 SMLoc S = getLoc();
5919
5920 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5921 if (isGFX940() && !Mnemo.startswith("s_")) {
5922 if (trySkipId("sc0"))
5923 CPolOn = AMDGPU::CPol::SC0;
5924 else if (trySkipId("nosc0"))
5925 CPolOff = AMDGPU::CPol::SC0;
5926 else if (trySkipId("nt"))
5927 CPolOn = AMDGPU::CPol::NT;
5928 else if (trySkipId("nont"))
5929 CPolOff = AMDGPU::CPol::NT;
5930 else if (trySkipId("sc1"))
5931 CPolOn = AMDGPU::CPol::SC1;
5932 else if (trySkipId("nosc1"))
5933 CPolOff = AMDGPU::CPol::SC1;
5934 else
5935 return MatchOperand_NoMatch;
5936 }
5937 else if (trySkipId("glc"))
5938 CPolOn = AMDGPU::CPol::GLC;
5939 else if (trySkipId("noglc"))
5940 CPolOff = AMDGPU::CPol::GLC;
5941 else if (trySkipId("slc"))
5942 CPolOn = AMDGPU::CPol::SLC;
5943 else if (trySkipId("noslc"))
5944 CPolOff = AMDGPU::CPol::SLC;
5945 else if (trySkipId("dlc"))
5946 CPolOn = AMDGPU::CPol::DLC;
5947 else if (trySkipId("nodlc"))
5948 CPolOff = AMDGPU::CPol::DLC;
5949 else if (trySkipId("scc"))
5950 CPolOn = AMDGPU::CPol::SCC;
5951 else if (trySkipId("noscc"))
5952 CPolOff = AMDGPU::CPol::SCC;
5953 else
5954 return MatchOperand_NoMatch;
5955
5956 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5957 Error(S, "dlc modifier is not supported on this GPU");
5958 return MatchOperand_ParseFail;
5959 }
5960
5961 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5962 Error(S, "scc modifier is not supported on this GPU");
5963 return MatchOperand_ParseFail;
5964 }
5965
5966 if (CPolSeen & (CPolOn | CPolOff)) {
5967 Error(S, "duplicate cache policy modifier");
5968 return MatchOperand_ParseFail;
5969 }
5970
5971 CPolSeen |= (CPolOn | CPolOff);
5972
5973 for (unsigned I = 1; I != Operands.size(); ++I) {
5974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5975 if (Op.isCPol()) {
5976 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5977 return MatchOperand_Success;
5978 }
5979 }
5980
5981 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5982 AMDGPUOperand::ImmTyCPol));
5983
5984 return MatchOperand_Success;
5985 }
5986
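// If the optional operand identified by ImmT was present in the source, add
// its parsed immediate to the MCInst; otherwise append the given default
// value. Used by the cvt* instruction converters below to materialize
// operands the user omitted.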
5987 static void addOptionalImmOperand(
5988 MCInst& Inst, const OperandVector& Operands,
5989 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5990 AMDGPUOperand::ImmTy ImmT,
5991 int64_t Default = 0) {
5992 auto i = OptionalIdx.find(ImmT);
5993 if (i != OptionalIdx.end()) {
5994 unsigned Idx = i->second;
5995 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5996 } else {
5997 Inst.addOperand(MCOperand::createImm(Default));
5998 }
5999 }
6000
6001 OperandMatchResultTy
6002 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6003 StringRef &Value,
6004 SMLoc &StringLoc) {
6005 if (!trySkipId(Prefix, AsmToken::Colon))
6006 return MatchOperand_NoMatch;
6007
6008 StringLoc = getLoc();
6009 return parseId(Value, "expected an identifier") ? MatchOperand_Success
6010 : MatchOperand_ParseFail;
6011 }
6012
6013 //===----------------------------------------------------------------------===//
6014 // MTBUF format
6015 //===----------------------------------------------------------------------===//
6016
6017 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6018 int64_t MaxVal,
6019 int64_t &Fmt) {
6020 int64_t Val;
6021 SMLoc Loc = getLoc();
6022
6023 auto Res = parseIntWithPrefix(Pref, Val);
6024 if (Res == MatchOperand_ParseFail)
6025 return false;
6026 if (Res == MatchOperand_NoMatch)
6027 return true;
6028
6029 if (Val < 0 || Val > MaxVal) {
6030 Error(Loc, Twine("out of range ", StringRef(Pref)));
6031 return false;
6032 }
6033
6034 Fmt = Val;
6035 return true;
6036 }
6037
6038 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6039 // values to live in a joint format operand in the MCInst encoding.
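// Roughly, for example (format values are illustrative):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0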
6040 OperandMatchResultTy
6041 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6042 using namespace llvm::AMDGPU::MTBUFFormat;
6043
6044 int64_t Dfmt = DFMT_UNDEF;
6045 int64_t Nfmt = NFMT_UNDEF;
6046
6047 // dfmt and nfmt can appear in either order, and each is optional.
6048 for (int I = 0; I < 2; ++I) {
6049 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6050 return MatchOperand_ParseFail;
6051
6052 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6053 return MatchOperand_ParseFail;
6054 }
6055 // Skip optional comma between dfmt/nfmt
6056 // but guard against 2 commas following each other.
6057 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6058 !peekToken().is(AsmToken::Comma)) {
6059 trySkipToken(AsmToken::Comma);
6060 }
6061 }
6062
6063 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6064 return MatchOperand_NoMatch;
6065
6066 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6067 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6068
6069 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6070 return MatchOperand_Success;
6071 }
6072
6073 OperandMatchResultTy
6074 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6075 using namespace llvm::AMDGPU::MTBUFFormat;
6076
6077 int64_t Fmt = UFMT_UNDEF;
6078
6079 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6080 return MatchOperand_ParseFail;
6081
6082 if (Fmt == UFMT_UNDEF)
6083 return MatchOperand_NoMatch;
6084
6085 Format = Fmt;
6086 return MatchOperand_Success;
6087 }
6088
6089 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6090 int64_t &Nfmt,
6091 StringRef FormatStr,
6092 SMLoc Loc) {
6093 using namespace llvm::AMDGPU::MTBUFFormat;
6094 int64_t Format;
6095
6096 Format = getDfmt(FormatStr);
6097 if (Format != DFMT_UNDEF) {
6098 Dfmt = Format;
6099 return true;
6100 }
6101
6102 Format = getNfmt(FormatStr, getSTI());
6103 if (Format != NFMT_UNDEF) {
6104 Nfmt = Format;
6105 return true;
6106 }
6107
6108 Error(Loc, "unsupported format");
6109 return false;
6110 }
6111
6112 OperandMatchResultTy
6113 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6114 SMLoc FormatLoc,
6115 int64_t &Format) {
6116 using namespace llvm::AMDGPU::MTBUFFormat;
6117
6118 int64_t Dfmt = DFMT_UNDEF;
6119 int64_t Nfmt = NFMT_UNDEF;
6120 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6121 return MatchOperand_ParseFail;
6122
6123 if (trySkipToken(AsmToken::Comma)) {
6124 StringRef Str;
6125 SMLoc Loc = getLoc();
6126 if (!parseId(Str, "expected a format string") ||
6127 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6128 return MatchOperand_ParseFail;
6129 }
6130 if (Dfmt == DFMT_UNDEF) {
6131 Error(Loc, "duplicate numeric format");
6132 return MatchOperand_ParseFail;
6133 } else if (Nfmt == NFMT_UNDEF) {
6134 Error(Loc, "duplicate data format");
6135 return MatchOperand_ParseFail;
6136 }
6137 }
6138
6139 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6140 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6141
6142 if (isGFX10Plus()) {
6143 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6144 if (Ufmt == UFMT_UNDEF) {
6145 Error(FormatLoc, "unsupported format");
6146 return MatchOperand_ParseFail;
6147 }
6148 Format = Ufmt;
6149 } else {
6150 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6151 }
6152
6153 return MatchOperand_Success;
6154 }
6155
6156 OperandMatchResultTy
6157 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6158 SMLoc Loc,
6159 int64_t &Format) {
6160 using namespace llvm::AMDGPU::MTBUFFormat;
6161
6162 auto Id = getUnifiedFormat(FormatStr, getSTI());
6163 if (Id == UFMT_UNDEF)
6164 return MatchOperand_NoMatch;
6165
6166 if (!isGFX10Plus()) {
6167 Error(Loc, "unified format is not supported on this GPU");
6168 return MatchOperand_ParseFail;
6169 }
6170
6171 Format = Id;
6172 return MatchOperand_Success;
6173 }
6174
6175 OperandMatchResultTy
6176 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6177 using namespace llvm::AMDGPU::MTBUFFormat;
6178 SMLoc Loc = getLoc();
6179
6180 if (!parseExpr(Format))
6181 return MatchOperand_ParseFail;
6182 if (!isValidFormatEncoding(Format, getSTI())) {
6183 Error(Loc, "out of range format");
6184 return MatchOperand_ParseFail;
6185 }
6186
6187 return MatchOperand_Success;
6188 }
6189
6190 OperandMatchResultTy
6191 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6192 using namespace llvm::AMDGPU::MTBUFFormat;
6193
6194 if (!trySkipId("format", AsmToken::Colon))
6195 return MatchOperand_NoMatch;
6196
6197 if (trySkipToken(AsmToken::LBrac)) {
6198 StringRef FormatStr;
6199 SMLoc Loc = getLoc();
6200 if (!parseId(FormatStr, "expected a format string"))
6201 return MatchOperand_ParseFail;
6202
6203 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6204 if (Res == MatchOperand_NoMatch)
6205 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6206 if (Res != MatchOperand_Success)
6207 return Res;
6208
6209 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6210 return MatchOperand_ParseFail;
6211
6212 return MatchOperand_Success;
6213 }
6214
6215 return parseNumericFormat(Format);
6216 }
6217
6218 OperandMatchResultTy
6219 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6220 using namespace llvm::AMDGPU::MTBUFFormat;
6221
6222 int64_t Format = getDefaultFormatEncoding(getSTI());
6223 OperandMatchResultTy Res;
6224 SMLoc Loc = getLoc();
6225
6226 // Parse legacy format syntax.
6227 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6228 if (Res == MatchOperand_ParseFail)
6229 return Res;
6230
6231 bool FormatFound = (Res == MatchOperand_Success);
6232
6233 Operands.push_back(
6234 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6235
6236 if (FormatFound)
6237 trySkipToken(AsmToken::Comma);
6238
6239 if (isToken(AsmToken::EndOfStatement)) {
6240 // We are expecting an soffset operand,
6241     // but let the matcher handle the error.
6242 return MatchOperand_Success;
6243 }
6244
6245 // Parse soffset.
6246 Res = parseRegOrImm(Operands);
6247 if (Res != MatchOperand_Success)
6248 return Res;
6249
6250 trySkipToken(AsmToken::Comma);
6251
6252 if (!FormatFound) {
6253 Res = parseSymbolicOrNumericFormat(Format);
6254 if (Res == MatchOperand_ParseFail)
6255 return Res;
6256 if (Res == MatchOperand_Success) {
6257 auto Size = Operands.size();
6258 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6259 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6260 Op.setImm(Format);
6261 }
6262 return MatchOperand_Success;
6263 }
6264
6265 if (isId("format") && peekToken().is(AsmToken::Colon)) {
6266 Error(getLoc(), "duplicate format");
6267 return MatchOperand_ParseFail;
6268 }
6269 return MatchOperand_Success;
6270 }
6271
6272 //===----------------------------------------------------------------------===//
6273 // ds
6274 //===----------------------------------------------------------------------===//
6275
6276 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6277 const OperandVector &Operands) {
6278 OptionalImmIndexMap OptionalIdx;
6279
6280 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6281 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6282
6283 // Add the register arguments
6284 if (Op.isReg()) {
6285 Op.addRegOperands(Inst, 1);
6286 continue;
6287 }
6288
6289 // Handle optional arguments
6290 OptionalIdx[Op.getImmTy()] = i;
6291 }
6292
6293 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6294 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6296
6297 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6298 }
6299
6300 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6301 bool IsGdsHardcoded) {
6302 OptionalImmIndexMap OptionalIdx;
6303 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6304
6305 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6307
6308 // Add the register arguments
6309 if (Op.isReg()) {
6310 Op.addRegOperands(Inst, 1);
6311 continue;
6312 }
6313
6314 if (Op.isToken() && Op.getToken() == "gds") {
6315 IsGdsHardcoded = true;
6316 continue;
6317 }
6318
6319 // Handle optional arguments
6320 OptionalIdx[Op.getImmTy()] = i;
6321
6322 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6323 OffsetType = AMDGPUOperand::ImmTySwizzle;
6324 }
6325
6326 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6327
6328 if (!IsGdsHardcoded) {
6329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6330 }
6331 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6332 }
6333
6334 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6335 OptionalImmIndexMap OptionalIdx;
6336
6337 unsigned OperandIdx[4];
6338 unsigned EnMask = 0;
6339 int SrcIdx = 0;
6340
6341 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6342 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6343
6344 // Add the register arguments
6345 if (Op.isReg()) {
6346 assert(SrcIdx < 4);
6347 OperandIdx[SrcIdx] = Inst.size();
6348 Op.addRegOperands(Inst, 1);
6349 ++SrcIdx;
6350 continue;
6351 }
6352
6353 if (Op.isOff()) {
6354 assert(SrcIdx < 4);
6355 OperandIdx[SrcIdx] = Inst.size();
6356 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6357 ++SrcIdx;
6358 continue;
6359 }
6360
6361 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6362 Op.addImmOperands(Inst, 1);
6363 continue;
6364 }
6365
6366 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6367 continue;
6368
6369 // Handle optional arguments
6370 OptionalIdx[Op.getImmTy()] = i;
6371 }
6372
6373 assert(SrcIdx == 4);
6374
6375 bool Compr = false;
6376 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6377 Compr = true;
6378 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6379 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6380 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6381 }
6382
6383 for (auto i = 0; i < SrcIdx; ++i) {
6384 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6385 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6386 }
6387 }
6388
6389 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6390 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6391
6392 Inst.addOperand(MCOperand::createImm(EnMask));
6393 }
6394
6395 //===----------------------------------------------------------------------===//
6396 // s_waitcnt
6397 //===----------------------------------------------------------------------===//
6398
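// s_waitcnt operands may be written either as a plain expression or as a
// list of named counters, e.g. (the counts are illustrative):
//   s_waitcnt vmcnt(0) lgkmcnt(0)
// encodeCnt() packs one such named count into the combined immediate,
// clamping to the maximum when the "_sat" form of the counter name is used.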
6399 static bool
6400 encodeCnt(
6401 const AMDGPU::IsaVersion ISA,
6402 int64_t &IntVal,
6403 int64_t CntVal,
6404 bool Saturate,
6405 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6406 unsigned (*decode)(const IsaVersion &Version, unsigned))
6407 {
6408 bool Failed = false;
6409
6410 IntVal = encode(ISA, IntVal, CntVal);
6411 if (CntVal != decode(ISA, IntVal)) {
6412 if (Saturate) {
6413 IntVal = encode(ISA, IntVal, -1);
6414 } else {
6415 Failed = true;
6416 }
6417 }
6418 return Failed;
6419 }
6420
6421 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6422
6423 SMLoc CntLoc = getLoc();
6424 StringRef CntName = getTokenStr();
6425
6426 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6427 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6428 return false;
6429
6430 int64_t CntVal;
6431 SMLoc ValLoc = getLoc();
6432 if (!parseExpr(CntVal))
6433 return false;
6434
6435 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6436
6437 bool Failed = true;
6438 bool Sat = CntName.endswith("_sat");
6439
6440 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6441 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6442 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6443 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6444 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6445 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6446 } else {
6447 Error(CntLoc, "invalid counter name " + CntName);
6448 return false;
6449 }
6450
6451 if (Failed) {
6452 Error(ValLoc, "too large value for " + CntName);
6453 return false;
6454 }
6455
6456 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6457 return false;
6458
6459 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6460 if (isToken(AsmToken::EndOfStatement)) {
6461 Error(getLoc(), "expected a counter name");
6462 return false;
6463 }
6464 }
6465
6466 return true;
6467 }
6468
6469 OperandMatchResultTy
6470 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6471 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6472 int64_t Waitcnt = getWaitcntBitMask(ISA);
6473 SMLoc S = getLoc();
6474
6475 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6476 while (!isToken(AsmToken::EndOfStatement)) {
6477 if (!parseCnt(Waitcnt))
6478 return MatchOperand_ParseFail;
6479 }
6480 } else {
6481 if (!parseExpr(Waitcnt))
6482 return MatchOperand_ParseFail;
6483 }
6484
6485 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6486 return MatchOperand_Success;
6487 }
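// Typical forms accepted by parseSWaitCntOps (illustrative):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)
//   s_waitcnt 0        ; a raw immediate / absolute expression is also allowed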
6488
6489 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6490 SMLoc FieldLoc = getLoc();
6491 StringRef FieldName = getTokenStr();
6492 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6493 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6494 return false;
6495
6496 SMLoc ValueLoc = getLoc();
6497 StringRef ValueName = getTokenStr();
6498 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6499 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6500 return false;
6501
6502 unsigned Shift;
6503 if (FieldName == "instid0") {
6504 Shift = 0;
6505 } else if (FieldName == "instskip") {
6506 Shift = 4;
6507 } else if (FieldName == "instid1") {
6508 Shift = 7;
6509 } else {
6510 Error(FieldLoc, "invalid field name " + FieldName);
6511 return false;
6512 }
6513
6514 int Value;
6515 if (Shift == 4) {
6516 // Parse values for instskip.
6517 Value = StringSwitch<int>(ValueName)
6518 .Case("SAME", 0)
6519 .Case("NEXT", 1)
6520 .Case("SKIP_1", 2)
6521 .Case("SKIP_2", 3)
6522 .Case("SKIP_3", 4)
6523 .Case("SKIP_4", 5)
6524 .Default(-1);
6525 } else {
6526 // Parse values for instid0 and instid1.
6527 Value = StringSwitch<int>(ValueName)
6528 .Case("NO_DEP", 0)
6529 .Case("VALU_DEP_1", 1)
6530 .Case("VALU_DEP_2", 2)
6531 .Case("VALU_DEP_3", 3)
6532 .Case("VALU_DEP_4", 4)
6533 .Case("TRANS32_DEP_1", 5)
6534 .Case("TRANS32_DEP_2", 6)
6535 .Case("TRANS32_DEP_3", 7)
6536 .Case("FMA_ACCUM_CYCLE_1", 8)
6537 .Case("SALU_CYCLE_1", 9)
6538 .Case("SALU_CYCLE_2", 10)
6539 .Case("SALU_CYCLE_3", 11)
6540 .Default(-1);
6541 }
6542 if (Value < 0) {
6543 Error(ValueLoc, "invalid value name " + ValueName);
6544 return false;
6545 }
6546
6547 Delay |= Value << Shift;
6548 return true;
6549 }
6550
6551 OperandMatchResultTy
6552 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6553 int64_t Delay = 0;
6554 SMLoc S = getLoc();
6555
6556 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6557 do {
6558 if (!parseDelay(Delay))
6559 return MatchOperand_ParseFail;
6560 } while (trySkipToken(AsmToken::Pipe));
6561 } else {
6562 if (!parseExpr(Delay))
6563 return MatchOperand_ParseFail;
6564 }
6565
6566 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6567 return MatchOperand_Success;
6568 }
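// Illustrative gfx11 syntax handled by parseDelay/parseSDelayAluOps:
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
// instid0 lands in bits [3:0], instskip in [6:4] and instid1 in [10:7],
// matching the shifts used above.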
6569
6570 bool
6571 AMDGPUOperand::isSWaitCnt() const {
6572 return isImm();
6573 }
6574
6575 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6576
6577 //===----------------------------------------------------------------------===//
6578 // DepCtr
6579 //===----------------------------------------------------------------------===//
6580
6581 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6582 StringRef DepCtrName) {
6583 switch (ErrorId) {
6584 case OPR_ID_UNKNOWN:
6585 Error(Loc, Twine("invalid counter name ", DepCtrName));
6586 return;
6587 case OPR_ID_UNSUPPORTED:
6588 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6589 return;
6590 case OPR_ID_DUPLICATE:
6591 Error(Loc, Twine("duplicate counter name ", DepCtrName));
6592 return;
6593 case OPR_VAL_INVALID:
6594 Error(Loc, Twine("invalid value for ", DepCtrName));
6595 return;
6596 default:
6597 assert(false);
6598 }
6599 }
6600
6601 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6602
6603 using namespace llvm::AMDGPU::DepCtr;
6604
6605 SMLoc DepCtrLoc = getLoc();
6606 StringRef DepCtrName = getTokenStr();
6607
6608 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6609 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6610 return false;
6611
6612 int64_t ExprVal;
6613 if (!parseExpr(ExprVal))
6614 return false;
6615
6616 unsigned PrevOprMask = UsedOprMask;
6617 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6618
6619 if (CntVal < 0) {
6620 depCtrError(DepCtrLoc, CntVal, DepCtrName);
6621 return false;
6622 }
6623
6624 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6625 return false;
6626
6627 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6628 if (isToken(AsmToken::EndOfStatement)) {
6629 Error(getLoc(), "expected a counter name");
6630 return false;
6631 }
6632 }
6633
6634 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6635 DepCtr = (DepCtr & ~CntValMask) | CntVal;
6636 return true;
6637 }
6638
6639 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6640 using namespace llvm::AMDGPU::DepCtr;
6641
6642 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6643 SMLoc Loc = getLoc();
6644
6645 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6646 unsigned UsedOprMask = 0;
6647 while (!isToken(AsmToken::EndOfStatement)) {
6648 if (!parseDepCtr(DepCtr, UsedOprMask))
6649 return MatchOperand_ParseFail;
6650 }
6651 } else {
6652 if (!parseExpr(DepCtr))
6653 return MatchOperand_ParseFail;
6654 }
6655
6656 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6657 return MatchOperand_Success;
6658 }
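// Illustrative use of the depctr syntax parsed above (counter names come from
// the DepCtr tables in AMDGPUAsmUtils and vary by target):
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(1)
// A raw 16-bit immediate or absolute expression is accepted as well.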
6659
6660 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6661
6662 //===----------------------------------------------------------------------===//
6663 // hwreg
6664 //===----------------------------------------------------------------------===//
6665
6666 bool
6667 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6668 OperandInfoTy &Offset,
6669 OperandInfoTy &Width) {
6670 using namespace llvm::AMDGPU::Hwreg;
6671
6672 // The register may be specified by name or using a numeric code
6673 HwReg.Loc = getLoc();
6674 if (isToken(AsmToken::Identifier) &&
6675 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6676 HwReg.IsSymbolic = true;
6677 lex(); // skip register name
6678 } else if (!parseExpr(HwReg.Id, "a register name")) {
6679 return false;
6680 }
6681
6682 if (trySkipToken(AsmToken::RParen))
6683 return true;
6684
6685 // parse optional params
6686 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6687 return false;
6688
6689 Offset.Loc = getLoc();
6690 if (!parseExpr(Offset.Id))
6691 return false;
6692
6693 if (!skipToken(AsmToken::Comma, "expected a comma"))
6694 return false;
6695
6696 Width.Loc = getLoc();
6697 return parseExpr(Width.Id) &&
6698 skipToken(AsmToken::RParen, "expected a closing parenthesis");
6699 }
6700
6701 bool
6702 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6703 const OperandInfoTy &Offset,
6704 const OperandInfoTy &Width) {
6705
6706 using namespace llvm::AMDGPU::Hwreg;
6707
6708 if (HwReg.IsSymbolic) {
6709 if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6710 Error(HwReg.Loc,
6711 "specified hardware register is not supported on this GPU");
6712 return false;
6713 }
6714 } else {
6715 if (!isValidHwreg(HwReg.Id)) {
6716 Error(HwReg.Loc,
6717 "invalid code of hardware register: only 6-bit values are legal");
6718 return false;
6719 }
6720 }
6721 if (!isValidHwregOffset(Offset.Id)) {
6722 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6723 return false;
6724 }
6725 if (!isValidHwregWidth(Width.Id)) {
6726 Error(Width.Loc,
6727 "invalid bitfield width: only values from 1 to 32 are legal");
6728 return false;
6729 }
6730 return true;
6731 }
6732
6733 OperandMatchResultTy
6734 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6735 using namespace llvm::AMDGPU::Hwreg;
6736
6737 int64_t ImmVal = 0;
6738 SMLoc Loc = getLoc();
6739
6740 if (trySkipId("hwreg", AsmToken::LParen)) {
6741 OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6742 OperandInfoTy Offset(OFFSET_DEFAULT_);
6743 OperandInfoTy Width(WIDTH_DEFAULT_);
6744 if (parseHwregBody(HwReg, Offset, Width) &&
6745 validateHwreg(HwReg, Offset, Width)) {
6746 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6747 } else {
6748 return MatchOperand_ParseFail;
6749 }
6750 } else if (parseExpr(ImmVal, "a hwreg macro")) {
6751 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6752 Error(Loc, "invalid immediate: only 16-bit values are legal");
6753 return MatchOperand_ParseFail;
6754 }
6755 } else {
6756 return MatchOperand_ParseFail;
6757 }
6758
6759 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6760 return MatchOperand_Success;
6761 }
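// Examples accepted by parseHwreg (illustrative; symbolic register names are
// target-dependent):
//   s_getreg_b32 s2, hwreg(HW_REG_HW_ID)          ; whole register
//   s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s1     ; offset 0, width 4
//   s_getreg_b32 s2, hwreg(6, 0, 17)              ; numeric form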
6762
6763 bool AMDGPUOperand::isHwreg() const {
6764 return isImmTy(ImmTyHwreg);
6765 }
6766
6767 //===----------------------------------------------------------------------===//
6768 // sendmsg
6769 //===----------------------------------------------------------------------===//
6770
6771 bool
6772 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6773 OperandInfoTy &Op,
6774 OperandInfoTy &Stream) {
6775 using namespace llvm::AMDGPU::SendMsg;
6776
6777 Msg.Loc = getLoc();
6778 if (isToken(AsmToken::Identifier) &&
6779 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6780 Msg.IsSymbolic = true;
6781 lex(); // skip message name
6782 } else if (!parseExpr(Msg.Id, "a message name")) {
6783 return false;
6784 }
6785
6786 if (trySkipToken(AsmToken::Comma)) {
6787 Op.IsDefined = true;
6788 Op.Loc = getLoc();
6789 if (isToken(AsmToken::Identifier) &&
6790 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6791 lex(); // skip operation name
6792 } else if (!parseExpr(Op.Id, "an operation name")) {
6793 return false;
6794 }
6795
6796 if (trySkipToken(AsmToken::Comma)) {
6797 Stream.IsDefined = true;
6798 Stream.Loc = getLoc();
6799 if (!parseExpr(Stream.Id))
6800 return false;
6801 }
6802 }
6803
6804 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6805 }
6806
6807 bool
6808 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6809 const OperandInfoTy &Op,
6810 const OperandInfoTy &Stream) {
6811 using namespace llvm::AMDGPU::SendMsg;
6812
6813 // Validation strictness depends on whether the message is specified
6814 // in a symbolic or in a numeric form. In the latter case,
6815 // only whether the value can be encoded is checked.
6816 bool Strict = Msg.IsSymbolic;
6817
6818 if (Strict) {
6819 if (Msg.Id == OPR_ID_UNSUPPORTED) {
6820 Error(Msg.Loc, "specified message id is not supported on this GPU");
6821 return false;
6822 }
6823 } else {
6824 if (!isValidMsgId(Msg.Id, getSTI())) {
6825 Error(Msg.Loc, "invalid message id");
6826 return false;
6827 }
6828 }
6829 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6830 if (Op.IsDefined) {
6831 Error(Op.Loc, "message does not support operations");
6832 } else {
6833 Error(Msg.Loc, "missing message operation");
6834 }
6835 return false;
6836 }
6837 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6838 Error(Op.Loc, "invalid operation id");
6839 return false;
6840 }
6841 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6842 Stream.IsDefined) {
6843 Error(Stream.Loc, "message operation does not support streams");
6844 return false;
6845 }
6846 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6847 Error(Stream.Loc, "invalid message stream id");
6848 return false;
6849 }
6850 return true;
6851 }
6852
6853 OperandMatchResultTy
6854 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6855 using namespace llvm::AMDGPU::SendMsg;
6856
6857 int64_t ImmVal = 0;
6858 SMLoc Loc = getLoc();
6859
6860 if (trySkipId("sendmsg", AsmToken::LParen)) {
6861 OperandInfoTy Msg(OPR_ID_UNKNOWN);
6862 OperandInfoTy Op(OP_NONE_);
6863 OperandInfoTy Stream(STREAM_ID_NONE_);
6864 if (parseSendMsgBody(Msg, Op, Stream) &&
6865 validateSendMsg(Msg, Op, Stream)) {
6866 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6867 } else {
6868 return MatchOperand_ParseFail;
6869 }
6870 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6871 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6872 Error(Loc, "invalid immediate: only 16-bit values are legal");
6873 return MatchOperand_ParseFail;
6874 }
6875 } else {
6876 return MatchOperand_ParseFail;
6877 }
6878
6879 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6880 return MatchOperand_Success;
6881 }
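// Illustrative sendmsg forms handled above (message/operation names are
// target-dependent):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22      ; raw 16-bit immediate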
6882
6883 bool AMDGPUOperand::isSendMsg() const {
6884 return isImmTy(ImmTySendMsg);
6885 }
6886
6887 //===----------------------------------------------------------------------===//
6888 // v_interp
6889 //===----------------------------------------------------------------------===//
6890
6891 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6892 StringRef Str;
6893 SMLoc S = getLoc();
6894
6895 if (!parseId(Str))
6896 return MatchOperand_NoMatch;
6897
6898 int Slot = StringSwitch<int>(Str)
6899 .Case("p10", 0)
6900 .Case("p20", 1)
6901 .Case("p0", 2)
6902 .Default(-1);
6903
6904 if (Slot == -1) {
6905 Error(S, "invalid interpolation slot");
6906 return MatchOperand_ParseFail;
6907 }
6908
6909 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6910 AMDGPUOperand::ImmTyInterpSlot));
6911 return MatchOperand_Success;
6912 }
6913
6914 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6915 StringRef Str;
6916 SMLoc S = getLoc();
6917
6918 if (!parseId(Str))
6919 return MatchOperand_NoMatch;
6920
6921 if (!Str.startswith("attr")) {
6922 Error(S, "invalid interpolation attribute");
6923 return MatchOperand_ParseFail;
6924 }
6925
6926 StringRef Chan = Str.take_back(2);
6927 int AttrChan = StringSwitch<int>(Chan)
6928 .Case(".x", 0)
6929 .Case(".y", 1)
6930 .Case(".z", 2)
6931 .Case(".w", 3)
6932 .Default(-1);
6933 if (AttrChan == -1) {
6934 Error(S, "invalid or missing interpolation attribute channel");
6935 return MatchOperand_ParseFail;
6936 }
6937
6938 Str = Str.drop_back(2).drop_front(4);
6939
6940 uint8_t Attr;
6941 if (Str.getAsInteger(10, Attr)) {
6942 Error(S, "invalid or missing interpolation attribute number");
6943 return MatchOperand_ParseFail;
6944 }
6945
6946 if (Attr > 63) {
6947 Error(S, "out of bounds interpolation attribute number");
6948 return MatchOperand_ParseFail;
6949 }
6950
6951 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6952
6953 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6954 AMDGPUOperand::ImmTyInterpAttr));
6955 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6956 AMDGPUOperand::ImmTyAttrChan));
6957 return MatchOperand_Success;
6958 }
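// The attribute operand parsed above has the form "attr<N>.<chan>", e.g.
//   v_interp_p1_f32 v0, v1, attr3.x    ; Attr = 3, AttrChan = 0
//   v_interp_p2_f32 v0, v1, attr3.w    ; Attr = 3, AttrChan = 3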
6959
6960 //===----------------------------------------------------------------------===//
6961 // exp
6962 //===----------------------------------------------------------------------===//
6963
6964 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6965 using namespace llvm::AMDGPU::Exp;
6966
6967 StringRef Str;
6968 SMLoc S = getLoc();
6969
6970 if (!parseId(Str))
6971 return MatchOperand_NoMatch;
6972
6973 unsigned Id = getTgtId(Str);
6974 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6975 Error(S, (Id == ET_INVALID) ?
6976 "invalid exp target" :
6977 "exp target is not supported on this GPU");
6978 return MatchOperand_ParseFail;
6979 }
6980
6981 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6982 AMDGPUOperand::ImmTyExpTgt));
6983 return MatchOperand_Success;
6984 }
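// Typical export targets accepted here (illustrative; availability depends on
// the subtarget): mrt0..mrt7, mrtz, null, pos0..pos4, param0..param31, e.g.
//   exp mrt0 v0, v1, off, off done vm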
6985
6986 //===----------------------------------------------------------------------===//
6987 // parser helpers
6988 //===----------------------------------------------------------------------===//
6989
6990 bool
6991 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6992 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6993 }
6994
6995 bool
6996 AMDGPUAsmParser::isId(const StringRef Id) const {
6997 return isId(getToken(), Id);
6998 }
6999
7000 bool
7001 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7002 return getTokenKind() == Kind;
7003 }
7004
7005 bool
7006 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7007 if (isId(Id)) {
7008 lex();
7009 return true;
7010 }
7011 return false;
7012 }
7013
7014 bool
7015 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7016 if (isToken(AsmToken::Identifier)) {
7017 StringRef Tok = getTokenStr();
7018 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7019 lex();
7020 return true;
7021 }
7022 }
7023 return false;
7024 }
7025
7026 bool
7027 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7028 if (isId(Id) && peekToken().is(Kind)) {
7029 lex();
7030 lex();
7031 return true;
7032 }
7033 return false;
7034 }
7035
7036 bool
7037 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7038 if (isToken(Kind)) {
7039 lex();
7040 return true;
7041 }
7042 return false;
7043 }
7044
7045 bool
7046 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7047 const StringRef ErrMsg) {
7048 if (!trySkipToken(Kind)) {
7049 Error(getLoc(), ErrMsg);
7050 return false;
7051 }
7052 return true;
7053 }
7054
7055 bool
7056 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7057 SMLoc S = getLoc();
7058
7059 const MCExpr *Expr;
7060 if (Parser.parseExpression(Expr))
7061 return false;
7062
7063 if (Expr->evaluateAsAbsolute(Imm))
7064 return true;
7065
7066 if (Expected.empty()) {
7067 Error(S, "expected absolute expression");
7068 } else {
7069 Error(S, Twine("expected ", Expected) +
7070 Twine(" or an absolute expression"));
7071 }
7072 return false;
7073 }
7074
7075 bool
7076 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7077 SMLoc S = getLoc();
7078
7079 const MCExpr *Expr;
7080 if (Parser.parseExpression(Expr))
7081 return false;
7082
7083 int64_t IntVal;
7084 if (Expr->evaluateAsAbsolute(IntVal)) {
7085 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7086 } else {
7087 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7088 }
7089 return true;
7090 }
7091
7092 bool
7093 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7094 if (isToken(AsmToken::String)) {
7095 Val = getToken().getStringContents();
7096 lex();
7097 return true;
7098 } else {
7099 Error(getLoc(), ErrMsg);
7100 return false;
7101 }
7102 }
7103
7104 bool
7105 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7106 if (isToken(AsmToken::Identifier)) {
7107 Val = getTokenStr();
7108 lex();
7109 return true;
7110 } else {
7111 if (!ErrMsg.empty())
7112 Error(getLoc(), ErrMsg);
7113 return false;
7114 }
7115 }
7116
7117 AsmToken
7118 AMDGPUAsmParser::getToken() const {
7119 return Parser.getTok();
7120 }
7121
7122 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7123 return isToken(AsmToken::EndOfStatement)
7124 ? getToken()
7125 : getLexer().peekTok(ShouldSkipSpace);
7126 }
7127
7128 void
7129 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7130 auto TokCount = getLexer().peekTokens(Tokens);
7131
7132 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7133 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7134 }
7135
7136 AsmToken::TokenKind
7137 AMDGPUAsmParser::getTokenKind() const {
7138 return getLexer().getKind();
7139 }
7140
7141 SMLoc
7142 AMDGPUAsmParser::getLoc() const {
7143 return getToken().getLoc();
7144 }
7145
7146 StringRef
7147 AMDGPUAsmParser::getTokenStr() const {
7148 return getToken().getString();
7149 }
7150
7151 void
7152 AMDGPUAsmParser::lex() {
7153 Parser.Lex();
7154 }
7155
7156 SMLoc
7157 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7158 const OperandVector &Operands) const {
7159 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7160 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7161 if (Test(Op))
7162 return Op.getStartLoc();
7163 }
7164 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7165 }
7166
7167 SMLoc
7168 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7169 const OperandVector &Operands) const {
7170 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7171 return getOperandLoc(Test, Operands);
7172 }
7173
7174 SMLoc
7175 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7176 const OperandVector &Operands) const {
7177 auto Test = [=](const AMDGPUOperand& Op) {
7178 return Op.isRegKind() && Op.getReg() == Reg;
7179 };
7180 return getOperandLoc(Test, Operands);
7181 }
7182
7183 SMLoc
7184 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7185 auto Test = [](const AMDGPUOperand& Op) {
7186 return Op.IsImmKindLiteral() || Op.isExpr();
7187 };
7188 return getOperandLoc(Test, Operands);
7189 }
7190
7191 SMLoc
7192 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7193 auto Test = [](const AMDGPUOperand& Op) {
7194 return Op.isImmKindConst();
7195 };
7196 return getOperandLoc(Test, Operands);
7197 }
7198
7199 //===----------------------------------------------------------------------===//
7200 // swizzle
7201 //===----------------------------------------------------------------------===//
7202
7203 LLVM_READNONE
7204 static unsigned
7205 encodeBitmaskPerm(const unsigned AndMask,
7206 const unsigned OrMask,
7207 const unsigned XorMask) {
7208 using namespace llvm::AMDGPU::Swizzle;
7209
7210 return BITMASK_PERM_ENC |
7211 (AndMask << BITMASK_AND_SHIFT) |
7212 (OrMask << BITMASK_OR_SHIFT) |
7213 (XorMask << BITMASK_XOR_SHIFT);
7214 }
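// A sketch of the underlying ds_swizzle BITMASK_PERM semantics: each lane i
// reads from lane ((i & AndMask) | OrMask) ^ XorMask, which is the mapping the
// helpers below construct their masks for.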
7215
7216 bool
7217 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7218 const unsigned MinVal,
7219 const unsigned MaxVal,
7220 const StringRef ErrMsg,
7221 SMLoc &Loc) {
7222 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7223 return false;
7224 }
7225 Loc = getLoc();
7226 if (!parseExpr(Op)) {
7227 return false;
7228 }
7229 if (Op < MinVal || Op > MaxVal) {
7230 Error(Loc, ErrMsg);
7231 return false;
7232 }
7233
7234 return true;
7235 }
7236
7237 bool
7238 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7239 const unsigned MinVal,
7240 const unsigned MaxVal,
7241 const StringRef ErrMsg) {
7242 SMLoc Loc;
7243 for (unsigned i = 0; i < OpNum; ++i) {
7244 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7245 return false;
7246 }
7247
7248 return true;
7249 }
7250
7251 bool
7252 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7253 using namespace llvm::AMDGPU::Swizzle;
7254
7255 int64_t Lane[LANE_NUM];
7256 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7257 "expected a 2-bit lane id")) {
7258 Imm = QUAD_PERM_ENC;
7259 for (unsigned I = 0; I < LANE_NUM; ++I) {
7260 Imm |= Lane[I] << (LANE_SHIFT * I);
7261 }
7262 return true;
7263 }
7264 return false;
7265 }
7266
7267 bool
7268 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7269 using namespace llvm::AMDGPU::Swizzle;
7270
7271 SMLoc Loc;
7272 int64_t GroupSize;
7273 int64_t LaneIdx;
7274
7275 if (!parseSwizzleOperand(GroupSize,
7276 2, 32,
7277 "group size must be in the interval [2,32]",
7278 Loc)) {
7279 return false;
7280 }
7281 if (!isPowerOf2_64(GroupSize)) {
7282 Error(Loc, "group size must be a power of two");
7283 return false;
7284 }
7285 if (parseSwizzleOperand(LaneIdx,
7286 0, GroupSize - 1,
7287 "lane id must be in the interval [0,group size - 1]",
7288 Loc)) {
7289 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7290 return true;
7291 }
7292 return false;
7293 }
7294
7295 bool
7296 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7297 using namespace llvm::AMDGPU::Swizzle;
7298
7299 SMLoc Loc;
7300 int64_t GroupSize;
7301
7302 if (!parseSwizzleOperand(GroupSize,
7303 2, 32,
7304 "group size must be in the interval [2,32]",
7305 Loc)) {
7306 return false;
7307 }
7308 if (!isPowerOf2_64(GroupSize)) {
7309 Error(Loc, "group size must be a power of two");
7310 return false;
7311 }
7312
7313 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7314 return true;
7315 }
7316
7317 bool
7318 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7319 using namespace llvm::AMDGPU::Swizzle;
7320
7321 SMLoc Loc;
7322 int64_t GroupSize;
7323
7324 if (!parseSwizzleOperand(GroupSize,
7325 1, 16,
7326 "group size must be in the interval [1,16]",
7327 Loc)) {
7328 return false;
7329 }
7330 if (!isPowerOf2_64(GroupSize)) {
7331 Error(Loc, "group size must be a power of two");
7332 return false;
7333 }
7334
7335 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7336 return true;
7337 }
7338
7339 bool
7340 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7341 using namespace llvm::AMDGPU::Swizzle;
7342
7343 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7344 return false;
7345 }
7346
7347 StringRef Ctl;
7348 SMLoc StrLoc = getLoc();
7349 if (!parseString(Ctl)) {
7350 return false;
7351 }
7352 if (Ctl.size() != BITMASK_WIDTH) {
7353 Error(StrLoc, "expected a 5-character mask");
7354 return false;
7355 }
7356
7357 unsigned AndMask = 0;
7358 unsigned OrMask = 0;
7359 unsigned XorMask = 0;
7360
7361 for (size_t i = 0; i < Ctl.size(); ++i) {
7362 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7363 switch(Ctl[i]) {
7364 default:
7365 Error(StrLoc, "invalid mask");
7366 return false;
7367 case '0':
7368 break;
7369 case '1':
7370 OrMask |= Mask;
7371 break;
7372 case 'p':
7373 AndMask |= Mask;
7374 break;
7375 case 'i':
7376 AndMask |= Mask;
7377 XorMask |= Mask;
7378 break;
7379 }
7380 }
7381
7382 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7383 return true;
7384 }
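// Example of the 5-character mask parsed above (illustrative):
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
// Characters map MSB-to-LSB onto lane-id bits 4..0:
//   '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it, 'i' inverts it.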
7385
7386 bool
7387 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7388
7389 SMLoc OffsetLoc = getLoc();
7390
7391 if (!parseExpr(Imm, "a swizzle macro")) {
7392 return false;
7393 }
7394 if (!isUInt<16>(Imm)) {
7395 Error(OffsetLoc, "expected a 16-bit offset");
7396 return false;
7397 }
7398 return true;
7399 }
7400
7401 bool
7402 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7403 using namespace llvm::AMDGPU::Swizzle;
7404
7405 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7406
7407 SMLoc ModeLoc = getLoc();
7408 bool Ok = false;
7409
7410 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7411 Ok = parseSwizzleQuadPerm(Imm);
7412 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7413 Ok = parseSwizzleBitmaskPerm(Imm);
7414 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7415 Ok = parseSwizzleBroadcast(Imm);
7416 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7417 Ok = parseSwizzleSwap(Imm);
7418 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7419 Ok = parseSwizzleReverse(Imm);
7420 } else {
7421 Error(ModeLoc, "expected a swizzle mode");
7422 }
7423
7424 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7425 }
7426
7427 return false;
7428 }
7429
7430 OperandMatchResultTy
7431 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7432 SMLoc S = getLoc();
7433 int64_t Imm = 0;
7434
7435 if (trySkipId("offset")) {
7436
7437 bool Ok = false;
7438 if (skipToken(AsmToken::Colon, "expected a colon")) {
7439 if (trySkipId("swizzle")) {
7440 Ok = parseSwizzleMacro(Imm);
7441 } else {
7442 Ok = parseSwizzleOffset(Imm);
7443 }
7444 }
7445
7446 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7447
7448 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7449 } else {
7450 // Swizzle "offset" operand is optional.
7451 // If it is omitted, try parsing other optional operands.
7452 return parseOptionalOpr(Operands);
7453 }
7454 }
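// Other swizzle macro forms routed through parseSwizzleMacro above
// (illustrative):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v5, v1 offset:swizzle(REVERSE, 8)
//   ds_swizzle_b32 v5, v1 offset:0x8000    ; raw 16-bit offset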
7455
7456 bool
7457 AMDGPUOperand::isSwizzle() const {
7458 return isImmTy(ImmTySwizzle);
7459 }
7460
7461 //===----------------------------------------------------------------------===//
7462 // VGPR Index Mode
7463 //===----------------------------------------------------------------------===//
7464
7465 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7466
7467 using namespace llvm::AMDGPU::VGPRIndexMode;
7468
7469 if (trySkipToken(AsmToken::RParen)) {
7470 return OFF;
7471 }
7472
7473 int64_t Imm = 0;
7474
7475 while (true) {
7476 unsigned Mode = 0;
7477 SMLoc S = getLoc();
7478
7479 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7480 if (trySkipId(IdSymbolic[ModeId])) {
7481 Mode = 1 << ModeId;
7482 break;
7483 }
7484 }
7485
7486 if (Mode == 0) {
7487 Error(S, (Imm == 0)?
7488 "expected a VGPR index mode or a closing parenthesis" :
7489 "expected a VGPR index mode");
7490 return UNDEF;
7491 }
7492
7493 if (Imm & Mode) {
7494 Error(S, "duplicate VGPR index mode");
7495 return UNDEF;
7496 }
7497 Imm |= Mode;
7498
7499 if (trySkipToken(AsmToken::RParen))
7500 break;
7501 if (!skipToken(AsmToken::Comma,
7502 "expected a comma or a closing parenthesis"))
7503 return UNDEF;
7504 }
7505
7506 return Imm;
7507 }
7508
7509 OperandMatchResultTy
7510 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7511
7512 using namespace llvm::AMDGPU::VGPRIndexMode;
7513
7514 int64_t Imm = 0;
7515 SMLoc S = getLoc();
7516
7517 if (trySkipId("gpr_idx", AsmToken::LParen)) {
7518 Imm = parseGPRIdxMacro();
7519 if (Imm == UNDEF)
7520 return MatchOperand_ParseFail;
7521 } else {
7522 if (getParser().parseAbsoluteExpression(Imm))
7523 return MatchOperand_ParseFail;
7524 if (Imm < 0 || !isUInt<4>(Imm)) {
7525 Error(S, "invalid immediate: only 4-bit values are legal");
7526 return MatchOperand_ParseFail;
7527 }
7528 }
7529
7530 Operands.push_back(
7531 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7532 return MatchOperand_Success;
7533 }
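// Illustrative gpr_idx forms accepted above (mode names follow
// VGPRIndexMode::IdSymbolic):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)
//   s_set_gpr_idx_on s0, 9      ; raw 4-bit immediate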
7534
7535 bool AMDGPUOperand::isGPRIdxMode() const {
7536 return isImmTy(ImmTyGprIdxMode);
7537 }
7538
7539 //===----------------------------------------------------------------------===//
7540 // sopp branch targets
7541 //===----------------------------------------------------------------------===//
7542
7543 OperandMatchResultTy
7544 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7545
7546 // Make sure we are not parsing something
7547 // that looks like a label or an expression but is not.
7548 // This will improve error messages.
7549 if (isRegister() || isModifier())
7550 return MatchOperand_NoMatch;
7551
7552 if (!parseExpr(Operands))
7553 return MatchOperand_ParseFail;
7554
7555 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7556 assert(Opr.isImm() || Opr.isExpr());
7557 SMLoc Loc = Opr.getStartLoc();
7558
7559 // Currently we do not support arbitrary expressions as branch targets.
7560 // Only labels and absolute expressions are accepted.
7561 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7562 Error(Loc, "expected an absolute expression or a label");
7563 } else if (Opr.isImm() && !Opr.isS16Imm()) {
7564 Error(Loc, "expected a 16-bit signed jump offset");
7565 }
7566
7567 return MatchOperand_Success;
7568 }
7569
7570 //===----------------------------------------------------------------------===//
7571 // Boolean holding registers
7572 //===----------------------------------------------------------------------===//
7573
7574 OperandMatchResultTy
7575 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7576 return parseReg(Operands);
7577 }
7578
7579 //===----------------------------------------------------------------------===//
7580 // mubuf
7581 //===----------------------------------------------------------------------===//
7582
7583 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7584 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7585 }
7586
7587 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7588 const OperandVector &Operands,
7589 bool IsAtomic,
7590 bool IsLds) {
7591 OptionalImmIndexMap OptionalIdx;
7592 unsigned FirstOperandIdx = 1;
7593 bool IsAtomicReturn = false;
7594
7595 if (IsAtomic) {
7596 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7597 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7598 if (!Op.isCPol())
7599 continue;
7600 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7601 break;
7602 }
7603
7604 if (!IsAtomicReturn) {
7605 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7606 if (NewOpc != -1)
7607 Inst.setOpcode(NewOpc);
7608 }
7609
7610 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7611 SIInstrFlags::IsAtomicRet;
7612 }
7613
7614 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7615 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7616
7617 // Add the register arguments
7618 if (Op.isReg()) {
7619 Op.addRegOperands(Inst, 1);
7620 // Insert a tied src for atomic return dst.
7621 // This cannot be postponed as subsequent calls to
7622 // addImmOperands rely on the correct number of MC operands.
7623 if (IsAtomicReturn && i == FirstOperandIdx)
7624 Op.addRegOperands(Inst, 1);
7625 continue;
7626 }
7627
7628 // Handle the case where soffset is an immediate
7629 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7630 Op.addImmOperands(Inst, 1);
7631 continue;
7632 }
7633
7634 // Handle tokens like 'offen' which are sometimes hard-coded into the
7635 // asm string. There are no MCInst operands for these.
7636 if (Op.isToken()) {
7637 continue;
7638 }
7639 assert(Op.isImm());
7640
7641 // Handle optional arguments
7642 OptionalIdx[Op.getImmTy()] = i;
7643 }
7644
7645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7647
7648 if (!IsLds) { // tfe is not legal with lds opcodes
7649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7650 }
7651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7652 }
7653
7654 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7655 OptionalImmIndexMap OptionalIdx;
7656
7657 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7658 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7659
7660 // Add the register arguments
7661 if (Op.isReg()) {
7662 Op.addRegOperands(Inst, 1);
7663 continue;
7664 }
7665
7666 // Handle the case where soffset is an immediate
7667 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7668 Op.addImmOperands(Inst, 1);
7669 continue;
7670 }
7671
7672 // Handle tokens like 'offen' which are sometimes hard-coded into the
7673 // asm string. There are no MCInst operands for these.
7674 if (Op.isToken()) {
7675 continue;
7676 }
7677 assert(Op.isImm());
7678
7679 // Handle optional arguments
7680 OptionalIdx[Op.getImmTy()] = i;
7681 }
7682
7683 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7684 AMDGPUOperand::ImmTyOffset);
7685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7689 }
7690
7691 //===----------------------------------------------------------------------===//
7692 // mimg
7693 //===----------------------------------------------------------------------===//
7694
7695 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7696 bool IsAtomic) {
7697 unsigned I = 1;
7698 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7699 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7700 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7701 }
7702
7703 if (IsAtomic) {
7704 // Add src, same as dst
7705 assert(Desc.getNumDefs() == 1);
7706 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7707 }
7708
7709 OptionalImmIndexMap OptionalIdx;
7710
7711 for (unsigned E = Operands.size(); I != E; ++I) {
7712 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7713
7714 // Add the register arguments
7715 if (Op.isReg()) {
7716 Op.addRegOperands(Inst, 1);
7717 } else if (Op.isImmModifier()) {
7718 OptionalIdx[Op.getImmTy()] = I;
7719 } else if (!Op.isToken()) {
7720 llvm_unreachable("unexpected operand type");
7721 }
7722 }
7723
7724 bool IsGFX10Plus = isGFX10Plus();
7725
7726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7727 if (IsGFX10Plus)
7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7732 if (IsGFX10Plus)
7733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7734 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7737 if (!IsGFX10Plus)
7738 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7739 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7740 }
7741
7742 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7743 cvtMIMG(Inst, Operands, true);
7744 }
7745
7746 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7747 OptionalImmIndexMap OptionalIdx;
7748 bool IsAtomicReturn = false;
7749
7750 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7751 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7752 if (!Op.isCPol())
7753 continue;
7754 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7755 break;
7756 }
7757
7758 if (!IsAtomicReturn) {
7759 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7760 if (NewOpc != -1)
7761 Inst.setOpcode(NewOpc);
7762 }
7763
7764 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7765 SIInstrFlags::IsAtomicRet;
7766
7767 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7768 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7769
7770 // Add the register arguments
7771 if (Op.isReg()) {
7772 Op.addRegOperands(Inst, 1);
7773 if (IsAtomicReturn && i == 1)
7774 Op.addRegOperands(Inst, 1);
7775 continue;
7776 }
7777
7778 // Handle the case where soffset is an immediate
7779 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7780 Op.addImmOperands(Inst, 1);
7781 continue;
7782 }
7783
7784 // Handle tokens like 'offen' which are sometimes hard-coded into the
7785 // asm string. There are no MCInst operands for these.
7786 if (Op.isToken()) {
7787 continue;
7788 }
7789 assert(Op.isImm());
7790
7791 // Handle optional arguments
7792 OptionalIdx[Op.getImmTy()] = i;
7793 }
7794
7795 if ((int)Inst.getNumOperands() <=
7796 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7797 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7798 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7799 }
7800
7801 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7802 const OperandVector &Operands) {
7803 for (unsigned I = 1; I < Operands.size(); ++I) {
7804 auto &Operand = (AMDGPUOperand &)*Operands[I];
7805 if (Operand.isReg())
7806 Operand.addRegOperands(Inst, 1);
7807 }
7808
7809 Inst.addOperand(MCOperand::createImm(1)); // a16
7810 }
7811
7812 //===----------------------------------------------------------------------===//
7813 // smrd
7814 //===----------------------------------------------------------------------===//
7815
7816 bool AMDGPUOperand::isSMRDOffset8() const {
7817 return isImm() && isUInt<8>(getImm());
7818 }
7819
7820 bool AMDGPUOperand::isSMEMOffset() const {
7821 return isImmTy(ImmTyNone) ||
7822 isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7823 }
7824
7825 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7826 // 32-bit literals are only supported on CI and we only want to use them
7827 // when the offset does not fit in 8 bits.
7828 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7829 }
7830
7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7832 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7833 }
7834
7835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7836 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7837 }
7838
7839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7840 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7841 }
7842
7843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7844 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7845 }
7846
7847 //===----------------------------------------------------------------------===//
7848 // vop3
7849 //===----------------------------------------------------------------------===//
7850
7851 static bool ConvertOmodMul(int64_t &Mul) {
7852 if (Mul != 1 && Mul != 2 && Mul != 4)
7853 return false;
7854
7855 Mul >>= 1;
7856 return true;
7857 }
7858
7859 static bool ConvertOmodDiv(int64_t &Div) {
7860 if (Div == 1) {
7861 Div = 0;
7862 return true;
7863 }
7864
7865 if (Div == 2) {
7866 Div = 3;
7867 return true;
7868 }
7869
7870 return false;
7871 }
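// These helpers map the source-level omod syntax onto the 2-bit OMOD field:
// mul:2 -> 1, mul:4 -> 2, div:2 -> 3, and mul:1/div:1 -> 0. For example:
//   v_add_f32_e64 v0, v1, v2 mul:2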
7872
7873 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7874 // This is intentional and ensures compatibility with sp3.
7875 // See bug 35397 for details.
7876 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7877 if (BoundCtrl == 0 || BoundCtrl == 1) {
7878 BoundCtrl = 1;
7879 return true;
7880 }
7881 return false;
7882 }
7883
7884 // Note: the order in this table matches the order of operands in AsmString.
7885 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7886 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
7887 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
7888 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
7889 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7890 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7891 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
7892 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
7893 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
7894 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7895 {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
7896 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
7897 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
7898 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
7899 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
7900 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
7901 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7902 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
7903 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
7904 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
7905 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
7906 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
7907 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
7908 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
7909 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
7910 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7911 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7912 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7913 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7914 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7915 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7916 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7917 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7918 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7919 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7920 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr},
7921 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7922 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7923 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7924 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7925 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
7926 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7927 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7928 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7929 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7930 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7931 };
7932
7933 void AMDGPUAsmParser::onBeginOfFile() {
7934 if (!getParser().getStreamer().getTargetStreamer() ||
7935 getSTI().getTargetTriple().getArch() == Triple::r600)
7936 return;
7937
7938 if (!getTargetStreamer().getTargetID())
7939 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7940
7941 if (isHsaAbiVersion3AndAbove(&getSTI()))
7942 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7943 }
7944
7945 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7946
7947 OperandMatchResultTy res = parseOptionalOpr(Operands);
7948
7949 // This is a hack to enable hardcoded mandatory operands which follow
7950 // optional operands.
7951 //
7952 // The current design assumes that all operands after the first optional operand
7953 // are also optional. However, the implementation of some instructions violates
7954 // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
7955 //
7956 // To alleviate this problem, we have to (implicitly) parse extra operands
7957 // to make sure autogenerated parser of custom operands never hit hardcoded
7958 // mandatory operands.
7959
7960 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7961 if (res != MatchOperand_Success ||
7962 isToken(AsmToken::EndOfStatement))
7963 break;
7964
7965 trySkipToken(AsmToken::Comma);
7966 res = parseOptionalOpr(Operands);
7967 }
7968
7969 return res;
7970 }
7971
7972 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7973 OperandMatchResultTy res;
7974 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7975 // try to parse any optional operand here
7976 if (Op.IsBit) {
7977 res = parseNamedBit(Op.Name, Operands, Op.Type);
7978 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7979 res = parseOModOperand(Operands);
7980 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7981 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7982 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7983 res = parseSDWASel(Operands, Op.Name, Op.Type);
7984 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7985 res = parseSDWADstUnused(Operands);
7986 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7987 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7988 Op.Type == AMDGPUOperand::ImmTyNegLo ||
7989 Op.Type == AMDGPUOperand::ImmTyNegHi) {
7990 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7991 Op.ConvertResult);
7992 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7993 res = parseDim(Operands);
7994 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7995 res = parseCPol(Operands);
7996 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7997 res = parseDPP8(Operands);
7998 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7999 res = parseDPPCtrl(Operands);
8000 } else {
8001 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
8002 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
8003 res = parseOperandArrayWithPrefix("neg", Operands,
8004 AMDGPUOperand::ImmTyBLGP,
8005 nullptr);
8006 }
8007 }
8008 if (res != MatchOperand_NoMatch) {
8009 return res;
8010 }
8011 }
8012 return MatchOperand_NoMatch;
8013 }
8014
8015 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8016 StringRef Name = getTokenStr();
8017 if (Name == "mul") {
8018 return parseIntWithPrefix("mul", Operands,
8019 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8020 }
8021
8022 if (Name == "div") {
8023 return parseIntWithPrefix("div", Operands,
8024 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8025 }
8026
8027 return MatchOperand_NoMatch;
8028 }
8029
8030 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8031 // the number of src operands present, then copies that bit into src0_modifiers.
8032 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8033 int Opc = Inst.getOpcode();
8034 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8035 if (OpSelIdx == -1)
8036 return;
8037
8038 int SrcNum;
8039 const int Ops[] = { AMDGPU::OpName::src0,
8040 AMDGPU::OpName::src1,
8041 AMDGPU::OpName::src2 };
8042 for (SrcNum = 0;
8043 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8044 ++SrcNum);
8045 assert(SrcNum > 0);
8046
8047 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8048
8049 if ((OpSel & (1 << SrcNum)) != 0) {
8050 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8051 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8052 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8053 }
8054 }
8055
8056 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8057 const OperandVector &Operands) {
8058 cvtVOP3P(Inst, Operands);
8059 cvtVOP3DstOpSelOnly(Inst);
8060 }
8061
8062 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8063 OptionalImmIndexMap &OptionalIdx) {
8064 cvtVOP3P(Inst, Operands, OptionalIdx);
8065 cvtVOP3DstOpSelOnly(Inst);
8066 }
8067
8068 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8069 // 1. This operand is input modifiers
8070 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8071 // 2. This is not last operand
8072 && Desc.NumOperands > (OpNum + 1)
8073 // 3. Next operand is register class
8074 && Desc.OpInfo[OpNum + 1].RegClass != -1
8075 // 4. Next register is not tied to any other operand
8076 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8077 }
8078
8079 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8080 {
8081 OptionalImmIndexMap OptionalIdx;
8082 unsigned Opc = Inst.getOpcode();
8083
8084 unsigned I = 1;
8085 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8086 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8087 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8088 }
8089
8090 for (unsigned E = Operands.size(); I != E; ++I) {
8091 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8092 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8093 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8094 } else if (Op.isInterpSlot() ||
8095 Op.isInterpAttr() ||
8096 Op.isAttrChan()) {
8097 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8098 } else if (Op.isImmModifier()) {
8099 OptionalIdx[Op.getImmTy()] = I;
8100 } else {
8101 llvm_unreachable("unhandled operand type");
8102 }
8103 }
8104
8105 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8107 }
8108
8109 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8111 }
8112
8113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8115 }
8116 }
8117
8118 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8119 {
8120 OptionalImmIndexMap OptionalIdx;
8121 unsigned Opc = Inst.getOpcode();
8122
8123 unsigned I = 1;
8124 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8125 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8126 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8127 }
8128
8129 for (unsigned E = Operands.size(); I != E; ++I) {
8130 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8131 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8132 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8133 } else if (Op.isImmModifier()) {
8134 OptionalIdx[Op.getImmTy()] = I;
8135 } else {
8136 llvm_unreachable("unhandled operand type");
8137 }
8138 }
8139
8140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8141
8142 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8143 if (OpSelIdx != -1)
8144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8145
8146 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8147
8148 if (OpSelIdx == -1)
8149 return;
8150
8151 const int Ops[] = { AMDGPU::OpName::src0,
8152 AMDGPU::OpName::src1,
8153 AMDGPU::OpName::src2 };
8154 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8155 AMDGPU::OpName::src1_modifiers,
8156 AMDGPU::OpName::src2_modifiers };
8157
8158 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8159
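// Illustrative summary of the loop below (not from the original source):
// op_sel bit J (J = 0..2) is copied into srcJ_modifiers as OP_SEL_0; bit 3 is
// the destination select and is recorded in src0_modifiers as DST_OP_SEL.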
8160 for (int J = 0; J < 3; ++J) {
8161 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8162 if (OpIdx == -1)
8163 break;
8164
8165 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8166 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8167
8168 if ((OpSel & (1 << J)) != 0)
8169 ModVal |= SISrcMods::OP_SEL_0;
8170 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8171 (OpSel & (1 << 3)) != 0)
8172 ModVal |= SISrcMods::DST_OP_SEL;
8173
8174 Inst.getOperand(ModIdx).setImm(ModVal);
8175 }
8176 }
8177
8178 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8179 OptionalImmIndexMap &OptionalIdx) {
8180 unsigned Opc = Inst.getOpcode();
8181
8182 unsigned I = 1;
8183 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8184 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8185 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8186 }
8187
8188 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8189 // This instruction has src modifiers
8190 for (unsigned E = Operands.size(); I != E; ++I) {
8191 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8192 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8193 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8194 } else if (Op.isImmModifier()) {
8195 OptionalIdx[Op.getImmTy()] = I;
8196 } else if (Op.isRegOrImm()) {
8197 Op.addRegOrImmOperands(Inst, 1);
8198 } else {
8199 llvm_unreachable("unhandled operand type");
8200 }
8201 }
8202 } else {
8203 // No src modifiers
8204 for (unsigned E = Operands.size(); I != E; ++I) {
8205 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8206 if (Op.isMod()) {
8207 OptionalIdx[Op.getImmTy()] = I;
8208 } else {
8209 Op.addRegOrImmOperands(Inst, 1);
8210 }
8211 }
8212 }
8213
8214 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8215 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8216 }
8217
8218 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8220 }
8221
8222 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8223 // they have a src2 register operand that is tied to the dst operand.
8224 // The assembler does not allow modifiers for this operand, so
8225 // src2_modifiers must be 0.
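// Illustrative note (not from the original source): for "v_mac_f32 v0, v1, v2"
// the loop above emits vdst, src0_modifiers, src0, src1_modifiers, src1
// (plus clamp/omod above), and the special case below then inserts
// src2_modifiers = 0 and a copy of the dst operand at the src2 position,
// so src2 ends up tied to vdst as the opcode requires.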
8226 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8227 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8228 Opc == AMDGPU::V_MAC_F32_e64_vi ||
8229 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8230 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8231 Opc == AMDGPU::V_MAC_F16_e64_vi ||
8232 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8233 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8234 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8235 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8236 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8237 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8238 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8239 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8240 auto it = Inst.begin();
8241 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8242 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8243 ++it;
8244 // Copy the operand to ensure it's not invalidated when Inst grows.
8245 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8246 }
8247 }
8248
8249 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8250 OptionalImmIndexMap OptionalIdx;
8251 cvtVOP3(Inst, Operands, OptionalIdx);
8252 }
8253
8254 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8255 OptionalImmIndexMap &OptIdx) {
8256 const int Opc = Inst.getOpcode();
8257 const MCInstrDesc &Desc = MII.get(Opc);
8258
8259 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8260
8261 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8262 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8263 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8264 Inst.addOperand(Inst.getOperand(0));
8265 }
8266
8267 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8268 assert(!IsPacked);
8269 Inst.addOperand(Inst.getOperand(0));
8270 }
8271
8272 // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
8273 // instruction, and then figure out where to actually put the modifiers.
8274
8275 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8276 if (OpSelIdx != -1) {
8277 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8278 }
8279
8280 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8281 if (OpSelHiIdx != -1) {
8282 int DefaultVal = IsPacked ? -1 : 0;
8283 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8284 DefaultVal);
8285 }
8286
8287 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8288 if (NegLoIdx != -1) {
8289 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8290 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8291 }
8292
8293 const int Ops[] = { AMDGPU::OpName::src0,
8294 AMDGPU::OpName::src1,
8295 AMDGPU::OpName::src2 };
8296 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8297 AMDGPU::OpName::src1_modifiers,
8298 AMDGPU::OpName::src2_modifiers };
8299
8300 unsigned OpSel = 0;
8301 unsigned OpSelHi = 0;
8302 unsigned NegLo = 0;
8303 unsigned NegHi = 0;
8304
8305 if (OpSelIdx != -1)
8306 OpSel = Inst.getOperand(OpSelIdx).getImm();
8307
8308 if (OpSelHiIdx != -1)
8309 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8310
8311 if (NegLoIdx != -1) {
8312 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8313 NegLo = Inst.getOperand(NegLoIdx).getImm();
8314 NegHi = Inst.getOperand(NegHiIdx).getImm();
8315 }
8316
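// Illustrative summary of the loop below (not from the original source):
// op_sel bit J sets OP_SEL_0 in srcJ_modifiers, op_sel_hi bit J sets
// OP_SEL_1, and neg_lo/neg_hi bit J set NEG/NEG_HI respectively.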
8317 for (int J = 0; J < 3; ++J) {
8318 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8319 if (OpIdx == -1)
8320 break;
8321
8322 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8323
8324 if (ModIdx == -1)
8325 continue;
8326
8327 uint32_t ModVal = 0;
8328
8329 if ((OpSel & (1 << J)) != 0)
8330 ModVal |= SISrcMods::OP_SEL_0;
8331
8332 if ((OpSelHi & (1 << J)) != 0)
8333 ModVal |= SISrcMods::OP_SEL_1;
8334
8335 if ((NegLo & (1 << J)) != 0)
8336 ModVal |= SISrcMods::NEG;
8337
8338 if ((NegHi & (1 << J)) != 0)
8339 ModVal |= SISrcMods::NEG_HI;
8340
8341 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8342 }
8343 }
8344
8345 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8346 OptionalImmIndexMap OptIdx;
8347 cvtVOP3(Inst, Operands, OptIdx);
8348 cvtVOP3P(Inst, Operands, OptIdx);
8349 }
8350
8351 //===----------------------------------------------------------------------===//
8352 // VOPD
8353 //===----------------------------------------------------------------------===//
8354
8355 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8356 if (!hasVOPD(getSTI()))
8357 return MatchOperand_NoMatch;
8358
8359 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8360 SMLoc S = getLoc();
8361 lex();
8362 lex();
8363 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8364 const MCExpr *Expr;
8365 if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
8366 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8367 return MatchOperand_Success;
8368 }
8369 Error(S, "invalid VOPD :: usage");
8370 return MatchOperand_ParseFail;
8371 }
8372 return MatchOperand_NoMatch;
8373 }
8374
8375 // Create VOPD MCInst operands using parsed assembler operands.
8376 // Parsed VOPD operands are ordered as follows:
8377 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
8378 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8379 // If both OpX and OpY have an imm, the first imm has a different name:
8380 // OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
8381 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8382 // MCInst operands have the following order:
8383 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
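// Illustrative example (assumed asm shape, not taken from this file):
//   OpXMnemo dstX, src0X, vsrc1X :: OpYMnemo dstY, src0Y, vsrc1Y
// is converted to MCInst operands dstX, dstY, src0X, vsrc1X, src0Y, vsrc1Y.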
8384 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8385 auto addOp = [&](uint16_t i) { // NOLINT:function pointer
8386 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8387 if (Op.isReg()) {
8388 Op.addRegOperands(Inst, 1);
8389 return;
8390 }
8391 if (Op.isImm()) {
8392 Op.addImmOperands(Inst, 1);
8393 return;
8394 }
8395 // Handle tokens like 'offen' which are sometimes hard-coded into the
8396 // asm string. There are no MCInst operands for these.
8397 if (Op.isToken()) {
8398 return;
8399 }
8400 llvm_unreachable("Unhandled operand type in cvtVOPD");
8401 };
8402
8403 // Indices into MCInst.Operands
8404 const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
8405 const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
8406 const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ...
8407
8408 unsigned Opc = Inst.getOpcode();
8409 bool HasVsrc1X =
8410 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
8411 bool HasImmX =
8412 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8413 (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8414 FmamkOpXImmMCIndex ||
8415 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8416 FmaakOpXImmMCIndex));
8417
8418 bool HasVsrc1Y =
8419 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
8420 bool HasImmY =
8421 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8422 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
8423 MinOpYImmMCIndex + HasVsrc1X;
8424
8425 // Indices of parsed operands relative to dst
8426 const auto DstIdx = 0;
8427 const auto Src0Idx = 1;
8428 const auto Vsrc1OrImmIdx = 2;
8429
8430 const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
8431 const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
8432
8433 // Offsets into parsed operands
8434 const auto OpXFirstOperandOffset = 1;
8435 const auto OpYFirstOperandOffset =
8436 OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
8437
8438 // Order of addOp calls determines MC operand order
8439 addOp(OpXFirstOperandOffset + DstIdx); // vdstX
8440 addOp(OpYFirstOperandOffset + DstIdx); // vdstY
8441
8442 addOp(OpXFirstOperandOffset + Src0Idx); // src0X
8443 if (HasImmX) {
8444 // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
8445 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
8446 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
8447 } else {
8448 if (HasVsrc1X) // all except v_mov
8449 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
8450 }
8451
8452 addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
8453 if (HasImmY) {
8454 // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
8455 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
8456 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
8457 } else {
8458 if (HasVsrc1Y) // all except v_mov
8459 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
8460 }
8461 }
8462
8463 //===----------------------------------------------------------------------===//
8464 // dpp
8465 //===----------------------------------------------------------------------===//
8466
8467 bool AMDGPUOperand::isDPP8() const {
8468 return isImmTy(ImmTyDPP8);
8469 }
8470
8471 bool AMDGPUOperand::isDPPCtrl() const {
8472 using namespace AMDGPU::DPP;
8473
8474 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8475 if (result) {
8476 int64_t Imm = getImm();
8477 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8478 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8479 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8480 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8481 (Imm == DppCtrl::WAVE_SHL1) ||
8482 (Imm == DppCtrl::WAVE_ROL1) ||
8483 (Imm == DppCtrl::WAVE_SHR1) ||
8484 (Imm == DppCtrl::WAVE_ROR1) ||
8485 (Imm == DppCtrl::ROW_MIRROR) ||
8486 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8487 (Imm == DppCtrl::BCAST15) ||
8488 (Imm == DppCtrl::BCAST31) ||
8489 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8490 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8491 }
8492 return false;
8493 }
8494
8495 //===----------------------------------------------------------------------===//
8496 // mAI
8497 //===----------------------------------------------------------------------===//
8498
8499 bool AMDGPUOperand::isBLGP() const {
8500 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8501 }
8502
8503 bool AMDGPUOperand::isCBSZ() const {
8504 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8505 }
8506
8507 bool AMDGPUOperand::isABID() const {
8508 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8509 }
8510
8511 bool AMDGPUOperand::isS16Imm() const {
8512 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8513 }
8514
8515 bool AMDGPUOperand::isU16Imm() const {
8516 return isImm() && isUInt<16>(getImm());
8517 }
8518
8519 //===----------------------------------------------------------------------===//
8520 // dim
8521 //===----------------------------------------------------------------------===//
8522
8523 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8524 // We want to allow "dim:1D" etc.,
8525 // but the initial 1 is tokenized as an integer.
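// For example (illustrative), "dim:2D" arrives as the integer token "2"
// followed by the identifier "D"; both "2D" and "SQ_RSRC_IMG_2D" resolve to
// the same MIMGDimInfo entry below.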
8526 std::string Token;
8527 if (isToken(AsmToken::Integer)) {
8528 SMLoc Loc = getToken().getEndLoc();
8529 Token = std::string(getTokenStr());
8530 lex();
8531 if (getLoc() != Loc)
8532 return false;
8533 }
8534
8535 StringRef Suffix;
8536 if (!parseId(Suffix))
8537 return false;
8538 Token += Suffix;
8539
8540 StringRef DimId = Token;
8541 if (DimId.startswith("SQ_RSRC_IMG_"))
8542 DimId = DimId.drop_front(12);
8543
8544 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8545 if (!DimInfo)
8546 return false;
8547
8548 Encoding = DimInfo->Encoding;
8549 return true;
8550 }
8551
8552 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8553 if (!isGFX10Plus())
8554 return MatchOperand_NoMatch;
8555
8556 SMLoc S = getLoc();
8557
8558 if (!trySkipId("dim", AsmToken::Colon))
8559 return MatchOperand_NoMatch;
8560
8561 unsigned Encoding;
8562 SMLoc Loc = getLoc();
8563 if (!parseDimId(Encoding)) {
8564 Error(Loc, "invalid dim value");
8565 return MatchOperand_ParseFail;
8566 }
8567
8568 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8569 AMDGPUOperand::ImmTyDim));
8570 return MatchOperand_Success;
8571 }
8572
8573 //===----------------------------------------------------------------------===//
8574 // dpp
8575 //===----------------------------------------------------------------------===//
8576
8577 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8578 SMLoc S = getLoc();
8579
8580 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8581 return MatchOperand_NoMatch;
8582
8583 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
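// Worked example (derived from the packing loop below): each lane select
// occupies 3 bits, so dpp8:[7,6,5,4,3,2,1,0] packs to
// 7 | 6<<3 | 5<<6 | ... | 1<<18 | 0<<21 = 0x53977.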
8584
8585 int64_t Sels[8];
8586
8587 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8588 return MatchOperand_ParseFail;
8589
8590 for (size_t i = 0; i < 8; ++i) {
8591 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8592 return MatchOperand_ParseFail;
8593
8594 SMLoc Loc = getLoc();
8595 if (getParser().parseAbsoluteExpression(Sels[i]))
8596 return MatchOperand_ParseFail;
8597 if (0 > Sels[i] || 7 < Sels[i]) {
8598 Error(Loc, "expected a 3-bit value");
8599 return MatchOperand_ParseFail;
8600 }
8601 }
8602
8603 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8604 return MatchOperand_ParseFail;
8605
8606 unsigned DPP8 = 0;
8607 for (size_t i = 0; i < 8; ++i)
8608 DPP8 |= (Sels[i] << (i * 3));
8609
8610 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8611 return MatchOperand_Success;
8612 }
8613
8614 bool
8615 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8616 const OperandVector &Operands) {
8617 if (Ctrl == "row_newbcast")
8618 return isGFX90A();
8619
8620 if (Ctrl == "row_share" ||
8621 Ctrl == "row_xmask")
8622 return isGFX10Plus();
8623
8624 if (Ctrl == "wave_shl" ||
8625 Ctrl == "wave_shr" ||
8626 Ctrl == "wave_rol" ||
8627 Ctrl == "wave_ror" ||
8628 Ctrl == "row_bcast")
8629 return isVI() || isGFX9();
8630
8631 return Ctrl == "row_mirror" ||
8632 Ctrl == "row_half_mirror" ||
8633 Ctrl == "quad_perm" ||
8634 Ctrl == "row_shl" ||
8635 Ctrl == "row_shr" ||
8636 Ctrl == "row_ror";
8637 }
8638
8639 int64_t
8640 AMDGPUAsmParser::parseDPPCtrlPerm() {
8641 // quad_perm:[%d,%d,%d,%d]
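// Worked example (derived from the packing below): quad_perm:[0,1,2,3]
// packs to 0 | 1<<2 | 2<<4 | 3<<6 = 0xe4, the identity permutation.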
8642
8643 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8644 return -1;
8645
8646 int64_t Val = 0;
8647 for (int i = 0; i < 4; ++i) {
8648 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8649 return -1;
8650
8651 int64_t Temp;
8652 SMLoc Loc = getLoc();
8653 if (getParser().parseAbsoluteExpression(Temp))
8654 return -1;
8655 if (Temp < 0 || Temp > 3) {
8656 Error(Loc, "expected a 2-bit value");
8657 return -1;
8658 }
8659
8660 Val += (Temp << i * 2);
8661 }
8662
8663 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8664 return -1;
8665
8666 return Val;
8667 }
8668
8669 int64_t
8670 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8671 using namespace AMDGPU::DPP;
8672
8673 // sel:%d
8674
8675 int64_t Val;
8676 SMLoc Loc = getLoc();
8677
8678 if (getParser().parseAbsoluteExpression(Val))
8679 return -1;
8680
8681 struct DppCtrlCheck {
8682 int64_t Ctrl;
8683 int Lo;
8684 int Hi;
8685 };
8686
8687 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8688 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
8689 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
8690 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
8691 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
8692 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
8693 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
8694 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
8695 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8696 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8697 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8698 .Default({-1, 0, 0});
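// Illustrative mapping (assuming contiguous DppCtrl enum values): "row_shl:1"
// yields ROW_SHL0 | 1 (i.e. ROW_SHL1), while "wave_shl:1" maps directly to
// WAVE_SHL1 because its Lo == Hi.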
8699
8700 bool Valid;
8701 if (Check.Ctrl == -1) {
8702 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8703 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8704 } else {
8705 Valid = Check.Lo <= Val && Val <= Check.Hi;
8706 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8707 }
8708
8709 if (!Valid) {
8710 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8711 return -1;
8712 }
8713
8714 return Val;
8715 }
8716
8717 OperandMatchResultTy
8718 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8719 using namespace AMDGPU::DPP;
8720
8721 if (!isToken(AsmToken::Identifier) ||
8722 !isSupportedDPPCtrl(getTokenStr(), Operands))
8723 return MatchOperand_NoMatch;
8724
8725 SMLoc S = getLoc();
8726 int64_t Val = -1;
8727 StringRef Ctrl;
8728
8729 parseId(Ctrl);
8730
8731 if (Ctrl == "row_mirror") {
8732 Val = DppCtrl::ROW_MIRROR;
8733 } else if (Ctrl == "row_half_mirror") {
8734 Val = DppCtrl::ROW_HALF_MIRROR;
8735 } else {
8736 if (skipToken(AsmToken::Colon, "expected a colon")) {
8737 if (Ctrl == "quad_perm") {
8738 Val = parseDPPCtrlPerm();
8739 } else {
8740 Val = parseDPPCtrlSel(Ctrl);
8741 }
8742 }
8743 }
8744
8745 if (Val == -1)
8746 return MatchOperand_ParseFail;
8747
8748 Operands.push_back(
8749 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8750 return MatchOperand_Success;
8751 }
8752
8753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8754 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8755 }
8756
8757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8758 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8759 }
8760
8761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8762 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8763 }
8764
8765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8766 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8767 }
8768
8769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8770 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8771 }
8772
8773 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8774 OptionalImmIndexMap OptionalIdx;
8775 unsigned Opc = Inst.getOpcode();
8776 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8777 unsigned I = 1;
8778 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8779 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8780 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8781 }
8782
8783 int Fi = 0;
8784 for (unsigned E = Operands.size(); I != E; ++I) {
8785 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8786 MCOI::TIED_TO);
8787 if (TiedTo != -1) {
8788 assert((unsigned)TiedTo < Inst.getNumOperands());
8789 // handle tied old or src2 for MAC instructions
8790 Inst.addOperand(Inst.getOperand(TiedTo));
8791 }
8792 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8793 // Add the register arguments
8794 if (IsDPP8 && Op.isFI()) {
8795 Fi = Op.getImm();
8796 } else if (HasModifiers &&
8797 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8798 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8799 } else if (Op.isReg()) {
8800 Op.addRegOperands(Inst, 1);
8801 } else if (Op.isImm() &&
8802 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8803 assert(!HasModifiers && "Case should be unreachable with modifiers");
8804 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8805 Op.addImmOperands(Inst, 1);
8806 } else if (Op.isImm()) {
8807 OptionalIdx[Op.getImmTy()] = I;
8808 } else {
8809 llvm_unreachable("unhandled operand type");
8810 }
8811 }
8812 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8814 }
8815 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8817 }
8818 if (Desc.TSFlags & SIInstrFlags::VOP3P)
8819 cvtVOP3P(Inst, Operands, OptionalIdx);
8820 else if (Desc.TSFlags & SIInstrFlags::VOP3)
8821 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8822 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8824 }
8825
8826 if (IsDPP8) {
8827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8828 using namespace llvm::AMDGPU::DPP;
8829 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8830 } else {
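// Illustrative note: the default DppCtrl value 0xe4 below is the identity
// quad_perm:[0,1,2,3]; see parseDPPCtrlPerm above.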
8831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8835 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8837 }
8838 }
8839 }
8840
8841 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8842 OptionalImmIndexMap OptionalIdx;
8843
8844 unsigned Opc = Inst.getOpcode();
8845 bool HasModifiers =
8846 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8847 unsigned I = 1;
8848 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8849 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8850 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8851 }
8852
8853 int Fi = 0;
8854 for (unsigned E = Operands.size(); I != E; ++I) {
8855 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8856 MCOI::TIED_TO);
8857 if (TiedTo != -1) {
8858 assert((unsigned)TiedTo < Inst.getNumOperands());
8859 // handle tied old or src2 for MAC instructions
8860 Inst.addOperand(Inst.getOperand(TiedTo));
8861 }
8862 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8863 // Add the register arguments
8864 if (Op.isReg() && validateVccOperand(Op.getReg())) {
8865 // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
8866 // Skip it.
8867 continue;
8868 }
8869
8870 if (IsDPP8) {
8871 if (Op.isDPP8()) {
8872 Op.addImmOperands(Inst, 1);
8873 } else if (HasModifiers &&
8874 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8875 Op.addRegWithFPInputModsOperands(Inst, 2);
8876 } else if (Op.isFI()) {
8877 Fi = Op.getImm();
8878 } else if (Op.isReg()) {
8879 Op.addRegOperands(Inst, 1);
8880 } else {
8881 llvm_unreachable("Invalid operand type");
8882 }
8883 } else {
8884 if (HasModifiers &&
8885 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8886 Op.addRegWithFPInputModsOperands(Inst, 2);
8887 } else if (Op.isReg()) {
8888 Op.addRegOperands(Inst, 1);
8889 } else if (Op.isDPPCtrl()) {
8890 Op.addImmOperands(Inst, 1);
8891 } else if (Op.isImm()) {
8892 // Handle optional arguments
8893 OptionalIdx[Op.getImmTy()] = I;
8894 } else {
8895 llvm_unreachable("Invalid operand type");
8896 }
8897 }
8898 }
8899
8900 if (IsDPP8) {
8901 using namespace llvm::AMDGPU::DPP;
8902 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8903 } else {
8904 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8905 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8906 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8907 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8908 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8909 }
8910 }
8911 }
8912
8913 //===----------------------------------------------------------------------===//
8914 // sdwa
8915 //===----------------------------------------------------------------------===//
8916
8917 OperandMatchResultTy
8918 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8919 AMDGPUOperand::ImmTy Type) {
8920 using namespace llvm::AMDGPU::SDWA;
8921
8922 SMLoc S = getLoc();
8923 StringRef Value;
8924 OperandMatchResultTy res;
8925
8926 SMLoc StringLoc;
8927 res = parseStringWithPrefix(Prefix, Value, StringLoc);
8928 if (res != MatchOperand_Success) {
8929 return res;
8930 }
8931
8932 int64_t Int;
8933 Int = StringSwitch<int64_t>(Value)
8934 .Case("BYTE_0", SdwaSel::BYTE_0)
8935 .Case("BYTE_1", SdwaSel::BYTE_1)
8936 .Case("BYTE_2", SdwaSel::BYTE_2)
8937 .Case("BYTE_3", SdwaSel::BYTE_3)
8938 .Case("WORD_0", SdwaSel::WORD_0)
8939 .Case("WORD_1", SdwaSel::WORD_1)
8940 .Case("DWORD", SdwaSel::DWORD)
8941 .Default(0xffffffff);
8942
8943 if (Int == 0xffffffff) {
8944 Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8945 return MatchOperand_ParseFail;
8946 }
8947
8948 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8949 return MatchOperand_Success;
8950 }
8951
8952 OperandMatchResultTy
8953 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8954 using namespace llvm::AMDGPU::SDWA;
8955
8956 SMLoc S = getLoc();
8957 StringRef Value;
8958 OperandMatchResultTy res;
8959
8960 SMLoc StringLoc;
8961 res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8962 if (res != MatchOperand_Success) {
8963 return res;
8964 }
8965
8966 int64_t Int;
8967 Int = StringSwitch<int64_t>(Value)
8968 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8969 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8970 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8971 .Default(0xffffffff);
8972
8973 if (Int == 0xffffffff) {
8974 Error(StringLoc, "invalid dst_unused value");
8975 return MatchOperand_ParseFail;
8976 }
8977
8978 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8979 return MatchOperand_Success;
8980 }
8981
8982 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8983 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8984 }
8985
8986 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8987 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8988 }
8989
8990 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8991 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8992 }
8993
8994 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8995 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8996 }
8997
8998 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8999 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9000 }
9001
9002 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9003 uint64_t BasicInstType,
9004 bool SkipDstVcc,
9005 bool SkipSrcVcc) {
9006 using namespace llvm::AMDGPU::SDWA;
9007
9008 OptionalImmIndexMap OptionalIdx;
9009 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9010 bool SkippedVcc = false;
9011
9012 unsigned I = 1;
9013 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9014 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9015 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9016 }
9017
9018 for (unsigned E = Operands.size(); I != E; ++I) {
9019 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9020 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9021 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9022 // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
9023 // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9024 // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9025 // Skip VCC only if we didn't skip it on the previous iteration.
9026 // Note that src0 and src1 occupy 2 slots each because of modifiers.
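// (Illustrative count: getNumOperands() == 1 right after the def, and == 5
// once src0_modifiers, src0, src1_modifiers and src1 have been added.)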
9027 if (BasicInstType == SIInstrFlags::VOP2 &&
9028 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9029 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9030 SkippedVcc = true;
9031 continue;
9032 } else if (BasicInstType == SIInstrFlags::VOPC &&
9033 Inst.getNumOperands() == 0) {
9034 SkippedVcc = true;
9035 continue;
9036 }
9037 }
9038 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9039 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9040 } else if (Op.isImm()) {
9041 // Handle optional arguments
9042 OptionalIdx[Op.getImmTy()] = I;
9043 } else {
9044 llvm_unreachable("Invalid operand type");
9045 }
9046 SkippedVcc = false;
9047 }
9048
9049 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
9050 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
9051 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
9052 // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9053 switch (BasicInstType) {
9054 case SIInstrFlags::VOP1:
9055 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9056 AMDGPU::OpName::clamp) != -1) {
9057 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9058 AMDGPUOperand::ImmTyClampSI, 0);
9059 }
9060 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9061 AMDGPU::OpName::omod) != -1) {
9062 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9063 AMDGPUOperand::ImmTyOModSI, 0);
9064 }
9065 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9066 AMDGPU::OpName::dst_sel) != -1) {
9067 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9068 AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9069 }
9070 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9071 AMDGPU::OpName::dst_unused) != -1) {
9072 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9073 AMDGPUOperand::ImmTySdwaDstUnused,
9074 DstUnused::UNUSED_PRESERVE);
9075 }
9076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9077 break;
9078
9079 case SIInstrFlags::VOP2:
9080 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9081 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9082 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9083 }
9084 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9085 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9086 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9088 break;
9089
9090 case SIInstrFlags::VOPC:
9091 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
9092 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9094 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9095 break;
9096
9097 default:
9098 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9099 }
9100 }
9101
9102 // Special case v_mac_{f16, f32}:
9103 // they have a src2 register operand that is tied to the dst operand.
9104 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9105 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9106 auto it = Inst.begin();
9107 std::advance(
9108 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9109 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9110 }
9111 }
9112
9113 //===----------------------------------------------------------------------===//
9114 // mAI
9115 //===----------------------------------------------------------------------===//
9116
9117 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
9118 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
9119 }
9120
9121 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
9122 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
9123 }
9124
9125 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
9126 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
9127 }
9128
9129 /// Force static initialization.
9130 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9131 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
9132 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9133 }
9134
9135 #define GET_REGISTER_MATCHER
9136 #define GET_MATCHER_IMPLEMENTATION
9137 #define GET_MNEMONIC_SPELL_CHECKER
9138 #define GET_MNEMONIC_CHECKER
9139 #include "AMDGPUGenAsmMatcher.inc"
9140
9141 // This function should be defined after the auto-generated include so that
9142 // the MatchClassKind enum is defined.
9143 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9144 unsigned Kind) {
9145 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9146 // But MatchInstructionImpl() expects to see a token and fails to validate
9147 // the operand. This method checks whether we were given an immediate operand
9148 // but expected to get the corresponding token.
9149 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9150 switch (Kind) {
9151 case MCK_addr64:
9152 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9153 case MCK_gds:
9154 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9155 case MCK_lds:
9156 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9157 case MCK_idxen:
9158 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9159 case MCK_offen:
9160 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9161 case MCK_SSrcB32:
9162 // When operands have expression values, they will return true for isToken,
9163 // because it is not possible to distinguish between a token and an
9164 // expression at parse time. MatchInstructionImpl() will always try to
9165 // match an operand as a token, when isToken returns true, and when the
9166 // name of the expression is not a valid token, the match will fail,
9167 // so we need to handle it here.
9168 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9169 case MCK_SSrcF32:
9170 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9171 case MCK_SoppBrTarget:
9172 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9173 case MCK_VReg32OrOff:
9174 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9175 case MCK_InterpSlot:
9176 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9177 case MCK_Attr:
9178 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9179 case MCK_AttrChan:
9180 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9181 case MCK_ImmSMEMOffset:
9182 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9183 case MCK_SReg_64:
9184 case MCK_SReg_64_XEXEC:
9185 // Null is defined as a 32-bit register but
9186 // it should also be enabled with 64-bit operands.
9187 // The following code enables it for SReg_64 operands
9188 // used as source and destination. Remaining source
9189 // operands are handled in isInlinableImm.
9190 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9191 default:
9192 return Match_InvalidOperand;
9193 }
9194 }
9195
9196 //===----------------------------------------------------------------------===//
9197 // endpgm
9198 //===----------------------------------------------------------------------===//
9199
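// Illustrative usage (assumed syntax): "s_endpgm" parses with the default
// immediate of 0, while "s_endpgm 1" yields Imm = 1; the value must fit in
// 16 bits.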
9200 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9201 SMLoc S = getLoc();
9202 int64_t Imm = 0;
9203
9204 if (!parseExpr(Imm)) {
9205 // The operand is optional, if not present default to 0
9206 Imm = 0;
9207 }
9208
9209 if (!isUInt<16>(Imm)) {
9210 Error(S, "expected a 16-bit value");
9211 return MatchOperand_ParseFail;
9212 }
9213
9214 Operands.push_back(
9215 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9216 return MatchOperand_Success;
9217 }
9218
9219 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9220
9221 //===----------------------------------------------------------------------===//
9222 // LDSDIR
9223 //===----------------------------------------------------------------------===//
9224
9225 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9226 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9227 }
9228
9229 bool AMDGPUOperand::isWaitVDST() const {
9230 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9231 }
9232
9233 //===----------------------------------------------------------------------===//
9234 // VINTERP
9235 //===----------------------------------------------------------------------===//
9236
9237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9238 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9239 }
9240
9241 bool AMDGPUOperand::isWaitEXP() const {
9242 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9243 }
9244