1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/MC/MCParser/MCAsmLexer.h" 30 #include "llvm/MC/MCParser/MCAsmParser.h" 31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 32 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/AMDGPUMetadata.h" 36 #include "llvm/Support/AMDHSAKernelDescriptor.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/MachineValueType.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/Support/TargetParser.h" 41 42 using namespace llvm; 43 using namespace llvm::AMDGPU; 44 using namespace llvm::amdhsa; 45 46 namespace { 47 48 class AMDGPUAsmParser; 49 50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 51 52 //===----------------------------------------------------------------------===// 53 // Operand 54 //===----------------------------------------------------------------------===// 55 56 class AMDGPUOperand : public MCParsedAsmOperand { 57 enum KindTy { 58 Token, 59 Immediate, 60 Register, 61 Expression 62 } Kind; 63 64 SMLoc StartLoc, EndLoc; 65 const AMDGPUAsmParser *AsmParser; 66 67 public: 68 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 69 : Kind(Kind_), AsmParser(AsmParser_) {} 70 71 using Ptr = std::unique_ptr<AMDGPUOperand>; 72 73 struct Modifiers { 74 bool Abs = false; 75 bool Neg = false; 76 bool Sext = false; 77 78 bool hasFPModifiers() const { return Abs || Neg; } 79 bool hasIntModifiers() const { return Sext; } 80 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 81 82 int64_t getFPModifiersOperand() const { 83 int64_t Operand = 0; 84 Operand |= Abs ? SISrcMods::ABS : 0u; 85 Operand |= Neg ? SISrcMods::NEG : 0u; 86 return Operand; 87 } 88 89 int64_t getIntModifiersOperand() const { 90 int64_t Operand = 0; 91 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
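    // For example, an operand spelled "gds" may have been parsed as a symbol
    // reference expression; getToken() then falls back to the name of the
    // referenced symbol so it can still be matched as a token.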
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
OS << " type: "; printImmTy(OS, getImmTy()); 1081 } 1082 OS << " mods: " << Imm.Mods << '>'; 1083 break; 1084 case Token: 1085 OS << '\'' << getToken() << '\''; 1086 break; 1087 case Expression: 1088 OS << "<expr " << *Expr << '>'; 1089 break; 1090 } 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1094 int64_t Val, SMLoc Loc, 1095 ImmTy Type = ImmTyNone, 1096 bool IsFPImm = false) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1098 Op->Imm.Val = Val; 1099 Op->Imm.IsFPImm = IsFPImm; 1100 Op->Imm.Kind = ImmKindTyNone; 1101 Op->Imm.Type = Type; 1102 Op->Imm.Mods = Modifiers(); 1103 Op->StartLoc = Loc; 1104 Op->EndLoc = Loc; 1105 return Op; 1106 } 1107 1108 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1109 StringRef Str, SMLoc Loc, 1110 bool HasExplicitEncodingSize = true) { 1111 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1112 Res->Tok.Data = Str.data(); 1113 Res->Tok.Length = Str.size(); 1114 Res->StartLoc = Loc; 1115 Res->EndLoc = Loc; 1116 return Res; 1117 } 1118 1119 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1120 unsigned RegNo, SMLoc S, 1121 SMLoc E) { 1122 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1123 Op->Reg.RegNo = RegNo; 1124 Op->Reg.Mods = Modifiers(); 1125 Op->StartLoc = S; 1126 Op->EndLoc = E; 1127 return Op; 1128 } 1129 1130 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1131 const class MCExpr *Expr, SMLoc S) { 1132 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1133 Op->Expr = Expr; 1134 Op->StartLoc = S; 1135 Op->EndLoc = S; 1136 return Op; 1137 } 1138 }; 1139 1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1141 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1142 return OS; 1143 } 1144 1145 //===----------------------------------------------------------------------===// 1146 // AsmParser 1147 //===----------------------------------------------------------------------===// 1148 1149 // Holds info related to the current kernel, e.g. count of SGPRs used. 1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1151 // .amdgpu_hsa_kernel or at EOF. 
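// KernelScopeInfo records which SGPR/VGPR/AGPR indices have been referenced and
// publishes the resulting counts through the .kernel.sgpr_count,
// .kernel.vgpr_count and .kernel.agpr_count assembler symbols (see
// usesSgprAt/usesVgprAt/usesAgprAt below).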
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1654 unsigned getConstantBusLimit(unsigned Opcode) const; 1655 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1656 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1657 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1658 1659 bool isSupportedMnemo(StringRef Mnemo, 1660 const FeatureBitset &FBS); 1661 bool isSupportedMnemo(StringRef Mnemo, 1662 
const FeatureBitset &FBS, 1663 ArrayRef<unsigned> Variants); 1664 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1665 1666 bool isId(const StringRef Id) const; 1667 bool isId(const AsmToken &Token, const StringRef Id) const; 1668 bool isToken(const AsmToken::TokenKind Kind) const; 1669 bool trySkipId(const StringRef Id); 1670 bool trySkipId(const StringRef Pref, const StringRef Id); 1671 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1672 bool trySkipToken(const AsmToken::TokenKind Kind); 1673 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1674 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1675 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1676 1677 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1678 AsmToken::TokenKind getTokenKind() const; 1679 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1680 bool parseExpr(OperandVector &Operands); 1681 StringRef getTokenStr() const; 1682 AsmToken peekToken(); 1683 AsmToken getToken() const; 1684 SMLoc getLoc() const; 1685 void lex(); 1686 1687 public: 1688 void onBeginOfFile() override; 1689 1690 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1691 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1692 1693 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1694 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1695 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1697 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1698 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1699 1700 bool parseSwizzleOperand(int64_t &Op, 1701 const unsigned MinVal, 1702 const unsigned MaxVal, 1703 const StringRef ErrMsg, 1704 SMLoc &Loc); 1705 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1706 const unsigned MinVal, 1707 const unsigned MaxVal, 1708 const StringRef ErrMsg); 1709 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1710 bool parseSwizzleOffset(int64_t &Imm); 1711 bool parseSwizzleMacro(int64_t &Imm); 1712 bool parseSwizzleQuadPerm(int64_t &Imm); 1713 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1714 bool parseSwizzleBroadcast(int64_t &Imm); 1715 bool parseSwizzleSwap(int64_t &Imm); 1716 bool parseSwizzleReverse(int64_t &Imm); 1717 1718 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1719 int64_t parseGPRIdxMacro(); 1720 1721 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1722 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1723 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1724 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1725 1726 AMDGPUOperand::Ptr defaultCPol() const; 1727 1728 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1729 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1730 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1731 AMDGPUOperand::Ptr defaultFlatOffset() const; 1732 1733 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1734 1735 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1736 OptionalImmIndexMap &OptionalIdx); 1737 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1738 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1739 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1741 OptionalImmIndexMap &OptionalIdx); 1742 1743 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1744 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1745 1746 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1747 bool IsAtomic = false); 1748 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1749 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1750 1751 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1752 1753 bool parseDimId(unsigned &Encoding); 1754 OperandMatchResultTy parseDim(OperandVector &Operands); 1755 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1756 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1757 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1758 int64_t parseDPPCtrlSel(StringRef Ctrl); 1759 int64_t parseDPPCtrlPerm(); 1760 AMDGPUOperand::Ptr defaultRowMask() const; 1761 AMDGPUOperand::Ptr defaultBankMask() const; 1762 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1763 AMDGPUOperand::Ptr defaultFI() const; 1764 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1765 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1766 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1767 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { cvtVOP3DPP(Inst, Operands, true); } 1768 1769 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1770 AMDGPUOperand::ImmTy Type); 1771 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1772 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1773 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1774 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1775 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1776 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1777 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1778 uint64_t BasicInstType, 1779 bool SkipDstVcc = false, 1780 bool SkipSrcVcc = false); 1781 1782 AMDGPUOperand::Ptr defaultBLGP() const; 1783 AMDGPUOperand::Ptr defaultCBSZ() const; 1784 AMDGPUOperand::Ptr defaultABID() const; 1785 1786 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1787 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1788 1789 AMDGPUOperand::Ptr defaultWaitVDST() const; 1790 AMDGPUOperand::Ptr defaultWaitEXP() const; 1791 }; 1792 1793 struct OptionalOperand { 1794 const char *Name; 1795 AMDGPUOperand::ImmTy Type; 1796 bool IsBit; 1797 bool (*ConvertResult)(int64_t&); 1798 }; 1799 1800 } // end anonymous namespace 1801 1802 // May be called with integer type with equivalent bitwidth. 
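// Maps an operand size in bytes to the matching IEEE semantics, e.g. both
// getFltSemantics(4) and getFltSemantics(MVT::i32) select IEEEsingle().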
1803 static const fltSemantics *getFltSemantics(unsigned Size) { 1804 switch (Size) { 1805 case 4: 1806 return &APFloat::IEEEsingle(); 1807 case 8: 1808 return &APFloat::IEEEdouble(); 1809 case 2: 1810 return &APFloat::IEEEhalf(); 1811 default: 1812 llvm_unreachable("unsupported fp type"); 1813 } 1814 } 1815 1816 static const fltSemantics *getFltSemantics(MVT VT) { 1817 return getFltSemantics(VT.getSizeInBits() / 8); 1818 } 1819 1820 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1821 switch (OperandType) { 1822 case AMDGPU::OPERAND_REG_IMM_INT32: 1823 case AMDGPU::OPERAND_REG_IMM_FP32: 1824 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1825 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1826 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1827 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1828 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1829 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1830 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1831 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1832 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1833 case AMDGPU::OPERAND_KIMM32: 1834 return &APFloat::IEEEsingle(); 1835 case AMDGPU::OPERAND_REG_IMM_INT64: 1836 case AMDGPU::OPERAND_REG_IMM_FP64: 1837 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1838 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1839 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1840 return &APFloat::IEEEdouble(); 1841 case AMDGPU::OPERAND_REG_IMM_INT16: 1842 case AMDGPU::OPERAND_REG_IMM_FP16: 1843 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1844 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1845 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1852 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1853 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1854 case AMDGPU::OPERAND_KIMM16: 1855 return &APFloat::IEEEhalf(); 1856 default: 1857 llvm_unreachable("unsupported fp type"); 1858 } 1859 } 1860 1861 //===----------------------------------------------------------------------===// 1862 // Operand 1863 //===----------------------------------------------------------------------===// 1864 1865 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1866 bool Lost; 1867 1868 // Convert literal to single precision 1869 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1870 APFloat::rmNearestTiesToEven, 1871 &Lost); 1872 // We allow precision lost but not overflow or underflow 1873 if (Status != APFloat::opOK && 1874 Lost && 1875 ((Status & APFloat::opOverflow) != 0 || 1876 (Status & APFloat::opUnderflow) != 0)) { 1877 return false; 1878 } 1879 1880 return true; 1881 } 1882 1883 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1884 return isUIntN(Size, Val) || isIntN(Size, Val); 1885 } 1886 1887 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1888 if (VT.getScalarType() == MVT::i16) { 1889 // FP immediate values are broken. 1890 return isInlinableIntLiteral(Val); 1891 } 1892 1893 // f16/v2f16 operands work correctly for all values. 1894 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1895 } 1896 1897 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1898 1899 // This is a hack to enable named inline values like 1900 // shared_base with both 32-bit and 64-bit operands. 1901 // Note that these values are defined as 1902 // 32-bit operands only. 
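  // For example, 'src_shared_base' is accepted here even when the instruction
  // expects a 64-bit source operand.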
1903 if (isInlineValue()) { 1904 return true; 1905 } 1906 1907 if (!isImmTy(ImmTyNone)) { 1908 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1909 return false; 1910 } 1911 // TODO: We should avoid using host float here. It would be better to 1912 // check the float bit values which is what a few other places do. 1913 // We've had bot failures before due to weird NaN support on mips hosts. 1914 1915 APInt Literal(64, Imm.Val); 1916 1917 if (Imm.IsFPImm) { // We got fp literal token 1918 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1919 return AMDGPU::isInlinableLiteral64(Imm.Val, 1920 AsmParser->hasInv2PiInlineImm()); 1921 } 1922 1923 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1924 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1925 return false; 1926 1927 if (type.getScalarSizeInBits() == 16) { 1928 return isInlineableLiteralOp16( 1929 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1930 type, AsmParser->hasInv2PiInlineImm()); 1931 } 1932 1933 // Check if single precision literal is inlinable 1934 return AMDGPU::isInlinableLiteral32( 1935 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1936 AsmParser->hasInv2PiInlineImm()); 1937 } 1938 1939 // We got int literal token. 1940 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1941 return AMDGPU::isInlinableLiteral64(Imm.Val, 1942 AsmParser->hasInv2PiInlineImm()); 1943 } 1944 1945 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1946 return false; 1947 } 1948 1949 if (type.getScalarSizeInBits() == 16) { 1950 return isInlineableLiteralOp16( 1951 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1952 type, AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 return AMDGPU::isInlinableLiteral32( 1956 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1957 AsmParser->hasInv2PiInlineImm()); 1958 } 1959 1960 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1961 // Check that this immediate can be added as literal 1962 if (!isImmTy(ImmTyNone)) { 1963 return false; 1964 } 1965 1966 if (!Imm.IsFPImm) { 1967 // We got int literal token. 1968 1969 if (type == MVT::f64 && hasFPModifiers()) { 1970 // Cannot apply fp modifiers to int literals preserving the same semantics 1971 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1972 // disable these cases. 1973 return false; 1974 } 1975 1976 unsigned Size = type.getSizeInBits(); 1977 if (Size == 64) 1978 Size = 32; 1979 1980 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1981 // types. 1982 return isSafeTruncation(Imm.Val, Size); 1983 } 1984 1985 // We got fp literal token 1986 if (type == MVT::f64) { // Expected 64-bit fp operand 1987 // We would set low 64-bits of literal to zeroes but we accept this literals 1988 return true; 1989 } 1990 1991 if (type == MVT::i64) { // Expected 64-bit int operand 1992 // We don't allow fp literals in 64-bit integer instructions. It is 1993 // unclear how we should encode them. 1994 return false; 1995 } 1996 1997 // We allow fp literals with f16x2 operands assuming that the specified 1998 // literal goes into the lower half and the upper half is zero. We also 1999 // require that the literal may be losslessly converted to f16. 2000 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2001 (type == MVT::v2i16)? MVT::i16 : 2002 (type == MVT::v2f32)? 
MVT::f32 : type; 2003 2004 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2005 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2006 } 2007 2008 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2009 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2010 } 2011 2012 bool AMDGPUOperand::isVRegWithInputMods() const { 2013 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2014 // GFX90A allows DPP on 64-bit operands. 2015 (isRegClass(AMDGPU::VReg_64RegClassID) && 2016 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2017 } 2018 2019 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2020 if (AsmParser->isVI()) 2021 return isVReg32(); 2022 else if (AsmParser->isGFX9Plus()) 2023 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2024 else 2025 return false; 2026 } 2027 2028 bool AMDGPUOperand::isSDWAFP16Operand() const { 2029 return isSDWAOperand(MVT::f16); 2030 } 2031 2032 bool AMDGPUOperand::isSDWAFP32Operand() const { 2033 return isSDWAOperand(MVT::f32); 2034 } 2035 2036 bool AMDGPUOperand::isSDWAInt16Operand() const { 2037 return isSDWAOperand(MVT::i16); 2038 } 2039 2040 bool AMDGPUOperand::isSDWAInt32Operand() const { 2041 return isSDWAOperand(MVT::i32); 2042 } 2043 2044 bool AMDGPUOperand::isBoolReg() const { 2045 auto FB = AsmParser->getFeatureBits(); 2046 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2047 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2048 } 2049 2050 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2051 { 2052 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2053 assert(Size == 2 || Size == 4 || Size == 8); 2054 2055 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2056 2057 if (Imm.Mods.Abs) { 2058 Val &= ~FpSignMask; 2059 } 2060 if (Imm.Mods.Neg) { 2061 Val ^= FpSignMask; 2062 } 2063 2064 return Val; 2065 } 2066 2067 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2068 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2069 Inst.getNumOperands())) { 2070 addLiteralImmOperand(Inst, Imm.Val, 2071 ApplyModifiers & 2072 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2073 } else { 2074 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2075 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2076 setImmKindNone(); 2077 } 2078 } 2079 2080 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2081 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2082 auto OpNum = Inst.getNumOperands(); 2083 // Check that this operand accepts literals 2084 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2085 2086 if (ApplyModifiers) { 2087 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2088 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2089 Val = applyInputFPModifiers(Val, Size); 2090 } 2091 2092 APInt Literal(64, Val); 2093 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2094 2095 if (Imm.IsFPImm) { // We got fp literal token 2096 switch (OpTy) { 2097 case AMDGPU::OPERAND_REG_IMM_INT64: 2098 case AMDGPU::OPERAND_REG_IMM_FP64: 2099 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2100 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2102 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2103 AsmParser->hasInv2PiInlineImm())) { 2104 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2105 setImmKindConst(); 2106 return; 2107 } 2108 2109 // Non-inlineable 2110 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2111 // For fp operands we check if low 32 bits are zeros 2112 if (Literal.getLoBits(32) != 0) { 2113 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2114 "Can't encode literal as exact 64-bit floating-point operand. " 2115 "Low 32-bits will be set to zero"); 2116 } 2117 2118 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2119 setImmKindLiteral(); 2120 return; 2121 } 2122 2123 // We don't allow fp literals in 64-bit integer instructions. It is 2124 // unclear how we should encode them. This case should be checked earlier 2125 // in predicate methods (isLiteralImm()) 2126 llvm_unreachable("fp literal in 64-bit integer instruction."); 2127 2128 case AMDGPU::OPERAND_REG_IMM_INT32: 2129 case AMDGPU::OPERAND_REG_IMM_FP32: 2130 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2131 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2132 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2133 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2134 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2135 case AMDGPU::OPERAND_REG_IMM_INT16: 2136 case AMDGPU::OPERAND_REG_IMM_FP16: 2137 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2138 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2139 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2140 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2141 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2142 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2143 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2144 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2145 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2146 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2147 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2148 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2149 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2150 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2151 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2152 case AMDGPU::OPERAND_KIMM32: 2153 case AMDGPU::OPERAND_KIMM16: { 2154 bool lost; 2155 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2156 // Convert literal to single precision 2157 FPLiteral.convert(*getOpFltSemantics(OpTy), 2158 APFloat::rmNearestTiesToEven, &lost); 2159 // We allow precision lost but not overflow or underflow. This should be 2160 // checked earlier in isLiteralImm() 2161 2162 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2163 Inst.addOperand(MCOperand::createImm(ImmVal)); 2164 setImmKindLiteral(); 2165 return; 2166 } 2167 default: 2168 llvm_unreachable("invalid operand size"); 2169 } 2170 2171 return; 2172 } 2173 2174 // We got int literal token. 2175 // Only sign extend inline immediates. 
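  // For example, for a 16-bit operand the integer literal -1 is encoded as an
  // inline constant, while 0x1234 is not inlinable and is emitted as a 16-bit
  // literal (Val & 0xffff).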
2176 switch (OpTy) { 2177 case AMDGPU::OPERAND_REG_IMM_INT32: 2178 case AMDGPU::OPERAND_REG_IMM_FP32: 2179 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2180 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2181 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2182 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2183 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2184 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2185 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2186 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2187 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2188 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2189 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2190 if (isSafeTruncation(Val, 32) && 2191 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2192 AsmParser->hasInv2PiInlineImm())) { 2193 Inst.addOperand(MCOperand::createImm(Val)); 2194 setImmKindConst(); 2195 return; 2196 } 2197 2198 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2199 setImmKindLiteral(); 2200 return; 2201 2202 case AMDGPU::OPERAND_REG_IMM_INT64: 2203 case AMDGPU::OPERAND_REG_IMM_FP64: 2204 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2205 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2206 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2207 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2208 Inst.addOperand(MCOperand::createImm(Val)); 2209 setImmKindConst(); 2210 return; 2211 } 2212 2213 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2214 setImmKindLiteral(); 2215 return; 2216 2217 case AMDGPU::OPERAND_REG_IMM_INT16: 2218 case AMDGPU::OPERAND_REG_IMM_FP16: 2219 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2220 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2221 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2222 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2223 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2224 if (isSafeTruncation(Val, 16) && 2225 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2226 AsmParser->hasInv2PiInlineImm())) { 2227 Inst.addOperand(MCOperand::createImm(Val)); 2228 setImmKindConst(); 2229 return; 2230 } 2231 2232 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2233 setImmKindLiteral(); 2234 return; 2235 2236 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2237 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2240 assert(isSafeTruncation(Val, 16)); 2241 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2242 AsmParser->hasInv2PiInlineImm())); 2243 2244 Inst.addOperand(MCOperand::createImm(Val)); 2245 return; 2246 } 2247 case AMDGPU::OPERAND_KIMM32: 2248 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2249 setImmKindNone(); 2250 return; 2251 case AMDGPU::OPERAND_KIMM16: 2252 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2253 setImmKindNone(); 2254 return; 2255 default: 2256 llvm_unreachable("invalid operand size"); 2257 } 2258 } 2259 2260 template <unsigned Bitwidth> 2261 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2262 APInt Literal(64, Imm.Val); 2263 setImmKindNone(); 2264 2265 if (!Imm.IsFPImm) { 2266 // We got int literal token. 
2267 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2268 return; 2269 } 2270 2271 bool Lost; 2272 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2273 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2274 APFloat::rmNearestTiesToEven, &Lost); 2275 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2276 } 2277 2278 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2279 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2280 } 2281 2282 static bool isInlineValue(unsigned Reg) { 2283 switch (Reg) { 2284 case AMDGPU::SRC_SHARED_BASE: 2285 case AMDGPU::SRC_SHARED_LIMIT: 2286 case AMDGPU::SRC_PRIVATE_BASE: 2287 case AMDGPU::SRC_PRIVATE_LIMIT: 2288 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2289 return true; 2290 case AMDGPU::SRC_VCCZ: 2291 case AMDGPU::SRC_EXECZ: 2292 case AMDGPU::SRC_SCC: 2293 return true; 2294 case AMDGPU::SGPR_NULL: 2295 return true; 2296 default: 2297 return false; 2298 } 2299 } 2300 2301 bool AMDGPUOperand::isInlineValue() const { 2302 return isRegKind() && ::isInlineValue(getReg()); 2303 } 2304 2305 //===----------------------------------------------------------------------===// 2306 // AsmParser 2307 //===----------------------------------------------------------------------===// 2308 2309 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2310 if (Is == IS_VGPR) { 2311 switch (RegWidth) { 2312 default: return -1; 2313 case 32: 2314 return AMDGPU::VGPR_32RegClassID; 2315 case 64: 2316 return AMDGPU::VReg_64RegClassID; 2317 case 96: 2318 return AMDGPU::VReg_96RegClassID; 2319 case 128: 2320 return AMDGPU::VReg_128RegClassID; 2321 case 160: 2322 return AMDGPU::VReg_160RegClassID; 2323 case 192: 2324 return AMDGPU::VReg_192RegClassID; 2325 case 224: 2326 return AMDGPU::VReg_224RegClassID; 2327 case 256: 2328 return AMDGPU::VReg_256RegClassID; 2329 case 512: 2330 return AMDGPU::VReg_512RegClassID; 2331 case 1024: 2332 return AMDGPU::VReg_1024RegClassID; 2333 } 2334 } else if (Is == IS_TTMP) { 2335 switch (RegWidth) { 2336 default: return -1; 2337 case 32: 2338 return AMDGPU::TTMP_32RegClassID; 2339 case 64: 2340 return AMDGPU::TTMP_64RegClassID; 2341 case 128: 2342 return AMDGPU::TTMP_128RegClassID; 2343 case 256: 2344 return AMDGPU::TTMP_256RegClassID; 2345 case 512: 2346 return AMDGPU::TTMP_512RegClassID; 2347 } 2348 } else if (Is == IS_SGPR) { 2349 switch (RegWidth) { 2350 default: return -1; 2351 case 32: 2352 return AMDGPU::SGPR_32RegClassID; 2353 case 64: 2354 return AMDGPU::SGPR_64RegClassID; 2355 case 96: 2356 return AMDGPU::SGPR_96RegClassID; 2357 case 128: 2358 return AMDGPU::SGPR_128RegClassID; 2359 case 160: 2360 return AMDGPU::SGPR_160RegClassID; 2361 case 192: 2362 return AMDGPU::SGPR_192RegClassID; 2363 case 224: 2364 return AMDGPU::SGPR_224RegClassID; 2365 case 256: 2366 return AMDGPU::SGPR_256RegClassID; 2367 case 512: 2368 return AMDGPU::SGPR_512RegClassID; 2369 } 2370 } else if (Is == IS_AGPR) { 2371 switch (RegWidth) { 2372 default: return -1; 2373 case 32: 2374 return AMDGPU::AGPR_32RegClassID; 2375 case 64: 2376 return AMDGPU::AReg_64RegClassID; 2377 case 96: 2378 return AMDGPU::AReg_96RegClassID; 2379 case 128: 2380 return AMDGPU::AReg_128RegClassID; 2381 case 160: 2382 return AMDGPU::AReg_160RegClassID; 2383 case 192: 2384 return AMDGPU::AReg_192RegClassID; 2385 case 224: 2386 return AMDGPU::AReg_224RegClassID; 2387 case 256: 2388 return AMDGPU::AReg_256RegClassID; 2389 case 512: 2390 return AMDGPU::AReg_512RegClassID; 
2391 case 1024: 2392 return AMDGPU::AReg_1024RegClassID; 2393 } 2394 } 2395 return -1; 2396 } 2397 2398 static unsigned getSpecialRegForName(StringRef RegName) { 2399 return StringSwitch<unsigned>(RegName) 2400 .Case("exec", AMDGPU::EXEC) 2401 .Case("vcc", AMDGPU::VCC) 2402 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2403 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2404 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2405 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2406 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2407 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2408 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2409 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2410 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2411 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2412 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2413 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2414 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2415 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2416 .Case("m0", AMDGPU::M0) 2417 .Case("vccz", AMDGPU::SRC_VCCZ) 2418 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2419 .Case("execz", AMDGPU::SRC_EXECZ) 2420 .Case("src_execz", AMDGPU::SRC_EXECZ) 2421 .Case("scc", AMDGPU::SRC_SCC) 2422 .Case("src_scc", AMDGPU::SRC_SCC) 2423 .Case("tba", AMDGPU::TBA) 2424 .Case("tma", AMDGPU::TMA) 2425 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2426 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2427 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2428 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2429 .Case("vcc_lo", AMDGPU::VCC_LO) 2430 .Case("vcc_hi", AMDGPU::VCC_HI) 2431 .Case("exec_lo", AMDGPU::EXEC_LO) 2432 .Case("exec_hi", AMDGPU::EXEC_HI) 2433 .Case("tma_lo", AMDGPU::TMA_LO) 2434 .Case("tma_hi", AMDGPU::TMA_HI) 2435 .Case("tba_lo", AMDGPU::TBA_LO) 2436 .Case("tba_hi", AMDGPU::TBA_HI) 2437 .Case("pc", AMDGPU::PC_REG) 2438 .Case("null", AMDGPU::SGPR_NULL) 2439 .Default(AMDGPU::NoRegister); 2440 } 2441 2442 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2443 SMLoc &EndLoc, bool RestoreOnFailure) { 2444 auto R = parseRegister(); 2445 if (!R) return true; 2446 assert(R->isReg()); 2447 RegNo = R->getReg(); 2448 StartLoc = R->getStartLoc(); 2449 EndLoc = R->getEndLoc(); 2450 return false; 2451 } 2452 2453 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2454 SMLoc &EndLoc) { 2455 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2456 } 2457 2458 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2459 SMLoc &StartLoc, 2460 SMLoc &EndLoc) { 2461 bool Result = 2462 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2463 bool PendingErrors = getParser().hasPendingError(); 2464 getParser().clearPendingErrors(); 2465 if (PendingErrors) 2466 return MatchOperand_ParseFail; 2467 if (Result) 2468 return MatchOperand_NoMatch; 2469 return MatchOperand_Success; 2470 } 2471 2472 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2473 RegisterKind RegKind, unsigned Reg1, 2474 SMLoc Loc) { 2475 switch (RegKind) { 2476 case IS_SPECIAL: 2477 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2478 Reg = AMDGPU::EXEC; 2479 RegWidth = 64; 2480 return true; 2481 } 2482 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2483 Reg = AMDGPU::FLAT_SCR; 2484 RegWidth = 64; 2485 return true; 2486 } 2487 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2488 Reg = AMDGPU::XNACK_MASK; 2489 RegWidth = 64; 
2490 return true; 2491 } 2492 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2493 Reg = AMDGPU::VCC; 2494 RegWidth = 64; 2495 return true; 2496 } 2497 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2498 Reg = AMDGPU::TBA; 2499 RegWidth = 64; 2500 return true; 2501 } 2502 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2503 Reg = AMDGPU::TMA; 2504 RegWidth = 64; 2505 return true; 2506 } 2507 Error(Loc, "register does not fit in the list"); 2508 return false; 2509 case IS_VGPR: 2510 case IS_SGPR: 2511 case IS_AGPR: 2512 case IS_TTMP: 2513 if (Reg1 != Reg + RegWidth / 32) { 2514 Error(Loc, "registers in a list must have consecutive indices"); 2515 return false; 2516 } 2517 RegWidth += 32; 2518 return true; 2519 default: 2520 llvm_unreachable("unexpected register kind"); 2521 } 2522 } 2523 2524 struct RegInfo { 2525 StringLiteral Name; 2526 RegisterKind Kind; 2527 }; 2528 2529 static constexpr RegInfo RegularRegisters[] = { 2530 {{"v"}, IS_VGPR}, 2531 {{"s"}, IS_SGPR}, 2532 {{"ttmp"}, IS_TTMP}, 2533 {{"acc"}, IS_AGPR}, 2534 {{"a"}, IS_AGPR}, 2535 }; 2536 2537 static bool isRegularReg(RegisterKind Kind) { 2538 return Kind == IS_VGPR || 2539 Kind == IS_SGPR || 2540 Kind == IS_TTMP || 2541 Kind == IS_AGPR; 2542 } 2543 2544 static const RegInfo* getRegularRegInfo(StringRef Str) { 2545 for (const RegInfo &Reg : RegularRegisters) 2546 if (Str.startswith(Reg.Name)) 2547 return &Reg; 2548 return nullptr; 2549 } 2550 2551 static bool getRegNum(StringRef Str, unsigned& Num) { 2552 return !Str.getAsInteger(10, Num); 2553 } 2554 2555 bool 2556 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2557 const AsmToken &NextToken) const { 2558 2559 // A list of consecutive registers: [s0,s1,s2,s3] 2560 if (Token.is(AsmToken::LBrac)) 2561 return true; 2562 2563 if (!Token.is(AsmToken::Identifier)) 2564 return false; 2565 2566 // A single register like s0 or a range of registers like s[0:1] 2567 2568 StringRef Str = Token.getString(); 2569 const RegInfo *Reg = getRegularRegInfo(Str); 2570 if (Reg) { 2571 StringRef RegName = Reg->Name; 2572 StringRef RegSuffix = Str.substr(RegName.size()); 2573 if (!RegSuffix.empty()) { 2574 unsigned Num; 2575 // A single register with an index: rXX 2576 if (getRegNum(RegSuffix, Num)) 2577 return true; 2578 } else { 2579 // A range of registers: r[XX:YY]. 2580 if (NextToken.is(AsmToken::LBrac)) 2581 return true; 2582 } 2583 } 2584 2585 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2586 } 2587 2588 bool 2589 AMDGPUAsmParser::isRegister() 2590 { 2591 return isRegister(getToken(), peekToken()); 2592 } 2593 2594 unsigned 2595 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2596 unsigned RegNum, 2597 unsigned RegWidth, 2598 SMLoc Loc) { 2599 2600 assert(isRegularReg(RegKind)); 2601 2602 unsigned AlignSize = 1; 2603 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2604 // SGPR and TTMP registers must be aligned. 2605 // Max required alignment is 4 dwords. 
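    // For example, s[2:3] is valid (2-dword alignment) while s[1:2] is not,
    // and s[4:7] is valid while s[2:5] is not (4-dword alignment).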
2606 AlignSize = std::min(RegWidth / 32, 4u); 2607 } 2608 2609 if (RegNum % AlignSize != 0) { 2610 Error(Loc, "invalid register alignment"); 2611 return AMDGPU::NoRegister; 2612 } 2613 2614 unsigned RegIdx = RegNum / AlignSize; 2615 int RCID = getRegClass(RegKind, RegWidth); 2616 if (RCID == -1) { 2617 Error(Loc, "invalid or unsupported register size"); 2618 return AMDGPU::NoRegister; 2619 } 2620 2621 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2622 const MCRegisterClass RC = TRI->getRegClass(RCID); 2623 if (RegIdx >= RC.getNumRegs()) { 2624 Error(Loc, "register index is out of range"); 2625 return AMDGPU::NoRegister; 2626 } 2627 2628 return RC.getRegister(RegIdx); 2629 } 2630 2631 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2632 int64_t RegLo, RegHi; 2633 if (!skipToken(AsmToken::LBrac, "missing register index")) 2634 return false; 2635 2636 SMLoc FirstIdxLoc = getLoc(); 2637 SMLoc SecondIdxLoc; 2638 2639 if (!parseExpr(RegLo)) 2640 return false; 2641 2642 if (trySkipToken(AsmToken::Colon)) { 2643 SecondIdxLoc = getLoc(); 2644 if (!parseExpr(RegHi)) 2645 return false; 2646 } else { 2647 RegHi = RegLo; 2648 } 2649 2650 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2651 return false; 2652 2653 if (!isUInt<32>(RegLo)) { 2654 Error(FirstIdxLoc, "invalid register index"); 2655 return false; 2656 } 2657 2658 if (!isUInt<32>(RegHi)) { 2659 Error(SecondIdxLoc, "invalid register index"); 2660 return false; 2661 } 2662 2663 if (RegLo > RegHi) { 2664 Error(FirstIdxLoc, "first register index should not exceed second index"); 2665 return false; 2666 } 2667 2668 Num = static_cast<unsigned>(RegLo); 2669 RegWidth = 32 * ((RegHi - RegLo) + 1); 2670 return true; 2671 } 2672 2673 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2674 unsigned &RegNum, unsigned &RegWidth, 2675 SmallVectorImpl<AsmToken> &Tokens) { 2676 assert(isToken(AsmToken::Identifier)); 2677 unsigned Reg = getSpecialRegForName(getTokenStr()); 2678 if (Reg) { 2679 RegNum = 0; 2680 RegWidth = 32; 2681 RegKind = IS_SPECIAL; 2682 Tokens.push_back(getToken()); 2683 lex(); // skip register name 2684 } 2685 return Reg; 2686 } 2687 2688 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2689 unsigned &RegNum, unsigned &RegWidth, 2690 SmallVectorImpl<AsmToken> &Tokens) { 2691 assert(isToken(AsmToken::Identifier)); 2692 StringRef RegName = getTokenStr(); 2693 auto Loc = getLoc(); 2694 2695 const RegInfo *RI = getRegularRegInfo(RegName); 2696 if (!RI) { 2697 Error(Loc, "invalid register name"); 2698 return AMDGPU::NoRegister; 2699 } 2700 2701 Tokens.push_back(getToken()); 2702 lex(); // skip register name 2703 2704 RegKind = RI->Kind; 2705 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2706 if (!RegSuffix.empty()) { 2707 // Single 32-bit register: vXX. 2708 if (!getRegNum(RegSuffix, RegNum)) { 2709 Error(Loc, "invalid register index"); 2710 return AMDGPU::NoRegister; 2711 } 2712 RegWidth = 32; 2713 } else { 2714 // Range of registers: v[XX:YY]. ":YY" is optional. 
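    // For example, v[8:11] denotes a 128-bit range starting at v8, and v[8]
    // is equivalent to v8.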
2715 if (!ParseRegRange(RegNum, RegWidth)) 2716 return AMDGPU::NoRegister; 2717 } 2718 2719 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2720 } 2721 2722 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2723 unsigned &RegWidth, 2724 SmallVectorImpl<AsmToken> &Tokens) { 2725 unsigned Reg = AMDGPU::NoRegister; 2726 auto ListLoc = getLoc(); 2727 2728 if (!skipToken(AsmToken::LBrac, 2729 "expected a register or a list of registers")) { 2730 return AMDGPU::NoRegister; 2731 } 2732 2733 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2734 2735 auto Loc = getLoc(); 2736 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2737 return AMDGPU::NoRegister; 2738 if (RegWidth != 32) { 2739 Error(Loc, "expected a single 32-bit register"); 2740 return AMDGPU::NoRegister; 2741 } 2742 2743 for (; trySkipToken(AsmToken::Comma); ) { 2744 RegisterKind NextRegKind; 2745 unsigned NextReg, NextRegNum, NextRegWidth; 2746 Loc = getLoc(); 2747 2748 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2749 NextRegNum, NextRegWidth, 2750 Tokens)) { 2751 return AMDGPU::NoRegister; 2752 } 2753 if (NextRegWidth != 32) { 2754 Error(Loc, "expected a single 32-bit register"); 2755 return AMDGPU::NoRegister; 2756 } 2757 if (NextRegKind != RegKind) { 2758 Error(Loc, "registers in a list must be of the same kind"); 2759 return AMDGPU::NoRegister; 2760 } 2761 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2762 return AMDGPU::NoRegister; 2763 } 2764 2765 if (!skipToken(AsmToken::RBrac, 2766 "expected a comma or a closing square bracket")) { 2767 return AMDGPU::NoRegister; 2768 } 2769 2770 if (isRegularReg(RegKind)) 2771 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2772 2773 return Reg; 2774 } 2775 2776 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2777 unsigned &RegNum, unsigned &RegWidth, 2778 SmallVectorImpl<AsmToken> &Tokens) { 2779 auto Loc = getLoc(); 2780 Reg = AMDGPU::NoRegister; 2781 2782 if (isToken(AsmToken::Identifier)) { 2783 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2784 if (Reg == AMDGPU::NoRegister) 2785 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2786 } else { 2787 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2788 } 2789 2790 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2791 if (Reg == AMDGPU::NoRegister) { 2792 assert(Parser.hasPendingError()); 2793 return false; 2794 } 2795 2796 if (!subtargetHasRegister(*TRI, Reg)) { 2797 if (Reg == AMDGPU::SGPR_NULL) { 2798 Error(Loc, "'null' operand is not supported on this GPU"); 2799 } else { 2800 Error(Loc, "register not available on this GPU"); 2801 } 2802 return false; 2803 } 2804 2805 return true; 2806 } 2807 2808 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2809 unsigned &RegNum, unsigned &RegWidth, 2810 bool RestoreOnFailure /*=false*/) { 2811 Reg = AMDGPU::NoRegister; 2812 2813 SmallVector<AsmToken, 1> Tokens; 2814 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2815 if (RestoreOnFailure) { 2816 while (!Tokens.empty()) { 2817 getLexer().UnLex(Tokens.pop_back_val()); 2818 } 2819 } 2820 return true; 2821 } 2822 return false; 2823 } 2824 2825 Optional<StringRef> 2826 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2827 switch (RegKind) { 2828 case IS_VGPR: 2829 return StringRef(".amdgcn.next_free_vgpr"); 2830 case IS_SGPR: 2831 return StringRef(".amdgcn.next_free_sgpr"); 2832 default: 2833 return None; 2834 } 2835 } 2836 2837 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2838 auto SymbolName = getGprCountSymbolName(RegKind); 2839 assert(SymbolName && "initializing invalid register kind"); 2840 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2841 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2842 } 2843 2844 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2845 unsigned DwordRegIndex, 2846 unsigned RegWidth) { 2847 // Symbols are only defined for GCN targets 2848 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2849 return true; 2850 2851 auto SymbolName = getGprCountSymbolName(RegKind); 2852 if (!SymbolName) 2853 return true; 2854 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2855 2856 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2857 int64_t OldCount; 2858 2859 if (!Sym->isVariable()) 2860 return !Error(getLoc(), 2861 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2862 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2863 return !Error( 2864 getLoc(), 2865 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2866 2867 if (OldCount <= NewMax) 2868 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2869 2870 return true; 2871 } 2872 2873 std::unique_ptr<AMDGPUOperand> 2874 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2875 const auto &Tok = getToken(); 2876 SMLoc StartLoc = Tok.getLoc(); 2877 SMLoc EndLoc = Tok.getEndLoc(); 2878 RegisterKind RegKind; 2879 unsigned Reg, RegNum, RegWidth; 2880 2881 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2882 return nullptr; 2883 } 2884 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2885 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2886 return nullptr; 2887 } else 2888 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2889 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2890 } 2891 2892 OperandMatchResultTy 2893 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2894 // TODO: add syntactic sugar for 1/(2*PI) 2895 2896 assert(!isRegister()); 2897 assert(!isModifier()); 2898 2899 const auto& Tok = getToken(); 2900 const auto& NextTok = peekToken(); 2901 bool IsReal = Tok.is(AsmToken::Real); 2902 SMLoc S = getLoc(); 2903 bool Negate = false; 2904 2905 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2906 lex(); 2907 IsReal = true; 2908 Negate = true; 2909 } 2910 2911 if (IsReal) { 2912 // Floating-point expressions are not supported. 2913 // Can only allow floating-point literals with an 2914 // optional sign. 2915 2916 StringRef Num = getTokenStr(); 2917 lex(); 2918 2919 APFloat RealVal(APFloat::IEEEdouble()); 2920 auto roundMode = APFloat::rmNearestTiesToEven; 2921 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2922 return MatchOperand_ParseFail; 2923 } 2924 if (Negate) 2925 RealVal.changeSign(); 2926 2927 Operands.push_back( 2928 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2929 AMDGPUOperand::ImmTyNone, true)); 2930 2931 return MatchOperand_Success; 2932 2933 } else { 2934 int64_t IntVal; 2935 const MCExpr *Expr; 2936 SMLoc S = getLoc(); 2937 2938 if (HasSP3AbsModifier) { 2939 // This is a workaround for handling expressions 2940 // as arguments of SP3 'abs' modifier, for example: 2941 // |1.0| 2942 // |-1| 2943 // |1+x| 2944 // This syntax is not compatible with syntax of standard 2945 // MC expressions (due to the trailing '|'). 
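      // Parse only a primary expression so that the closing '|' is not
      // consumed as a bitwise-or operator.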
2946 SMLoc EndLoc; 2947 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2948 return MatchOperand_ParseFail; 2949 } else { 2950 if (Parser.parseExpression(Expr)) 2951 return MatchOperand_ParseFail; 2952 } 2953 2954 if (Expr->evaluateAsAbsolute(IntVal)) { 2955 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2956 } else { 2957 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2958 } 2959 2960 return MatchOperand_Success; 2961 } 2962 2963 return MatchOperand_NoMatch; 2964 } 2965 2966 OperandMatchResultTy 2967 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2968 if (!isRegister()) 2969 return MatchOperand_NoMatch; 2970 2971 if (auto R = parseRegister()) { 2972 assert(R->isReg()); 2973 Operands.push_back(std::move(R)); 2974 return MatchOperand_Success; 2975 } 2976 return MatchOperand_ParseFail; 2977 } 2978 2979 OperandMatchResultTy 2980 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2981 auto res = parseReg(Operands); 2982 if (res != MatchOperand_NoMatch) { 2983 return res; 2984 } else if (isModifier()) { 2985 return MatchOperand_NoMatch; 2986 } else { 2987 return parseImm(Operands, HasSP3AbsMod); 2988 } 2989 } 2990 2991 bool 2992 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2993 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2994 const auto &str = Token.getString(); 2995 return str == "abs" || str == "neg" || str == "sext"; 2996 } 2997 return false; 2998 } 2999 3000 bool 3001 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3002 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3003 } 3004 3005 bool 3006 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3007 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3008 } 3009 3010 bool 3011 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3012 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3013 } 3014 3015 // Check if this is an operand modifier or an opcode modifier 3016 // which may look like an expression but it is not. We should 3017 // avoid parsing these modifiers as expressions. Currently 3018 // recognized sequences are: 3019 // |...| 3020 // abs(...) 3021 // neg(...) 3022 // sext(...) 3023 // -reg 3024 // -|...| 3025 // -abs(...) 3026 // name:... 3027 // Note that simple opcode modifiers like 'gds' may be parsed as 3028 // expressions; this is a special case. See getExpressionAsToken. 3029 // 3030 bool 3031 AMDGPUAsmParser::isModifier() { 3032 3033 AsmToken Tok = getToken(); 3034 AsmToken NextToken[2]; 3035 peekTokens(NextToken); 3036 3037 return isOperandModifier(Tok, NextToken[0]) || 3038 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3039 isOpcodeModifierWithVal(Tok, NextToken[0]); 3040 } 3041 3042 // Check if the current token is an SP3 'neg' modifier. 3043 // Currently this modifier is allowed in the following context: 3044 // 3045 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3046 // 2. Before an 'abs' modifier: -abs(...) 3047 // 3. Before an SP3 'abs' modifier: -|...| 3048 // 3049 // In all other cases "-" is handled as a part 3050 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3124 } 3125 3126 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3127 return MatchOperand_ParseFail; 3128 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3129 return MatchOperand_ParseFail; 3130 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3131 return MatchOperand_ParseFail; 3132 3133 AMDGPUOperand::Modifiers Mods; 3134 Mods.Abs = Abs || SP3Abs; 3135 Mods.Neg = Neg || SP3Neg; 3136 3137 if (Mods.hasFPModifiers()) { 3138 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3139 if (Op.isExpr()) { 3140 Error(Op.getStartLoc(), "expected an absolute expression"); 3141 return MatchOperand_ParseFail; 3142 } 3143 Op.setModifiers(Mods); 3144 } 3145 return MatchOperand_Success; 3146 } 3147 3148 OperandMatchResultTy 3149 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3150 bool AllowImm) { 3151 bool Sext = trySkipId("sext"); 3152 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3153 return MatchOperand_ParseFail; 3154 3155 OperandMatchResultTy Res; 3156 if (AllowImm) { 3157 Res = parseRegOrImm(Operands); 3158 } else { 3159 Res = parseReg(Operands); 3160 } 3161 if (Res != MatchOperand_Success) { 3162 return Sext? MatchOperand_ParseFail : Res; 3163 } 3164 3165 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3166 return MatchOperand_ParseFail; 3167 3168 AMDGPUOperand::Modifiers Mods; 3169 Mods.Sext = Sext; 3170 3171 if (Mods.hasIntModifiers()) { 3172 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3173 if (Op.isExpr()) { 3174 Error(Op.getStartLoc(), "expected an absolute expression"); 3175 return MatchOperand_ParseFail; 3176 } 3177 Op.setModifiers(Mods); 3178 } 3179 3180 return MatchOperand_Success; 3181 } 3182 3183 OperandMatchResultTy 3184 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3185 return parseRegOrImmWithFPInputMods(Operands, false); 3186 } 3187 3188 OperandMatchResultTy 3189 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3190 return parseRegOrImmWithIntInputMods(Operands, false); 3191 } 3192 3193 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3194 auto Loc = getLoc(); 3195 if (trySkipId("off")) { 3196 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3197 AMDGPUOperand::ImmTyOff, false)); 3198 return MatchOperand_Success; 3199 } 3200 3201 if (!isRegister()) 3202 return MatchOperand_NoMatch; 3203 3204 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3205 if (Reg) { 3206 Operands.push_back(std::move(Reg)); 3207 return MatchOperand_Success; 3208 } 3209 3210 return MatchOperand_ParseFail; 3211 3212 } 3213 3214 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3215 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3216 3217 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3218 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3219 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3220 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3221 return Match_InvalidOperand; 3222 3223 if ((TSFlags & SIInstrFlags::VOP3) && 3224 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3225 getForcedEncodingSize() != 64) 3226 return Match_PreferE32; 3227 3228 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3229 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3230 // v_mac_f32/16 allow only dst_sel == DWORD; 3231 auto OpNum = 3232 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3233 const auto &Op = Inst.getOperand(OpNum); 3234 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3235 return Match_InvalidOperand; 3236 } 3237 } 3238 3239 return Match_Success; 3240 } 3241 3242 static ArrayRef<unsigned> getAllVariants() { 3243 static const unsigned Variants[] = { 3244 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3245 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3246 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3247 }; 3248 3249 return makeArrayRef(Variants); 3250 } 3251 3252 // What asm variants we should check 3253 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3254 if (isForcedDPP() && isForcedVOP3()) { 3255 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3256 return makeArrayRef(Variants); 3257 } 3258 if (getForcedEncodingSize() == 32) { 3259 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3260 return makeArrayRef(Variants); 3261 } 3262 3263 if (isForcedVOP3()) { 3264 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3265 return makeArrayRef(Variants); 3266 } 3267 3268 if (isForcedSDWA()) { 3269 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3270 AMDGPUAsmVariants::SDWA9}; 3271 return makeArrayRef(Variants); 3272 } 3273 3274 if (isForcedDPP()) { 3275 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3276 return makeArrayRef(Variants); 3277 } 3278 3279 return getAllVariants(); 3280 } 3281 3282 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3283 if (isForcedDPP() && isForcedVOP3()) 3284 return "e64_dpp"; 3285 3286 if (getForcedEncodingSize() == 32) 3287 return "e32"; 3288 3289 if (isForcedVOP3()) 3290 return "e64"; 3291 3292 if (isForcedSDWA()) 3293 return "sdwa"; 3294 3295 if (isForcedDPP()) 3296 return "dpp"; 3297 3298 return ""; 3299 } 3300 3301 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3302 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3303 const unsigned Num = Desc.getNumImplicitUses(); 3304 for (unsigned i = 0; i < Num; ++i) { 3305 unsigned Reg = Desc.ImplicitUses[i]; 3306 switch (Reg) { 3307 case AMDGPU::FLAT_SCR: 3308 case AMDGPU::VCC: 3309 case AMDGPU::VCC_LO: 3310 case AMDGPU::VCC_HI: 3311 case AMDGPU::M0: 3312 return Reg; 3313 default: 3314 break; 3315 } 3316 } 3317 return AMDGPU::NoRegister; 3318 } 3319 3320 // NB: This code is correct only when used to check constant 3321 // bus limitations because GFX7 support no f16 inline constants. 3322 // Note that there are no cases when a GFX7 opcode violates 3323 // constant bus limitations due to the use of an f16 constant. 
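// In other words, the 16-bit cases below may misclassify values on GFX7
// (which has no f16 inline constants), but this cannot affect the result of
// the constant bus check.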
3324 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3325 unsigned OpIdx) const { 3326 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3327 3328 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3329 return false; 3330 } 3331 3332 const MCOperand &MO = Inst.getOperand(OpIdx); 3333 3334 int64_t Val = MO.getImm(); 3335 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3336 3337 switch (OpSize) { // expected operand size 3338 case 8: 3339 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3340 case 4: 3341 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3342 case 2: { 3343 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3344 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3345 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3346 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3347 return AMDGPU::isInlinableIntLiteral(Val); 3348 3349 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3350 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3351 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3352 return AMDGPU::isInlinableIntLiteralV216(Val); 3353 3354 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3355 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3356 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3357 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3358 3359 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3360 } 3361 default: 3362 llvm_unreachable("invalid operand size"); 3363 } 3364 } 3365 3366 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3367 if (!isGFX10Plus()) 3368 return 1; 3369 3370 switch (Opcode) { 3371 // 64-bit shift instructions can use only one scalar value input 3372 case AMDGPU::V_LSHLREV_B64_e64: 3373 case AMDGPU::V_LSHLREV_B64_gfx10: 3374 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3375 case AMDGPU::V_LSHRREV_B64_e64: 3376 case AMDGPU::V_LSHRREV_B64_gfx10: 3377 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3378 case AMDGPU::V_ASHRREV_I64_e64: 3379 case AMDGPU::V_ASHRREV_I64_gfx10: 3380 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3381 case AMDGPU::V_LSHL_B64_e64: 3382 case AMDGPU::V_LSHR_B64_e64: 3383 case AMDGPU::V_ASHR_I64_e64: 3384 return 1; 3385 default: 3386 return 2; 3387 } 3388 } 3389 3390 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3391 const MCOperand &MO = Inst.getOperand(OpIdx); 3392 if (MO.isImm()) { 3393 return !isInlineConstant(Inst, OpIdx); 3394 } else if (MO.isReg()) { 3395 auto Reg = MO.getReg(); 3396 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3397 auto PReg = mc2PseudoReg(Reg); 3398 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3399 } else { 3400 return true; 3401 } 3402 } 3403 3404 bool 3405 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3406 const OperandVector &Operands) { 3407 const unsigned Opcode = Inst.getOpcode(); 3408 const MCInstrDesc &Desc = MII.get(Opcode); 3409 unsigned LastSGPR = AMDGPU::NoRegister; 3410 unsigned ConstantBusUseCount = 0; 3411 unsigned NumLiterals = 0; 3412 unsigned LiteralSize; 3413 3414 if (Desc.TSFlags & 3415 (SIInstrFlags::VOPC | 3416 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3417 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3418 SIInstrFlags::SDWA)) { 3419 // Check special imm operands (used by madmk, etc) 3420 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3421 ++NumLiterals; 3422 LiteralSize = 4; 3423 } 3424 3425 SmallDenseSet<unsigned> SGPRsUsed; 3426 unsigned SGPRUsed = 
        findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3490 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3491 return false; 3492 } 3493 3494 bool 3495 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3496 const OperandVector &Operands) { 3497 const unsigned Opcode = Inst.getOpcode(); 3498 const MCInstrDesc &Desc = MII.get(Opcode); 3499 3500 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3501 if (DstIdx == -1 || 3502 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3503 return true; 3504 } 3505 3506 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3507 3508 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3509 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3510 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3511 3512 assert(DstIdx != -1); 3513 const MCOperand &Dst = Inst.getOperand(DstIdx); 3514 assert(Dst.isReg()); 3515 3516 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3517 3518 for (int SrcIdx : SrcIndices) { 3519 if (SrcIdx == -1) break; 3520 const MCOperand &Src = Inst.getOperand(SrcIdx); 3521 if (Src.isReg()) { 3522 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3523 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3524 Error(getRegLoc(SrcReg, Operands), 3525 "destination must be different than all sources"); 3526 return false; 3527 } 3528 } 3529 } 3530 3531 return true; 3532 } 3533 3534 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3535 3536 const unsigned Opc = Inst.getOpcode(); 3537 const MCInstrDesc &Desc = MII.get(Opc); 3538 3539 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3540 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3541 assert(ClampIdx != -1); 3542 return Inst.getOperand(ClampIdx).getImm() == 0; 3543 } 3544 3545 return true; 3546 } 3547 3548 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3549 3550 const unsigned Opc = Inst.getOpcode(); 3551 const MCInstrDesc &Desc = MII.get(Opc); 3552 3553 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3554 return None; 3555 3556 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3557 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3558 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3559 3560 assert(VDataIdx != -1); 3561 3562 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3563 return None; 3564 3565 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3566 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3567 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3568 if (DMask == 0) 3569 DMask = 1; 3570 3571 bool isPackedD16 = false; 3572 unsigned DataSize = 3573 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3574 if (hasPackedD16()) { 3575 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3576 isPackedD16 = D16Idx >= 0; 3577 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3578 DataSize = (DataSize + 1) / 2; 3579 } 3580 3581 if ((VDataSize / 4) == DataSize + TFESize) 3582 return None; 3583 3584 return StringRef(isPackedD16 3585 ? 
"image data size does not match dmask, d16 and tfe" 3586 : "image data size does not match dmask and tfe"); 3587 } 3588 3589 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3590 const unsigned Opc = Inst.getOpcode(); 3591 const MCInstrDesc &Desc = MII.get(Opc); 3592 3593 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3594 return true; 3595 3596 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3597 3598 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3599 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3600 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3601 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3602 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3603 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3604 3605 assert(VAddr0Idx != -1); 3606 assert(SrsrcIdx != -1); 3607 assert(SrsrcIdx > VAddr0Idx); 3608 3609 if (DimIdx == -1) 3610 return true; // intersect_ray 3611 3612 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3613 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3614 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3615 unsigned ActualAddrSize = 3616 IsNSA ? SrsrcIdx - VAddr0Idx 3617 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3618 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3619 3620 unsigned ExpectedAddrSize = 3621 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3622 3623 if (!IsNSA) { 3624 if (ExpectedAddrSize > 8) 3625 ExpectedAddrSize = 16; 3626 3627 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3628 // This provides backward compatibility for assembly created 3629 // before 160b/192b/224b types were directly supported. 3630 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3631 return true; 3632 } 3633 3634 return ActualAddrSize == ExpectedAddrSize; 3635 } 3636 3637 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3638 3639 const unsigned Opc = Inst.getOpcode(); 3640 const MCInstrDesc &Desc = MII.get(Opc); 3641 3642 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3643 return true; 3644 if (!Desc.mayLoad() || !Desc.mayStore()) 3645 return true; // Not atomic 3646 3647 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3648 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3649 3650 // This is an incomplete check because image_atomic_cmpswap 3651 // may only use 0x3 and 0xf while other atomic operations 3652 // may use 0x1 and 0x3. However these limitations are 3653 // verified when we check that dmask matches dst size. 3654 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3655 } 3656 3657 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3658 3659 const unsigned Opc = Inst.getOpcode(); 3660 const MCInstrDesc &Desc = MII.get(Opc); 3661 3662 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3663 return true; 3664 3665 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3666 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3667 3668 // GATHER4 instructions use dmask in a different fashion compared to 3669 // other MIMG instructions. The only useful DMASK values are 3670 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3671 // (red,red,red,red) etc.) The ISA document doesn't mention 3672 // this. 
3673 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3674 } 3675 3676 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3677 const unsigned Opc = Inst.getOpcode(); 3678 const MCInstrDesc &Desc = MII.get(Opc); 3679 3680 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3681 return true; 3682 3683 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3684 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3685 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3686 3687 if (!BaseOpcode->MSAA) 3688 return true; 3689 3690 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3691 assert(DimIdx != -1); 3692 3693 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3694 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3695 3696 return DimInfo->MSAA; 3697 } 3698 3699 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3700 { 3701 switch (Opcode) { 3702 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3703 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3704 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3705 return true; 3706 default: 3707 return false; 3708 } 3709 } 3710 3711 // movrels* opcodes should only allow VGPRS as src0. 3712 // This is specified in .td description for vop1/vop3, 3713 // but sdwa is handled differently. See isSDWAOperand. 3714 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3715 const OperandVector &Operands) { 3716 3717 const unsigned Opc = Inst.getOpcode(); 3718 const MCInstrDesc &Desc = MII.get(Opc); 3719 3720 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3721 return true; 3722 3723 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3724 assert(Src0Idx != -1); 3725 3726 SMLoc ErrLoc; 3727 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3728 if (Src0.isReg()) { 3729 auto Reg = mc2PseudoReg(Src0.getReg()); 3730 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3731 if (!isSGPR(Reg, TRI)) 3732 return true; 3733 ErrLoc = getRegLoc(Reg, Operands); 3734 } else { 3735 ErrLoc = getConstLoc(Operands); 3736 } 3737 3738 Error(ErrLoc, "source operand must be a VGPR"); 3739 return false; 3740 } 3741 3742 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3743 const OperandVector &Operands) { 3744 3745 const unsigned Opc = Inst.getOpcode(); 3746 3747 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3748 return true; 3749 3750 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3751 assert(Src0Idx != -1); 3752 3753 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3754 if (!Src0.isReg()) 3755 return true; 3756 3757 auto Reg = mc2PseudoReg(Src0.getReg()); 3758 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3759 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3760 Error(getRegLoc(Reg, Operands), 3761 "source operand must be either a VGPR or an inline constant"); 3762 return false; 3763 } 3764 3765 return true; 3766 } 3767 3768 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3769 const OperandVector &Operands) { 3770 const unsigned Opc = Inst.getOpcode(); 3771 const MCInstrDesc &Desc = MII.get(Opc); 3772 3773 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3774 return true; 3775 3776 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3777 if (Src2Idx == -1) 3778 return true; 3779 3780 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3781 if (!Src2.isReg()) 3782 return true; 3783 3784 MCRegister Src2Reg = Src2.getReg(); 3785 MCRegister DstReg = Inst.getOperand(0).getReg(); 3786 if (Src2Reg == DstReg) 3787 return 
true; 3788 3789 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3790 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3791 return true; 3792 3793 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3794 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3795 "source 2 operand must not partially overlap with dst"); 3796 return false; 3797 } 3798 3799 return true; 3800 } 3801 3802 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3803 switch (Inst.getOpcode()) { 3804 default: 3805 return true; 3806 case V_DIV_SCALE_F32_gfx6_gfx7: 3807 case V_DIV_SCALE_F32_vi: 3808 case V_DIV_SCALE_F32_gfx10: 3809 case V_DIV_SCALE_F64_gfx6_gfx7: 3810 case V_DIV_SCALE_F64_vi: 3811 case V_DIV_SCALE_F64_gfx10: 3812 break; 3813 } 3814 3815 // TODO: Check that src0 = src1 or src2. 3816 3817 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3818 AMDGPU::OpName::src1_modifiers, 3819 AMDGPU::OpName::src2_modifiers}) { 3820 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3821 .getImm() & 3822 SISrcMods::ABS) { 3823 return false; 3824 } 3825 } 3826 3827 return true; 3828 } 3829 3830 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3831 3832 const unsigned Opc = Inst.getOpcode(); 3833 const MCInstrDesc &Desc = MII.get(Opc); 3834 3835 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3836 return true; 3837 3838 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3839 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3840 if (isCI() || isSI()) 3841 return false; 3842 } 3843 3844 return true; 3845 } 3846 3847 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3848 const unsigned Opc = Inst.getOpcode(); 3849 const MCInstrDesc &Desc = MII.get(Opc); 3850 3851 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3852 return true; 3853 3854 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3855 if (DimIdx < 0) 3856 return true; 3857 3858 long Imm = Inst.getOperand(DimIdx).getImm(); 3859 if (Imm < 0 || Imm >= 8) 3860 return false; 3861 3862 return true; 3863 } 3864 3865 static bool IsRevOpcode(const unsigned Opcode) 3866 { 3867 switch (Opcode) { 3868 case AMDGPU::V_SUBREV_F32_e32: 3869 case AMDGPU::V_SUBREV_F32_e64: 3870 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3871 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3872 case AMDGPU::V_SUBREV_F32_e32_vi: 3873 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3874 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3875 case AMDGPU::V_SUBREV_F32_e64_vi: 3876 3877 case AMDGPU::V_SUBREV_CO_U32_e32: 3878 case AMDGPU::V_SUBREV_CO_U32_e64: 3879 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3880 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3881 3882 case AMDGPU::V_SUBBREV_U32_e32: 3883 case AMDGPU::V_SUBBREV_U32_e64: 3884 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3885 case AMDGPU::V_SUBBREV_U32_e32_vi: 3886 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3887 case AMDGPU::V_SUBBREV_U32_e64_vi: 3888 3889 case AMDGPU::V_SUBREV_U32_e32: 3890 case AMDGPU::V_SUBREV_U32_e64: 3891 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3892 case AMDGPU::V_SUBREV_U32_e32_vi: 3893 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3894 case AMDGPU::V_SUBREV_U32_e64_vi: 3895 3896 case AMDGPU::V_SUBREV_F16_e32: 3897 case AMDGPU::V_SUBREV_F16_e64: 3898 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3899 case AMDGPU::V_SUBREV_F16_e32_vi: 3900 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3901 case AMDGPU::V_SUBREV_F16_e64_vi: 3902 3903 case AMDGPU::V_SUBREV_U16_e32: 3904 case AMDGPU::V_SUBREV_U16_e64: 3905 case AMDGPU::V_SUBREV_U16_e32_vi: 3906 case AMDGPU::V_SUBREV_U16_e64_vi: 3907 3908 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3909 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3910 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3911 3912 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3913 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3914 3915 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3916 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3917 3918 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3919 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3920 3921 case AMDGPU::V_LSHRREV_B32_e32: 3922 case AMDGPU::V_LSHRREV_B32_e64: 3923 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3924 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3925 case AMDGPU::V_LSHRREV_B32_e32_vi: 3926 case AMDGPU::V_LSHRREV_B32_e64_vi: 3927 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3928 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3929 3930 case AMDGPU::V_ASHRREV_I32_e32: 3931 case AMDGPU::V_ASHRREV_I32_e64: 3932 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3933 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3934 case AMDGPU::V_ASHRREV_I32_e32_vi: 3935 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3936 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3937 case AMDGPU::V_ASHRREV_I32_e64_vi: 3938 3939 case AMDGPU::V_LSHLREV_B32_e32: 3940 case AMDGPU::V_LSHLREV_B32_e64: 3941 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3942 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3943 case AMDGPU::V_LSHLREV_B32_e32_vi: 3944 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3945 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3946 case AMDGPU::V_LSHLREV_B32_e64_vi: 3947 3948 case AMDGPU::V_LSHLREV_B16_e32: 3949 case AMDGPU::V_LSHLREV_B16_e64: 3950 case AMDGPU::V_LSHLREV_B16_e32_vi: 3951 case AMDGPU::V_LSHLREV_B16_e64_vi: 3952 case AMDGPU::V_LSHLREV_B16_gfx10: 3953 3954 case AMDGPU::V_LSHRREV_B16_e32: 3955 case AMDGPU::V_LSHRREV_B16_e64: 3956 case AMDGPU::V_LSHRREV_B16_e32_vi: 3957 case AMDGPU::V_LSHRREV_B16_e64_vi: 3958 case AMDGPU::V_LSHRREV_B16_gfx10: 3959 3960 case AMDGPU::V_ASHRREV_I16_e32: 3961 case AMDGPU::V_ASHRREV_I16_e64: 3962 case AMDGPU::V_ASHRREV_I16_e32_vi: 3963 case AMDGPU::V_ASHRREV_I16_e64_vi: 3964 case AMDGPU::V_ASHRREV_I16_gfx10: 3965 3966 case AMDGPU::V_LSHLREV_B64_e64: 3967 case AMDGPU::V_LSHLREV_B64_gfx10: 3968 case AMDGPU::V_LSHLREV_B64_vi: 3969 3970 case AMDGPU::V_LSHRREV_B64_e64: 3971 case AMDGPU::V_LSHRREV_B64_gfx10: 3972 case AMDGPU::V_LSHRREV_B64_vi: 3973 3974 case AMDGPU::V_ASHRREV_I64_e64: 3975 case AMDGPU::V_ASHRREV_I64_gfx10: 3976 case AMDGPU::V_ASHRREV_I64_vi: 3977 3978 case AMDGPU::V_PK_LSHLREV_B16: 3979 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3980 case AMDGPU::V_PK_LSHLREV_B16_vi: 3981 3982 case AMDGPU::V_PK_LSHRREV_B16: 3983 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3984 case AMDGPU::V_PK_LSHRREV_B16_vi: 3985 case AMDGPU::V_PK_ASHRREV_I16: 3986 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3987 case AMDGPU::V_PK_ASHRREV_I16_vi: 3988 return true; 3989 default: 3990 return false; 3991 } 3992 } 3993 3994 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3995 3996 using namespace SIInstrFlags; 3997 const unsigned Opcode = Inst.getOpcode(); 3998 const MCInstrDesc &Desc = MII.get(Opcode); 3999 4000 // lds_direct register is defined so that it can be used 4001 // with 9-bit operands only. Ignore encodings which do not accept these. 
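  // Illustrative example (assumed syntax): "v_mov_b32 v0, lds_direct" is
  // accepted on targets that support lds_direct, while using lds_direct as
  // src1/src2, or together with SDWA or a *rev opcode, is rejected below.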
4002 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4003 if ((Desc.TSFlags & Enc) == 0) 4004 return None; 4005 4006 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4007 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4008 if (SrcIdx == -1) 4009 break; 4010 const auto &Src = Inst.getOperand(SrcIdx); 4011 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4012 4013 if (isGFX90A() || isGFX11Plus()) 4014 return StringRef("lds_direct is not supported on this GPU"); 4015 4016 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4017 return StringRef("lds_direct cannot be used with this instruction"); 4018 4019 if (SrcName != OpName::src0) 4020 return StringRef("lds_direct may be used as src0 only"); 4021 } 4022 } 4023 4024 return None; 4025 } 4026 4027 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4028 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4029 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4030 if (Op.isFlatOffset()) 4031 return Op.getStartLoc(); 4032 } 4033 return getLoc(); 4034 } 4035 4036 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4037 const OperandVector &Operands) { 4038 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4039 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4040 return true; 4041 4042 auto Opcode = Inst.getOpcode(); 4043 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4044 assert(OpNum != -1); 4045 4046 const auto &Op = Inst.getOperand(OpNum); 4047 if (!hasFlatOffsets() && Op.getImm() != 0) { 4048 Error(getFlatOffsetLoc(Operands), 4049 "flat offset modifier is not supported on this GPU"); 4050 return false; 4051 } 4052 4053 // For FLAT segment the offset must be positive; 4054 // MSB is ignored and forced to zero. 4055 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4056 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4057 if (!isIntN(OffsetSize, Op.getImm())) { 4058 Error(getFlatOffsetLoc(Operands), 4059 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4060 return false; 4061 } 4062 } else { 4063 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4064 if (!isUIntN(OffsetSize, Op.getImm())) { 4065 Error(getFlatOffsetLoc(Operands), 4066 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4067 return false; 4068 } 4069 } 4070 4071 return true; 4072 } 4073 4074 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4075 // Start with second operand because SMEM Offset cannot be dst or src0. 
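  // Illustrative example (assumed operand layout "mnemonic dst, base, offset"):
  // in "s_load_dword s5, s[2:3], 0x1000" the offset can only appear at
  // operand index 2 or later, so the scan begins there.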
4076 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4077 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4078 if (Op.isSMEMOffset()) 4079 return Op.getStartLoc(); 4080 } 4081 return getLoc(); 4082 } 4083 4084 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4085 const OperandVector &Operands) { 4086 if (isCI() || isSI()) 4087 return true; 4088 4089 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4090 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4091 return true; 4092 4093 auto Opcode = Inst.getOpcode(); 4094 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4095 if (OpNum == -1) 4096 return true; 4097 4098 const auto &Op = Inst.getOperand(OpNum); 4099 if (!Op.isImm()) 4100 return true; 4101 4102 uint64_t Offset = Op.getImm(); 4103 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4104 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4105 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4106 return true; 4107 4108 Error(getSMEMOffsetLoc(Operands), 4109 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4110 "expected a 21-bit signed offset"); 4111 4112 return false; 4113 } 4114 4115 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4116 unsigned Opcode = Inst.getOpcode(); 4117 const MCInstrDesc &Desc = MII.get(Opcode); 4118 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4119 return true; 4120 4121 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4122 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4123 4124 const int OpIndices[] = { Src0Idx, Src1Idx }; 4125 4126 unsigned NumExprs = 0; 4127 unsigned NumLiterals = 0; 4128 uint32_t LiteralValue; 4129 4130 for (int OpIdx : OpIndices) { 4131 if (OpIdx == -1) break; 4132 4133 const MCOperand &MO = Inst.getOperand(OpIdx); 4134 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4135 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4136 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4137 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4138 if (NumLiterals == 0 || LiteralValue != Value) { 4139 LiteralValue = Value; 4140 ++NumLiterals; 4141 } 4142 } else if (MO.isExpr()) { 4143 ++NumExprs; 4144 } 4145 } 4146 } 4147 4148 return NumLiterals + NumExprs <= 1; 4149 } 4150 4151 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4152 const unsigned Opc = Inst.getOpcode(); 4153 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4154 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4155 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4156 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4157 4158 if (OpSel & ~3) 4159 return false; 4160 } 4161 4162 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4163 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4164 if (OpSelIdx != -1) { 4165 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4166 return false; 4167 } 4168 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4169 if (OpSelHiIdx != -1) { 4170 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4171 return false; 4172 } 4173 } 4174 4175 return true; 4176 } 4177 4178 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4179 const OperandVector &Operands) { 4180 const unsigned Opc = Inst.getOpcode(); 4181 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4182 if (DppCtrlIdx < 0) 4183 return true; 4184 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4185 4186 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4187 // DPP64 is supported for row_newbcast only. 4188 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4189 if (Src0Idx >= 0 && 4190 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4191 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4192 Error(S, "64 bit dpp only supports row_newbcast"); 4193 return false; 4194 } 4195 } 4196 4197 return true; 4198 } 4199 4200 // Check if VCC register matches wavefront size 4201 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4202 auto FB = getFeatureBits(); 4203 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4204 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4205 } 4206 4207 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4208 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4209 const OperandVector &Operands) { 4210 unsigned Opcode = Inst.getOpcode(); 4211 const MCInstrDesc &Desc = MII.get(Opcode); 4212 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4213 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4214 ImmIdx == -1) 4215 return true; 4216 4217 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4218 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4219 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4220 4221 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4222 4223 unsigned NumExprs = 0; 4224 unsigned NumLiterals = 0; 4225 uint32_t LiteralValue; 4226 4227 for (int OpIdx : OpIndices) { 4228 if (OpIdx == -1) 4229 continue; 4230 4231 const MCOperand &MO = Inst.getOperand(OpIdx); 4232 if (!MO.isImm() && !MO.isExpr()) 4233 continue; 4234 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4235 continue; 4236 4237 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4238 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4239 Error(getConstLoc(Operands), 4240 "inline constants are not allowed for this operand"); 4241 return false; 4242 } 4243 4244 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4245 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4246 if (NumLiterals == 0 || LiteralValue != Value) { 4247 LiteralValue = Value; 4248 ++NumLiterals; 4249 } 4250 } else if (MO.isExpr()) { 4251 ++NumExprs; 4252 } 4253 } 4254 NumLiterals += NumExprs; 4255 4256 if (!NumLiterals) 4257 return true; 4258 4259 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4260 Error(getLitLoc(Operands), "literal operands are not supported"); 4261 return false; 4262 } 4263 4264 if (NumLiterals > 1) { 4265 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4266 return false; 4267 } 4268 4269 return true; 4270 } 4271 4272 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4273 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4274 const MCRegisterInfo *MRI) { 4275 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4276 if (OpIdx < 0) 4277 return -1; 4278 4279 const MCOperand &Op = Inst.getOperand(OpIdx); 4280 if (!Op.isReg()) 4281 return -1; 4282 4283 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4284 auto Reg = Sub ? Sub : Op.getReg(); 4285 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4286 return AGPR32.contains(Reg) ? 
1 : 0; 4287 } 4288 4289 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4290 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4291 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4292 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4293 SIInstrFlags::DS)) == 0) 4294 return true; 4295 4296 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4297 : AMDGPU::OpName::vdata; 4298 4299 const MCRegisterInfo *MRI = getMRI(); 4300 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4301 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4302 4303 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4304 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4305 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4306 return false; 4307 } 4308 4309 auto FB = getFeatureBits(); 4310 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4311 if (DataAreg < 0 || DstAreg < 0) 4312 return true; 4313 return DstAreg == DataAreg; 4314 } 4315 4316 return DstAreg < 1 && DataAreg < 1; 4317 } 4318 4319 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4320 auto FB = getFeatureBits(); 4321 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4322 return true; 4323 4324 const MCRegisterInfo *MRI = getMRI(); 4325 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4326 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4327 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4328 const MCOperand &Op = Inst.getOperand(I); 4329 if (!Op.isReg()) 4330 continue; 4331 4332 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4333 if (!Sub) 4334 continue; 4335 4336 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4337 return false; 4338 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4339 return false; 4340 } 4341 4342 return true; 4343 } 4344 4345 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4346 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4347 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4348 if (Op.isBLGP()) 4349 return Op.getStartLoc(); 4350 } 4351 return SMLoc(); 4352 } 4353 4354 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4355 const OperandVector &Operands) { 4356 unsigned Opc = Inst.getOpcode(); 4357 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4358 if (BlgpIdx == -1) 4359 return true; 4360 SMLoc BLGPLoc = getBLGPLoc(Operands); 4361 if (!BLGPLoc.isValid()) 4362 return true; 4363 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4364 auto FB = getFeatureBits(); 4365 bool UsesNeg = false; 4366 if (FB[AMDGPU::FeatureGFX940Insts]) { 4367 switch (Opc) { 4368 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4369 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4370 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4371 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4372 UsesNeg = true; 4373 } 4374 } 4375 4376 if (IsNeg == UsesNeg) 4377 return true; 4378 4379 Error(BLGPLoc, 4380 UsesNeg ? "invalid modifier: blgp is not supported" 4381 : "invalid modifier: neg is not supported"); 4382 4383 return false; 4384 } 4385 4386 // gfx90a has an undocumented limitation: 4387 // DS_GWS opcodes must use even aligned registers. 
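// For example (illustrative syntax): "ds_gws_init v2 gds" is accepted,
// whereas "ds_gws_init v3 gds" is rejected below because v3 is not
// even-aligned.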
4388 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4389 const OperandVector &Operands) { 4390 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4391 return true; 4392 4393 int Opc = Inst.getOpcode(); 4394 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4395 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4396 return true; 4397 4398 const MCRegisterInfo *MRI = getMRI(); 4399 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4400 int Data0Pos = 4401 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4402 assert(Data0Pos != -1); 4403 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4404 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4405 if (RegIdx & 1) { 4406 SMLoc RegLoc = getRegLoc(Reg, Operands); 4407 Error(RegLoc, "vgpr must be even aligned"); 4408 return false; 4409 } 4410 4411 return true; 4412 } 4413 4414 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4415 const OperandVector &Operands, 4416 const SMLoc &IDLoc) { 4417 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4418 AMDGPU::OpName::cpol); 4419 if (CPolPos == -1) 4420 return true; 4421 4422 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4423 4424 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4425 if (TSFlags & SIInstrFlags::SMRD) { 4426 if (CPol && (isSI() || isCI())) { 4427 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4428 Error(S, "cache policy is not supported for SMRD instructions"); 4429 return false; 4430 } 4431 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4432 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4433 return false; 4434 } 4435 } 4436 4437 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4438 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4439 StringRef CStr(S.getPointer()); 4440 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4441 Error(S, "scc is not supported on this GPU"); 4442 return false; 4443 } 4444 4445 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4446 return true; 4447 4448 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4449 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4450 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4451 : "instruction must use glc"); 4452 return false; 4453 } 4454 } else { 4455 if (CPol & CPol::GLC) { 4456 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4457 StringRef CStr(S.getPointer()); 4458 S = SMLoc::getFromPointer( 4459 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4460 Error(S, isGFX940() ? "instruction must not use sc0" 4461 : "instruction must not use glc"); 4462 return false; 4463 } 4464 } 4465 4466 return true; 4467 } 4468 4469 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4470 const OperandVector &Operands, 4471 const SMLoc &IDLoc) { 4472 if (isGFX940()) 4473 return true; 4474 4475 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4476 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4477 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4478 return true; 4479 // This is FLAT LDS DMA. 4480 4481 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4482 StringRef CStr(S.getPointer()); 4483 if (!CStr.startswith("lds")) { 4484 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4485 // The LDS version should have the 'lds' modifier, but since it follows 4486 // optional operands its absence is ignored by the matcher.
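      // Illustrative example (syntax varies by target): "global_load_dword
      // v1, v[2:3], off lds" is the LDS DMA form; without a trailing "lds"
      // token the plain load should have been matched instead.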
4487 Error(IDLoc, "invalid operands for instruction"); 4488 return false; 4489 } 4490 4491 return true; 4492 } 4493 4494 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4495 const SMLoc &IDLoc, 4496 const OperandVector &Operands) { 4497 if (auto ErrMsg = validateLdsDirect(Inst)) { 4498 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4499 return false; 4500 } 4501 if (!validateSOPLiteral(Inst)) { 4502 Error(getLitLoc(Operands), 4503 "only one literal operand is allowed"); 4504 return false; 4505 } 4506 if (!validateVOPLiteral(Inst, Operands)) { 4507 return false; 4508 } 4509 if (!validateConstantBusLimitations(Inst, Operands)) { 4510 return false; 4511 } 4512 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4513 return false; 4514 } 4515 if (!validateIntClampSupported(Inst)) { 4516 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4517 "integer clamping is not supported on this GPU"); 4518 return false; 4519 } 4520 if (!validateOpSel(Inst)) { 4521 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4522 "invalid op_sel operand"); 4523 return false; 4524 } 4525 if (!validateDPP(Inst, Operands)) { 4526 return false; 4527 } 4528 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4529 if (!validateMIMGD16(Inst)) { 4530 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4531 "d16 modifier is not supported on this GPU"); 4532 return false; 4533 } 4534 if (!validateMIMGDim(Inst)) { 4535 Error(IDLoc, "dim modifier is required on this GPU"); 4536 return false; 4537 } 4538 if (!validateMIMGMSAA(Inst)) { 4539 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4540 "invalid dim; must be MSAA type"); 4541 return false; 4542 } 4543 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4544 Error(IDLoc, *ErrMsg); 4545 return false; 4546 } 4547 if (!validateMIMGAddrSize(Inst)) { 4548 Error(IDLoc, 4549 "image address size does not match dim and a16"); 4550 return false; 4551 } 4552 if (!validateMIMGAtomicDMask(Inst)) { 4553 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4554 "invalid atomic image dmask"); 4555 return false; 4556 } 4557 if (!validateMIMGGatherDMask(Inst)) { 4558 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4559 "invalid image_gather dmask: only one bit must be set"); 4560 return false; 4561 } 4562 if (!validateMovrels(Inst, Operands)) { 4563 return false; 4564 } 4565 if (!validateFlatOffset(Inst, Operands)) { 4566 return false; 4567 } 4568 if (!validateSMEMOffset(Inst, Operands)) { 4569 return false; 4570 } 4571 if (!validateMAIAccWrite(Inst, Operands)) { 4572 return false; 4573 } 4574 if (!validateMFMA(Inst, Operands)) { 4575 return false; 4576 } 4577 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4578 return false; 4579 } 4580 4581 if (!validateAGPRLdSt(Inst)) { 4582 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4583 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4584 : "invalid register class: agpr loads and stores not supported on this GPU" 4585 ); 4586 return false; 4587 } 4588 if (!validateVGPRAlign(Inst)) { 4589 Error(IDLoc, 4590 "invalid register class: vgpr tuples must be 64 bit aligned"); 4591 return false; 4592 } 4593 if (!validateGWS(Inst, Operands)) { 4594 return false; 4595 } 4596 4597 if (!validateBLGP(Inst, Operands)) { 4598 return false; 4599 } 4600 4601 if (!validateDivScale(Inst)) { 4602 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4603 return false; 4604 } 4605 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4606 return false; 4607 } 4608 4609 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4610 return false; 4611 } 4612 4613 return true; 4614 } 4615 4616 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4617 const FeatureBitset &FBS, 4618 unsigned VariantID = 0); 4619 4620 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4621 const FeatureBitset &AvailableFeatures, 4622 unsigned VariantID); 4623 4624 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4625 const FeatureBitset &FBS) { 4626 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4627 } 4628 4629 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4630 const FeatureBitset &FBS, 4631 ArrayRef<unsigned> Variants) { 4632 for (auto Variant : Variants) { 4633 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4634 return true; 4635 } 4636 4637 return false; 4638 } 4639 4640 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4641 const SMLoc &IDLoc) { 4642 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4643 4644 // Check if requested instruction variant is supported. 4645 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4646 return false; 4647 4648 // This instruction is not supported. 4649 // Clear any other pending errors because they are no longer relevant. 4650 getParser().clearPendingErrors(); 4651 4652 // Requested instruction variant is not supported. 4653 // Check if any other variants are supported. 4654 StringRef VariantName = getMatchedVariantName(); 4655 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4656 return Error(IDLoc, 4657 Twine(VariantName, 4658 " variant of this instruction is not supported")); 4659 } 4660 4661 // Finally check if this instruction is supported on any other GPU. 4662 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4663 return Error(IDLoc, "instruction not supported on this GPU"); 4664 } 4665 4666 // Instruction not supported on any GPU. Probably a typo. 4667 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4668 return Error(IDLoc, "invalid instruction" + Suggestion); 4669 } 4670 4671 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4672 OperandVector &Operands, 4673 MCStreamer &Out, 4674 uint64_t &ErrorInfo, 4675 bool MatchingInlineAsm) { 4676 MCInst Inst; 4677 unsigned Result = Match_Success; 4678 for (auto Variant : getMatchedVariants()) { 4679 uint64_t EI; 4680 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4681 Variant); 4682 // We order match statuses from least to most specific. 
We use the most specific 4683 // status as the result: 4684 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4685 if ((R == Match_Success) || 4686 (R == Match_PreferE32) || 4687 (R == Match_MissingFeature && Result != Match_PreferE32) || 4688 (R == Match_InvalidOperand && Result != Match_MissingFeature 4689 && Result != Match_PreferE32) || 4690 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4691 && Result != Match_MissingFeature 4692 && Result != Match_PreferE32)) { 4693 Result = R; 4694 ErrorInfo = EI; 4695 } 4696 if (R == Match_Success) 4697 break; 4698 } 4699 4700 if (Result == Match_Success) { 4701 if (!validateInstruction(Inst, IDLoc, Operands)) { 4702 return true; 4703 } 4704 Inst.setLoc(IDLoc); 4705 Out.emitInstruction(Inst, getSTI()); 4706 return false; 4707 } 4708 4709 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4710 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4711 return true; 4712 } 4713 4714 switch (Result) { 4715 default: break; 4716 case Match_MissingFeature: 4717 // It has been verified that the specified instruction 4718 // mnemonic is valid. A match was found but it requires 4719 // features which are not supported on this GPU. 4720 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4721 4722 case Match_InvalidOperand: { 4723 SMLoc ErrorLoc = IDLoc; 4724 if (ErrorInfo != ~0ULL) { 4725 if (ErrorInfo >= Operands.size()) { 4726 return Error(IDLoc, "too few operands for instruction"); 4727 } 4728 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4729 if (ErrorLoc == SMLoc()) 4730 ErrorLoc = IDLoc; 4731 } 4732 return Error(ErrorLoc, "invalid operand for instruction"); 4733 } 4734 4735 case Match_PreferE32: 4736 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4737 "should be encoded as e32"); 4738 case Match_MnemonicFail: 4739 llvm_unreachable("Invalid instructions should have been handled already"); 4740 } 4741 llvm_unreachable("Implement any new match types added!"); 4742 } 4743 4744 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4745 int64_t Tmp = -1; 4746 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4747 return true; 4748 } 4749 if (getParser().parseAbsoluteExpression(Tmp)) { 4750 return true; 4751 } 4752 Ret = static_cast<uint32_t>(Tmp); 4753 return false; 4754 } 4755 4756 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4757 uint32_t &Minor) { 4758 if (ParseAsAbsoluteExpression(Major)) 4759 return TokError("invalid major version"); 4760 4761 if (!trySkipToken(AsmToken::Comma)) 4762 return TokError("minor version number required, comma expected"); 4763 4764 if (ParseAsAbsoluteExpression(Minor)) 4765 return TokError("invalid minor version"); 4766 4767 return false; 4768 } 4769 4770 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4771 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4772 return TokError("directive only supported for amdgcn architecture"); 4773 4774 std::string TargetIDDirective; 4775 SMLoc TargetStart = getTok().getLoc(); 4776 if (getParser().parseEscapedString(TargetIDDirective)) 4777 return true; 4778 4779 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4780 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4781 return getParser().Error(TargetRange.Start, 4782 (Twine(".amdgcn_target directive's target id ") + 4783 Twine(TargetIDDirective) + 4784 Twine(" does not match the specified target id ") + 4785
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4786 4787 return false; 4788 } 4789 4790 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4791 return Error(Range.Start, "value out of range", Range); 4792 } 4793 4794 bool AMDGPUAsmParser::calculateGPRBlocks( 4795 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4796 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4797 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4798 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4799 // TODO(scott.linder): These calculations are duplicated from 4800 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4801 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4802 4803 unsigned NumVGPRs = NextFreeVGPR; 4804 unsigned NumSGPRs = NextFreeSGPR; 4805 4806 if (Version.Major >= 10) 4807 NumSGPRs = 0; 4808 else { 4809 unsigned MaxAddressableNumSGPRs = 4810 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4811 4812 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4813 NumSGPRs > MaxAddressableNumSGPRs) 4814 return OutOfRangeError(SGPRRange); 4815 4816 NumSGPRs += 4817 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4818 4819 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4820 NumSGPRs > MaxAddressableNumSGPRs) 4821 return OutOfRangeError(SGPRRange); 4822 4823 if (Features.test(FeatureSGPRInitBug)) 4824 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4825 } 4826 4827 VGPRBlocks = 4828 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4829 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4830 4831 return false; 4832 } 4833 4834 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4835 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4836 return TokError("directive only supported for amdgcn architecture"); 4837 4838 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4839 return TokError("directive only supported for amdhsa OS"); 4840 4841 StringRef KernelName; 4842 if (getParser().parseIdentifier(KernelName)) 4843 return true; 4844 4845 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4846 4847 StringSet<> Seen; 4848 4849 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4850 4851 SMRange VGPRRange; 4852 uint64_t NextFreeVGPR = 0; 4853 uint64_t AccumOffset = 0; 4854 uint64_t SharedVGPRCount = 0; 4855 SMRange SGPRRange; 4856 uint64_t NextFreeSGPR = 0; 4857 4858 // Count the number of user SGPRs implied from the enabled feature bits. 4859 unsigned ImpliedUserSGPRCount = 0; 4860 4861 // Track if the asm explicitly contains the directive for the user SGPR 4862 // count. 
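  // An explicit ".amdhsa_user_sgpr_count 6" (illustrative value) overrides the
  // implied count; it is checked against ImpliedUserSGPRCount further below.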
4863 Optional<unsigned> ExplicitUserSGPRCount; 4864 bool ReserveVCC = true; 4865 bool ReserveFlatScr = true; 4866 Optional<bool> EnableWavefrontSize32; 4867 4868 while (true) { 4869 while (trySkipToken(AsmToken::EndOfStatement)); 4870 4871 StringRef ID; 4872 SMRange IDRange = getTok().getLocRange(); 4873 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4874 return true; 4875 4876 if (ID == ".end_amdhsa_kernel") 4877 break; 4878 4879 if (Seen.find(ID) != Seen.end()) 4880 return TokError(".amdhsa_ directives cannot be repeated"); 4881 Seen.insert(ID); 4882 4883 SMLoc ValStart = getLoc(); 4884 int64_t IVal; 4885 if (getParser().parseAbsoluteExpression(IVal)) 4886 return true; 4887 SMLoc ValEnd = getLoc(); 4888 SMRange ValRange = SMRange(ValStart, ValEnd); 4889 4890 if (IVal < 0) 4891 return OutOfRangeError(ValRange); 4892 4893 uint64_t Val = IVal; 4894 4895 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4896 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4897 return OutOfRangeError(RANGE); \ 4898 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4899 4900 if (ID == ".amdhsa_group_segment_fixed_size") { 4901 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4902 return OutOfRangeError(ValRange); 4903 KD.group_segment_fixed_size = Val; 4904 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4905 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4906 return OutOfRangeError(ValRange); 4907 KD.private_segment_fixed_size = Val; 4908 } else if (ID == ".amdhsa_kernarg_size") { 4909 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4910 return OutOfRangeError(ValRange); 4911 KD.kernarg_size = Val; 4912 } else if (ID == ".amdhsa_user_sgpr_count") { 4913 ExplicitUserSGPRCount = Val; 4914 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4915 if (hasArchitectedFlatScratch()) 4916 return Error(IDRange.Start, 4917 "directive is not supported with architected flat scratch", 4918 IDRange); 4919 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4920 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4921 Val, ValRange); 4922 if (Val) 4923 ImpliedUserSGPRCount += 4; 4924 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4925 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4926 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4927 ValRange); 4928 if (Val) 4929 ImpliedUserSGPRCount += 2; 4930 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4931 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4932 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4933 ValRange); 4934 if (Val) 4935 ImpliedUserSGPRCount += 2; 4936 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4937 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4938 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4939 Val, ValRange); 4940 if (Val) 4941 ImpliedUserSGPRCount += 2; 4942 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4943 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4944 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4945 ValRange); 4946 if (Val) 4947 ImpliedUserSGPRCount += 2; 4948 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4949 if (hasArchitectedFlatScratch()) 4950 return Error(IDRange.Start, 4951 "directive is not supported with architected flat scratch", 4952 IDRange); 4953 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4954 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4955 ValRange); 4956 if (Val) 4957 ImpliedUserSGPRCount += 2; 4958 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4959 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4960 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4961 Val, ValRange); 4962 if (Val) 4963 ImpliedUserSGPRCount += 1; 4964 } else if (ID == ".amdhsa_wavefront_size32") { 4965 if (IVersion.Major < 10) 4966 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4967 EnableWavefrontSize32 = Val; 4968 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4969 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4970 Val, ValRange); 4971 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4972 if (hasArchitectedFlatScratch()) 4973 return Error(IDRange.Start, 4974 "directive is not supported with architected flat scratch", 4975 IDRange); 4976 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4977 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4978 } else if (ID == ".amdhsa_enable_private_segment") { 4979 if (!hasArchitectedFlatScratch()) 4980 return Error( 4981 IDRange.Start, 4982 "directive is not supported without architected flat scratch", 4983 IDRange); 4984 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4985 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4986 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4987 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4988 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4989 ValRange); 4990 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4991 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4992 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4993 ValRange); 4994 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4995 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4996 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4997 ValRange); 4998 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4999 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5000 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5001 ValRange); 5002 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5003 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5004 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5005 ValRange); 5006 } else if (ID == ".amdhsa_next_free_vgpr") { 5007 VGPRRange = ValRange; 5008 NextFreeVGPR = Val; 5009 } else if (ID == ".amdhsa_next_free_sgpr") { 5010 SGPRRange = ValRange; 5011 NextFreeSGPR = Val; 5012 } else if (ID == ".amdhsa_accum_offset") { 5013 if (!isGFX90A()) 5014 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5015 AccumOffset = Val; 5016 } else if (ID == ".amdhsa_reserve_vcc") { 5017 if (!isUInt<1>(Val)) 5018 return OutOfRangeError(ValRange); 5019 ReserveVCC = Val; 5020 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5021 if (IVersion.Major < 7) 5022 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5023 if (hasArchitectedFlatScratch()) 5024 return Error(IDRange.Start, 5025 "directive is not supported with architected flat scratch", 5026 IDRange); 5027 if (!isUInt<1>(Val)) 5028 return OutOfRangeError(ValRange); 5029 ReserveFlatScr = Val; 5030 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5031 if (IVersion.Major < 8) 5032 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5033 if (!isUInt<1>(Val)) 5034 return OutOfRangeError(ValRange); 5035 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5036 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5037 IDRange); 5038 } else if (ID == ".amdhsa_float_round_mode_32") { 5039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5040 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5041 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5043 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5044 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5045 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5046 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5047 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5048 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5049 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5050 ValRange); 5051 } else if (ID == ".amdhsa_dx10_clamp") { 5052 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5053 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5054 } else if (ID == ".amdhsa_ieee_mode") { 5055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5056 Val, ValRange); 5057 } else if (ID == ".amdhsa_fp16_overflow") { 5058 if (IVersion.Major < 9) 5059 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5060 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5061 ValRange); 5062 } else if (ID == ".amdhsa_tg_split") { 5063 if (!isGFX90A()) 5064 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5065 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5066 ValRange); 5067 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5068 if (IVersion.Major < 10) 5069 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5070 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5071 ValRange); 5072 } else if (ID == ".amdhsa_memory_ordered") { 5073 if (IVersion.Major < 10) 5074 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5076 ValRange); 5077 } else if (ID == ".amdhsa_forward_progress") { 5078 if (IVersion.Major < 10) 5079 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5081 ValRange); 5082 } else if (ID == ".amdhsa_shared_vgpr_count") { 5083 if (IVersion.Major < 10) 5084 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5085 SharedVGPRCount = Val; 5086 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5087 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5088 ValRange); 5089 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5090 PARSE_BITS_ENTRY( 5091 KD.compute_pgm_rsrc2, 5092 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5093 ValRange); 5094 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5096 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5097 Val, ValRange); 5098 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5099 PARSE_BITS_ENTRY( 5100 KD.compute_pgm_rsrc2, 5101 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5102 ValRange); 5103 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5104 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5105 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5106 Val, ValRange); 5107 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5109 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5110 Val, ValRange); 5111 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5112 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5113 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5114 Val, ValRange); 5115 } else if (ID == ".amdhsa_exception_int_div_zero") { 5116 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5117 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5118 Val, ValRange); 5119 } else { 5120 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5121 } 5122 5123 #undef PARSE_BITS_ENTRY 5124 } 5125 5126 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5127 return TokError(".amdhsa_next_free_vgpr directive is required"); 5128 5129 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5130 return TokError(".amdhsa_next_free_sgpr directive is required"); 5131 5132 unsigned VGPRBlocks; 5133 unsigned SGPRBlocks; 5134 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5135 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5136 EnableWavefrontSize32, NextFreeVGPR, 5137 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5138 SGPRBlocks)) 5139 return true; 5140 5141 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5142 VGPRBlocks)) 5143 return OutOfRangeError(VGPRRange); 5144 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5145 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5146 5147 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5148 SGPRBlocks)) 5149 return OutOfRangeError(SGPRRange); 5150 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5151 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5152 SGPRBlocks); 5153 5154 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5155 return TokError("amdhsa_user_sgpr_count smaller than implied by " 5156 "enabled user SGPRs"); 5157 5158 unsigned UserSGPRCount = 5159 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5160 5161 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5162 return TokError("too many user SGPRs enabled"); 5163 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5164 UserSGPRCount); 5165 5166 if (isGFX90A()) { 5167 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5168 return TokError(".amdhsa_accum_offset directive is required"); 5169 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5170 return TokError("accum_offset should be in range [4..256] in " 5171 "increments of 4"); 5172 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5173 return TokError("accum_offset exceeds total VGPR allocation"); 5174 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5175 (AccumOffset / 4 - 1)); 5176 } 5177 5178 if (IVersion.Major == 10) { 5179 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5180 if (SharedVGPRCount && EnableWavefrontSize32) { 5181 return TokError("shared_vgpr_count directive not valid on " 5182 "wavefront size 32"); 5183 } 5184 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5185 return TokError("shared_vgpr_count*2 + " 5186 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5187 "exceed 63\n"); 5188 } 5189 } 5190 5191 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5192 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5193 ReserveFlatScr); 5194 return false; 5195 } 5196 5197 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5198 uint32_t Major; 5199 uint32_t Minor; 5200 5201 if (ParseDirectiveMajorMinor(Major, Minor)) 5202 return true; 5203 5204 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5205 return false; 5206 } 5207 5208 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5209 uint32_t Major; 5210 uint32_t Minor; 5211 uint32_t Stepping; 5212 StringRef VendorName; 5213 StringRef ArchName; 5214 5215 // If this directive has no
arguments, then use the ISA version for the 5216 // targeted GPU. 5217 if (isToken(AsmToken::EndOfStatement)) { 5218 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5219 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5220 ISA.Stepping, 5221 "AMD", "AMDGPU"); 5222 return false; 5223 } 5224 5225 if (ParseDirectiveMajorMinor(Major, Minor)) 5226 return true; 5227 5228 if (!trySkipToken(AsmToken::Comma)) 5229 return TokError("stepping version number required, comma expected"); 5230 5231 if (ParseAsAbsoluteExpression(Stepping)) 5232 return TokError("invalid stepping version"); 5233 5234 if (!trySkipToken(AsmToken::Comma)) 5235 return TokError("vendor name required, comma expected"); 5236 5237 if (!parseString(VendorName, "invalid vendor name")) 5238 return true; 5239 5240 if (!trySkipToken(AsmToken::Comma)) 5241 return TokError("arch name required, comma expected"); 5242 5243 if (!parseString(ArchName, "invalid arch name")) 5244 return true; 5245 5246 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5247 VendorName, ArchName); 5248 return false; 5249 } 5250 5251 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5252 amd_kernel_code_t &Header) { 5253 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5254 // assembly for backwards compatibility. 5255 if (ID == "max_scratch_backing_memory_byte_size") { 5256 Parser.eatToEndOfStatement(); 5257 return false; 5258 } 5259 5260 SmallString<40> ErrStr; 5261 raw_svector_ostream Err(ErrStr); 5262 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5263 return TokError(Err.str()); 5264 } 5265 Lex(); 5266 5267 if (ID == "enable_wavefront_size32") { 5268 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5269 if (!isGFX10Plus()) 5270 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5271 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5272 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5273 } else { 5274 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5275 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5276 } 5277 } 5278 5279 if (ID == "wavefront_size") { 5280 if (Header.wavefront_size == 5) { 5281 if (!isGFX10Plus()) 5282 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5283 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5284 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5285 } else if (Header.wavefront_size == 6) { 5286 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5287 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5288 } 5289 } 5290 5291 if (ID == "enable_wgp_mode") { 5292 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5293 !isGFX10Plus()) 5294 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5295 } 5296 5297 if (ID == "enable_mem_ordered") { 5298 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5299 !isGFX10Plus()) 5300 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5301 } 5302 5303 if (ID == "enable_fwd_progress") { 5304 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5305 !isGFX10Plus()) 5306 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5307 } 5308 5309 return false; 5310 } 5311 5312 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5313 amd_kernel_code_t Header; 5314 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5315 5316 while (true) { 
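    // Each iteration consumes one "key = value" record, e.g. (illustrative
    // values): "wavefront_size = 6" or "enable_wavefront_size32 = 0", until
    // ".end_amd_kernel_code_t" terminates the block.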
5317 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5318 // will set the current token to EndOfStatement. 5319 while(trySkipToken(AsmToken::EndOfStatement)); 5320 5321 StringRef ID; 5322 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5323 return true; 5324 5325 if (ID == ".end_amd_kernel_code_t") 5326 break; 5327 5328 if (ParseAMDKernelCodeTValue(ID, Header)) 5329 return true; 5330 } 5331 5332 getTargetStreamer().EmitAMDKernelCodeT(Header); 5333 5334 return false; 5335 } 5336 5337 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5338 StringRef KernelName; 5339 if (!parseId(KernelName, "expected symbol name")) 5340 return true; 5341 5342 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5343 ELF::STT_AMDGPU_HSA_KERNEL); 5344 5345 KernelScope.initialize(getContext()); 5346 return false; 5347 } 5348 5349 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5350 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5351 return Error(getLoc(), 5352 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5353 "architectures"); 5354 } 5355 5356 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5357 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5358 return Error(getParser().getTok().getLoc(), "target id must match options"); 5359 5360 getTargetStreamer().EmitISAVersion(); 5361 Lex(); 5362 5363 return false; 5364 } 5365 5366 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5367 const char *AssemblerDirectiveBegin; 5368 const char *AssemblerDirectiveEnd; 5369 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5370 isHsaAbiVersion3AndAbove(&getSTI()) 5371 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5372 HSAMD::V3::AssemblerDirectiveEnd) 5373 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5374 HSAMD::AssemblerDirectiveEnd); 5375 5376 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5377 return Error(getLoc(), 5378 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5379 "not available on non-amdhsa OSes")).str()); 5380 } 5381 5382 std::string HSAMetadataString; 5383 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5384 HSAMetadataString)) 5385 return true; 5386 5387 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5388 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5389 return Error(getLoc(), "invalid HSA metadata"); 5390 } else { 5391 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5392 return Error(getLoc(), "invalid HSA metadata"); 5393 } 5394 5395 return false; 5396 } 5397 5398 /// Common code to parse out a block of text (typically YAML) between start and 5399 /// end directives. 
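/// For example, the HSA metadata caller collects everything between its begin
/// directive (.amdgpu_metadata for code object V3 and above) and the matching
/// end directive into CollectString, one statement at a time.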
5400 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5401 const char *AssemblerDirectiveEnd, 5402 std::string &CollectString) { 5403 5404 raw_string_ostream CollectStream(CollectString); 5405 5406 getLexer().setSkipSpace(false); 5407 5408 bool FoundEnd = false; 5409 while (!isToken(AsmToken::Eof)) { 5410 while (isToken(AsmToken::Space)) { 5411 CollectStream << getTokenStr(); 5412 Lex(); 5413 } 5414 5415 if (trySkipId(AssemblerDirectiveEnd)) { 5416 FoundEnd = true; 5417 break; 5418 } 5419 5420 CollectStream << Parser.parseStringToEndOfStatement() 5421 << getContext().getAsmInfo()->getSeparatorString(); 5422 5423 Parser.eatToEndOfStatement(); 5424 } 5425 5426 getLexer().setSkipSpace(true); 5427 5428 if (isToken(AsmToken::Eof) && !FoundEnd) { 5429 return TokError(Twine("expected directive ") + 5430 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5431 } 5432 5433 CollectStream.flush(); 5434 return false; 5435 } 5436 5437 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5438 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5439 std::string String; 5440 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5441 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5442 return true; 5443 5444 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5445 if (!PALMetadata->setFromString(String)) 5446 return Error(getLoc(), "invalid PAL metadata"); 5447 return false; 5448 } 5449 5450 /// Parse the assembler directive for old linear-format PAL metadata. 5451 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5452 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5453 return Error(getLoc(), 5454 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5455 "not available on non-amdpal OSes")).str()); 5456 } 5457 5458 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5459 PALMetadata->setLegacy(); 5460 for (;;) { 5461 uint32_t Key, Value; 5462 if (ParseAsAbsoluteExpression(Key)) { 5463 return TokError(Twine("invalid value in ") + 5464 Twine(PALMD::AssemblerDirective)); 5465 } 5466 if (!trySkipToken(AsmToken::Comma)) { 5467 return TokError(Twine("expected an even number of values in ") + 5468 Twine(PALMD::AssemblerDirective)); 5469 } 5470 if (ParseAsAbsoluteExpression(Value)) { 5471 return TokError(Twine("invalid value in ") + 5472 Twine(PALMD::AssemblerDirective)); 5473 } 5474 PALMetadata->setRegister(Key, Value); 5475 if (!trySkipToken(AsmToken::Comma)) 5476 break; 5477 } 5478 return false; 5479 } 5480 5481 /// ParseDirectiveAMDGPULDS 5482 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5483 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5484 if (getParser().checkForValidSection()) 5485 return true; 5486 5487 StringRef Name; 5488 SMLoc NameLoc = getLoc(); 5489 if (getParser().parseIdentifier(Name)) 5490 return TokError("expected identifier in directive"); 5491 5492 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5493 if (parseToken(AsmToken::Comma, "expected ','")) 5494 return true; 5495 5496 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5497 5498 int64_t Size; 5499 SMLoc SizeLoc = getLoc(); 5500 if (getParser().parseAbsoluteExpression(Size)) 5501 return true; 5502 if (Size < 0) 5503 return Error(SizeLoc, "size must be non-negative"); 5504 if (Size > LocalMemorySize) 5505 return Error(SizeLoc, "size is too large"); 5506 5507 int64_t Alignment = 4; 5508 if (trySkipToken(AsmToken::Comma)) { 5509 SMLoc AlignLoc = getLoc(); 5510 if 
(getParser().parseAbsoluteExpression(Alignment))
5511 return true;
5512 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5513 return Error(AlignLoc, "alignment must be a power of two");
5514
5515 // Alignment larger than the size of LDS is possible in theory, as long
5516 // as the linker manages to place the symbol at address 0, but we do want
5517 // to make sure the alignment fits nicely into a 32-bit integer.
5518 if (Alignment >= 1u << 31)
5519 return Error(AlignLoc, "alignment is too large");
5520 }
5521
5522 if (parseEOL())
5523 return true;
5524
5525 Symbol->redefineIfPossible();
5526 if (!Symbol->isUndefined())
5527 return Error(NameLoc, "invalid symbol redefinition");
5528
5529 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5530 return false;
5531 }
5532
5533 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5534 StringRef IDVal = DirectiveID.getString();
5535
5536 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5537 if (IDVal == ".amdhsa_kernel")
5538 return ParseDirectiveAMDHSAKernel();
5539
5540 // TODO: Restructure/combine with PAL metadata directive.
5541 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5542 return ParseDirectiveHSAMetadata();
5543 } else {
5544 if (IDVal == ".hsa_code_object_version")
5545 return ParseDirectiveHSACodeObjectVersion();
5546
5547 if (IDVal == ".hsa_code_object_isa")
5548 return ParseDirectiveHSACodeObjectISA();
5549
5550 if (IDVal == ".amd_kernel_code_t")
5551 return ParseDirectiveAMDKernelCodeT();
5552
5553 if (IDVal == ".amdgpu_hsa_kernel")
5554 return ParseDirectiveAMDGPUHsaKernel();
5555
5556 if (IDVal == ".amd_amdgpu_isa")
5557 return ParseDirectiveISAVersion();
5558
5559 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5560 return ParseDirectiveHSAMetadata();
5561 }
5562
5563 if (IDVal == ".amdgcn_target")
5564 return ParseDirectiveAMDGCNTarget();
5565
5566 if (IDVal == ".amdgpu_lds")
5567 return ParseDirectiveAMDGPULDS();
5568
5569 if (IDVal == PALMD::AssemblerDirectiveBegin)
5570 return ParseDirectivePALMetadataBegin();
5571
5572 if (IDVal == PALMD::AssemblerDirective)
5573 return ParseDirectivePALMetadata();
5574
5575 return true;
5576 }
5577
5578 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5579 unsigned RegNo) {
5580
5581 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5582 return isGFX9Plus();
5583
5584 // GFX10 has 2 more SGPRs 104 and 105.
5585 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5586 return hasSGPR104_SGPR105();
5587
5588 switch (RegNo) {
5589 case AMDGPU::SRC_SHARED_BASE:
5590 case AMDGPU::SRC_SHARED_LIMIT:
5591 case AMDGPU::SRC_PRIVATE_BASE:
5592 case AMDGPU::SRC_PRIVATE_LIMIT:
5593 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5594 return isGFX9Plus();
5595 case AMDGPU::TBA:
5596 case AMDGPU::TBA_LO:
5597 case AMDGPU::TBA_HI:
5598 case AMDGPU::TMA:
5599 case AMDGPU::TMA_LO:
5600 case AMDGPU::TMA_HI:
5601 return !isGFX9Plus();
5602 case AMDGPU::XNACK_MASK:
5603 case AMDGPU::XNACK_MASK_LO:
5604 case AMDGPU::XNACK_MASK_HI:
5605 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5606 case AMDGPU::SGPR_NULL:
5607 return isGFX10Plus();
5608 default:
5609 break;
5610 }
5611
5612 if (isCI())
5613 return true;
5614
5615 if (isSI() || isGFX10Plus()) {
5616 // No flat_scr on SI.
5617 // On GFX10 flat scratch is not a valid register operand and can only be
5618 // accessed with s_setreg/s_getreg.
5619 switch (RegNo) {
5620 case AMDGPU::FLAT_SCR:
5621 case AMDGPU::FLAT_SCR_LO:
5622 case AMDGPU::FLAT_SCR_HI:
5623 return false;
5624 default:
5625 return true;
5626 }
5627 }
5628
5629 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5630 // SI/CI have.
5631 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5632 return hasSGPR102_SGPR103();
5633
5634 return true;
5635 }
5636
5637 OperandMatchResultTy
5638 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5639 OperandMode Mode) {
5640 // Try to parse with a custom parser
5641 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5642
5643 // If we successfully parsed the operand or if there was an error parsing,
5644 // we are done.
5645 //
5646 // If we are parsing after we reach EndOfStatement then this means we
5647 // are appending default values to the Operands list. This is only done
5648 // by custom parser, so we shouldn't continue on to the generic parsing.
5649 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5650 isToken(AsmToken::EndOfStatement))
5651 return ResTy;
5652
5653 SMLoc RBraceLoc;
5654 SMLoc LBraceLoc = getLoc();
5655 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5656 unsigned Prefix = Operands.size();
5657
5658 for (;;) {
5659 auto Loc = getLoc();
5660 ResTy = parseReg(Operands);
5661 if (ResTy == MatchOperand_NoMatch)
5662 Error(Loc, "expected a register");
5663 if (ResTy != MatchOperand_Success)
5664 return MatchOperand_ParseFail;
5665
5666 RBraceLoc = getLoc();
5667 if (trySkipToken(AsmToken::RBrac))
5668 break;
5669
5670 if (!skipToken(AsmToken::Comma,
5671 "expected a comma or a closing square bracket")) {
5672 return MatchOperand_ParseFail;
5673 }
5674 }
5675
5676 if (Operands.size() - Prefix > 1) {
5677 Operands.insert(Operands.begin() + Prefix,
5678 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5679 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5680 }
5681
5682 return MatchOperand_Success;
5683 }
5684
5685 return parseRegOrImm(Operands);
5686 }
5687
5688 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5689 // Clear any forced encodings from the previous instruction.
5690 setForcedEncodingSize(0);
5691 setForcedDPP(false);
5692 setForcedSDWA(false);
5693
5694 if (Name.endswith("_e64_dpp")) {
5695 setForcedDPP(true);
5696 setForcedEncodingSize(64);
5697 return Name.substr(0, Name.size() - 8);
5698 } else if (Name.endswith("_e64")) {
5699 setForcedEncodingSize(64);
5700 return Name.substr(0, Name.size() - 4);
5701 } else if (Name.endswith("_e32")) {
5702 setForcedEncodingSize(32);
5703 return Name.substr(0, Name.size() - 4);
5704 } else if (Name.endswith("_dpp")) {
5705 setForcedDPP(true);
5706 return Name.substr(0, Name.size() - 4);
5707 } else if (Name.endswith("_sdwa")) {
5708 setForcedSDWA(true);
5709 return Name.substr(0, Name.size() - 5);
5710 }
5711 return Name;
5712 }
5713
5714 static void applyMnemonicAliases(StringRef &Mnemonic,
5715 const FeatureBitset &Features,
5716 unsigned VariantID);
5717
5718 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5719 StringRef Name,
5720 SMLoc NameLoc, OperandVector &Operands) {
5721 // Add the instruction mnemonic
5722 Name = parseMnemonicSuffix(Name);
5723
5724 // If the target architecture uses MnemonicAlias, call it here to parse
5725 // operands correctly.
5726 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5727 5728 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5729 5730 bool IsMIMG = Name.startswith("image_"); 5731 5732 while (!trySkipToken(AsmToken::EndOfStatement)) { 5733 OperandMode Mode = OperandMode_Default; 5734 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5735 Mode = OperandMode_NSA; 5736 CPolSeen = 0; 5737 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5738 5739 if (Res != MatchOperand_Success) { 5740 checkUnsupportedInstruction(Name, NameLoc); 5741 if (!Parser.hasPendingError()) { 5742 // FIXME: use real operand location rather than the current location. 5743 StringRef Msg = 5744 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5745 "not a valid operand."; 5746 Error(getLoc(), Msg); 5747 } 5748 while (!trySkipToken(AsmToken::EndOfStatement)) { 5749 lex(); 5750 } 5751 return true; 5752 } 5753 5754 // Eat the comma or space if there is one. 5755 trySkipToken(AsmToken::Comma); 5756 } 5757 5758 return false; 5759 } 5760 5761 //===----------------------------------------------------------------------===// 5762 // Utility functions 5763 //===----------------------------------------------------------------------===// 5764 5765 OperandMatchResultTy 5766 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5767 5768 if (!trySkipId(Prefix, AsmToken::Colon)) 5769 return MatchOperand_NoMatch; 5770 5771 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5772 } 5773 5774 OperandMatchResultTy 5775 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5776 AMDGPUOperand::ImmTy ImmTy, 5777 bool (*ConvertResult)(int64_t&)) { 5778 SMLoc S = getLoc(); 5779 int64_t Value = 0; 5780 5781 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5782 if (Res != MatchOperand_Success) 5783 return Res; 5784 5785 if (ConvertResult && !ConvertResult(Value)) { 5786 Error(S, "invalid " + StringRef(Prefix) + " value."); 5787 } 5788 5789 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5790 return MatchOperand_Success; 5791 } 5792 5793 OperandMatchResultTy 5794 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5795 OperandVector &Operands, 5796 AMDGPUOperand::ImmTy ImmTy, 5797 bool (*ConvertResult)(int64_t&)) { 5798 SMLoc S = getLoc(); 5799 if (!trySkipId(Prefix, AsmToken::Colon)) 5800 return MatchOperand_NoMatch; 5801 5802 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5803 return MatchOperand_ParseFail; 5804 5805 unsigned Val = 0; 5806 const unsigned MaxSize = 4; 5807 5808 // FIXME: How to verify the number of elements matches the number of src 5809 // operands? 
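// For example (illustrative), op_sel:[0,1,1,0] is parsed below one element at
// a time and packed LSB-first, so Val ends up as 0b0110.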
5810 for (int I = 0; ; ++I) { 5811 int64_t Op; 5812 SMLoc Loc = getLoc(); 5813 if (!parseExpr(Op)) 5814 return MatchOperand_ParseFail; 5815 5816 if (Op != 0 && Op != 1) { 5817 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5818 return MatchOperand_ParseFail; 5819 } 5820 5821 Val |= (Op << I); 5822 5823 if (trySkipToken(AsmToken::RBrac)) 5824 break; 5825 5826 if (I + 1 == MaxSize) { 5827 Error(getLoc(), "expected a closing square bracket"); 5828 return MatchOperand_ParseFail; 5829 } 5830 5831 if (!skipToken(AsmToken::Comma, "expected a comma")) 5832 return MatchOperand_ParseFail; 5833 } 5834 5835 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5836 return MatchOperand_Success; 5837 } 5838 5839 OperandMatchResultTy 5840 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5841 AMDGPUOperand::ImmTy ImmTy) { 5842 int64_t Bit; 5843 SMLoc S = getLoc(); 5844 5845 if (trySkipId(Name)) { 5846 Bit = 1; 5847 } else if (trySkipId("no", Name)) { 5848 Bit = 0; 5849 } else { 5850 return MatchOperand_NoMatch; 5851 } 5852 5853 if (Name == "r128" && !hasMIMG_R128()) { 5854 Error(S, "r128 modifier is not supported on this GPU"); 5855 return MatchOperand_ParseFail; 5856 } 5857 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5858 Error(S, "a16 modifier is not supported on this GPU"); 5859 return MatchOperand_ParseFail; 5860 } 5861 5862 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5863 ImmTy = AMDGPUOperand::ImmTyR128A16; 5864 5865 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5866 return MatchOperand_Success; 5867 } 5868 5869 OperandMatchResultTy 5870 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5871 unsigned CPolOn = 0; 5872 unsigned CPolOff = 0; 5873 SMLoc S = getLoc(); 5874 5875 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5876 if (isGFX940() && !Mnemo.startswith("s_")) { 5877 if (trySkipId("sc0")) 5878 CPolOn = AMDGPU::CPol::SC0; 5879 else if (trySkipId("nosc0")) 5880 CPolOff = AMDGPU::CPol::SC0; 5881 else if (trySkipId("nt")) 5882 CPolOn = AMDGPU::CPol::NT; 5883 else if (trySkipId("nont")) 5884 CPolOff = AMDGPU::CPol::NT; 5885 else if (trySkipId("sc1")) 5886 CPolOn = AMDGPU::CPol::SC1; 5887 else if (trySkipId("nosc1")) 5888 CPolOff = AMDGPU::CPol::SC1; 5889 else 5890 return MatchOperand_NoMatch; 5891 } 5892 else if (trySkipId("glc")) 5893 CPolOn = AMDGPU::CPol::GLC; 5894 else if (trySkipId("noglc")) 5895 CPolOff = AMDGPU::CPol::GLC; 5896 else if (trySkipId("slc")) 5897 CPolOn = AMDGPU::CPol::SLC; 5898 else if (trySkipId("noslc")) 5899 CPolOff = AMDGPU::CPol::SLC; 5900 else if (trySkipId("dlc")) 5901 CPolOn = AMDGPU::CPol::DLC; 5902 else if (trySkipId("nodlc")) 5903 CPolOff = AMDGPU::CPol::DLC; 5904 else if (trySkipId("scc")) 5905 CPolOn = AMDGPU::CPol::SCC; 5906 else if (trySkipId("noscc")) 5907 CPolOff = AMDGPU::CPol::SCC; 5908 else 5909 return MatchOperand_NoMatch; 5910 5911 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5912 Error(S, "dlc modifier is not supported on this GPU"); 5913 return MatchOperand_ParseFail; 5914 } 5915 5916 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5917 Error(S, "scc modifier is not supported on this GPU"); 5918 return MatchOperand_ParseFail; 5919 } 5920 5921 if (CPolSeen & (CPolOn | CPolOff)) { 5922 Error(S, "duplicate cache policy modifier"); 5923 return MatchOperand_ParseFail; 5924 } 5925 5926 CPolSeen |= (CPolOn | CPolOff); 5927 5928 for (unsigned I = 1; I != Operands.size(); ++I) { 5929 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5930 if (Op.isCPol()) { 5931 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5932 return MatchOperand_Success; 5933 } 5934 } 5935 5936 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5937 AMDGPUOperand::ImmTyCPol)); 5938 5939 return MatchOperand_Success; 5940 } 5941 5942 static void addOptionalImmOperand( 5943 MCInst& Inst, const OperandVector& Operands, 5944 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5945 AMDGPUOperand::ImmTy ImmT, 5946 int64_t Default = 0) { 5947 auto i = OptionalIdx.find(ImmT); 5948 if (i != OptionalIdx.end()) { 5949 unsigned Idx = i->second; 5950 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5951 } else { 5952 Inst.addOperand(MCOperand::createImm(Default)); 5953 } 5954 } 5955 5956 OperandMatchResultTy 5957 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5958 StringRef &Value, 5959 SMLoc &StringLoc) { 5960 if (!trySkipId(Prefix, AsmToken::Colon)) 5961 return MatchOperand_NoMatch; 5962 5963 StringLoc = getLoc(); 5964 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5965 : MatchOperand_ParseFail; 5966 } 5967 5968 //===----------------------------------------------------------------------===// 5969 // MTBUF format 5970 //===----------------------------------------------------------------------===// 5971 5972 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5973 int64_t MaxVal, 5974 int64_t &Fmt) { 5975 int64_t Val; 5976 SMLoc Loc = getLoc(); 5977 5978 auto Res = parseIntWithPrefix(Pref, Val); 5979 if (Res == MatchOperand_ParseFail) 5980 return false; 5981 if (Res == MatchOperand_NoMatch) 5982 return true; 5983 5984 if (Val < 0 || Val > MaxVal) { 5985 Error(Loc, Twine("out of range ", StringRef(Pref))); 5986 return false; 5987 } 5988 5989 Fmt = Val; 5990 return true; 5991 } 5992 5993 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5994 // values to live in a joint format operand in the MCInst encoding. 5995 OperandMatchResultTy 5996 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5997 using namespace llvm::AMDGPU::MTBUFFormat; 5998 5999 int64_t Dfmt = DFMT_UNDEF; 6000 int64_t Nfmt = NFMT_UNDEF; 6001 6002 // dfmt and nfmt can appear in either order, and each is optional. 6003 for (int I = 0; I < 2; ++I) { 6004 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6005 return MatchOperand_ParseFail; 6006 6007 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6008 return MatchOperand_ParseFail; 6009 } 6010 // Skip optional comma between dfmt/nfmt 6011 // but guard against 2 commas following each other. 6012 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6013 !peekToken().is(AsmToken::Comma)) { 6014 trySkipToken(AsmToken::Comma); 6015 } 6016 } 6017 6018 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6019 return MatchOperand_NoMatch; 6020 6021 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6022 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6023 6024 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6025 return MatchOperand_Success; 6026 } 6027 6028 OperandMatchResultTy 6029 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6030 using namespace llvm::AMDGPU::MTBUFFormat; 6031 6032 int64_t Fmt = UFMT_UNDEF; 6033 6034 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6035 return MatchOperand_ParseFail; 6036 6037 if (Fmt == UFMT_UNDEF) 6038 return MatchOperand_NoMatch; 6039 6040 Format = Fmt; 6041 return MatchOperand_Success; 6042 } 6043 6044 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6045 int64_t &Nfmt, 6046 StringRef FormatStr, 6047 SMLoc Loc) { 6048 using namespace llvm::AMDGPU::MTBUFFormat; 6049 int64_t Format; 6050 6051 Format = getDfmt(FormatStr); 6052 if (Format != DFMT_UNDEF) { 6053 Dfmt = Format; 6054 return true; 6055 } 6056 6057 Format = getNfmt(FormatStr, getSTI()); 6058 if (Format != NFMT_UNDEF) { 6059 Nfmt = Format; 6060 return true; 6061 } 6062 6063 Error(Loc, "unsupported format"); 6064 return false; 6065 } 6066 6067 OperandMatchResultTy 6068 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6069 SMLoc FormatLoc, 6070 int64_t &Format) { 6071 using namespace llvm::AMDGPU::MTBUFFormat; 6072 6073 int64_t Dfmt = DFMT_UNDEF; 6074 int64_t Nfmt = NFMT_UNDEF; 6075 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6076 return MatchOperand_ParseFail; 6077 6078 if (trySkipToken(AsmToken::Comma)) { 6079 StringRef Str; 6080 SMLoc Loc = getLoc(); 6081 if (!parseId(Str, "expected a format string") || 6082 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6083 return MatchOperand_ParseFail; 6084 } 6085 if (Dfmt == DFMT_UNDEF) { 6086 Error(Loc, "duplicate numeric format"); 6087 return MatchOperand_ParseFail; 6088 } else if (Nfmt == NFMT_UNDEF) { 6089 Error(Loc, "duplicate data format"); 6090 return MatchOperand_ParseFail; 6091 } 6092 } 6093 6094 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6095 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6096 6097 if (isGFX10Plus()) { 6098 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6099 if (Ufmt == UFMT_UNDEF) { 6100 Error(FormatLoc, "unsupported format"); 6101 return MatchOperand_ParseFail; 6102 } 6103 Format = Ufmt; 6104 } else { 6105 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6106 } 6107 6108 return MatchOperand_Success; 6109 } 6110 6111 OperandMatchResultTy 6112 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6113 SMLoc Loc, 6114 int64_t &Format) { 6115 using namespace llvm::AMDGPU::MTBUFFormat; 6116 6117 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6118 if (Id == UFMT_UNDEF) 6119 return MatchOperand_NoMatch; 6120 6121 if (!isGFX10Plus()) { 6122 Error(Loc, "unified format is not supported on this GPU"); 6123 return MatchOperand_ParseFail; 6124 } 6125 6126 Format = Id; 6127 return MatchOperand_Success; 6128 } 6129 6130 OperandMatchResultTy 6131 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6132 using namespace llvm::AMDGPU::MTBUFFormat; 6133 SMLoc Loc = getLoc(); 6134 6135 if (!parseExpr(Format)) 6136 return MatchOperand_ParseFail; 6137 if (!isValidFormatEncoding(Format, getSTI())) { 6138 Error(Loc, "out of range format"); 6139 return MatchOperand_ParseFail; 6140 } 6141 6142 return MatchOperand_Success; 6143 } 6144 6145 OperandMatchResultTy 6146 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6147 using namespace llvm::AMDGPU::MTBUFFormat; 6148 6149 if (!trySkipId("format", AsmToken::Colon)) 6150 return MatchOperand_NoMatch; 6151 6152 if (trySkipToken(AsmToken::LBrac)) { 6153 StringRef FormatStr; 6154 SMLoc Loc = getLoc(); 6155 if (!parseId(FormatStr, "expected a format string")) 6156 return MatchOperand_ParseFail; 6157 6158 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6159 if (Res == MatchOperand_NoMatch) 6160 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6161 if (Res != MatchOperand_Success) 6162 return Res; 6163 6164 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6165 return MatchOperand_ParseFail; 6166 6167 return MatchOperand_Success; 6168 } 6169 6170 return parseNumericFormat(Format); 6171 } 6172 6173 OperandMatchResultTy 6174 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6175 using namespace llvm::AMDGPU::MTBUFFormat; 6176 6177 int64_t Format = getDefaultFormatEncoding(getSTI()); 6178 OperandMatchResultTy Res; 6179 SMLoc Loc = getLoc(); 6180 6181 // Parse legacy format syntax. 6182 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6183 if (Res == MatchOperand_ParseFail) 6184 return Res; 6185 6186 bool FormatFound = (Res == MatchOperand_Success); 6187 6188 Operands.push_back( 6189 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6190 6191 if (FormatFound) 6192 trySkipToken(AsmToken::Comma); 6193 6194 if (isToken(AsmToken::EndOfStatement)) { 6195 // We are expecting an soffset operand, 6196 // but let matcher handle the error. 6197 return MatchOperand_Success; 6198 } 6199 6200 // Parse soffset. 
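// The format specifier may also follow soffset, e.g. (illustrative)
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
// so a second attempt to parse it is made below if none was found yet.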
6201 Res = parseRegOrImm(Operands); 6202 if (Res != MatchOperand_Success) 6203 return Res; 6204 6205 trySkipToken(AsmToken::Comma); 6206 6207 if (!FormatFound) { 6208 Res = parseSymbolicOrNumericFormat(Format); 6209 if (Res == MatchOperand_ParseFail) 6210 return Res; 6211 if (Res == MatchOperand_Success) { 6212 auto Size = Operands.size(); 6213 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6214 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6215 Op.setImm(Format); 6216 } 6217 return MatchOperand_Success; 6218 } 6219 6220 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6221 Error(getLoc(), "duplicate format"); 6222 return MatchOperand_ParseFail; 6223 } 6224 return MatchOperand_Success; 6225 } 6226 6227 //===----------------------------------------------------------------------===// 6228 // ds 6229 //===----------------------------------------------------------------------===// 6230 6231 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6232 const OperandVector &Operands) { 6233 OptionalImmIndexMap OptionalIdx; 6234 6235 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6236 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6237 6238 // Add the register arguments 6239 if (Op.isReg()) { 6240 Op.addRegOperands(Inst, 1); 6241 continue; 6242 } 6243 6244 // Handle optional arguments 6245 OptionalIdx[Op.getImmTy()] = i; 6246 } 6247 6248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6250 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6251 6252 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6253 } 6254 6255 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6256 bool IsGdsHardcoded) { 6257 OptionalImmIndexMap OptionalIdx; 6258 6259 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6261 6262 // Add the register arguments 6263 if (Op.isReg()) { 6264 Op.addRegOperands(Inst, 1); 6265 continue; 6266 } 6267 6268 if (Op.isToken() && Op.getToken() == "gds") { 6269 IsGdsHardcoded = true; 6270 continue; 6271 } 6272 6273 // Handle optional arguments 6274 OptionalIdx[Op.getImmTy()] = i; 6275 } 6276 6277 AMDGPUOperand::ImmTy OffsetType = 6278 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6279 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6280 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6281 AMDGPUOperand::ImmTyOffset; 6282 6283 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6284 6285 if (!IsGdsHardcoded) { 6286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6287 } 6288 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6289 } 6290 6291 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6292 OptionalImmIndexMap OptionalIdx; 6293 6294 unsigned OperandIdx[4]; 6295 unsigned EnMask = 0; 6296 int SrcIdx = 0; 6297 6298 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6299 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6300 6301 // Add the register arguments 6302 if (Op.isReg()) { 6303 assert(SrcIdx < 4); 6304 OperandIdx[SrcIdx] = Inst.size(); 6305 Op.addRegOperands(Inst, 1); 6306 ++SrcIdx; 6307 continue; 6308 } 6309 6310 if (Op.isOff()) { 6311 assert(SrcIdx < 4); 6312 OperandIdx[SrcIdx] = Inst.size(); 6313 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6314 ++SrcIdx; 6315 continue; 6316 } 6317 6318 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6319 Op.addImmOperands(Inst, 1); 6320 continue; 6321 } 6322 6323 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6324 continue; 6325 6326 // Handle optional arguments 6327 OptionalIdx[Op.getImmTy()] = i; 6328 } 6329 6330 assert(SrcIdx == 4); 6331 6332 bool Compr = false; 6333 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6334 Compr = true; 6335 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6336 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6337 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6338 } 6339 6340 for (auto i = 0; i < SrcIdx; ++i) { 6341 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6342 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6343 } 6344 } 6345 6346 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6347 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6348 6349 Inst.addOperand(MCOperand::createImm(EnMask)); 6350 } 6351 6352 //===----------------------------------------------------------------------===// 6353 // s_waitcnt 6354 //===----------------------------------------------------------------------===// 6355 6356 static bool 6357 encodeCnt( 6358 const AMDGPU::IsaVersion ISA, 6359 int64_t &IntVal, 6360 int64_t CntVal, 6361 bool Saturate, 6362 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6363 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6364 { 6365 bool Failed = false; 6366 6367 IntVal = encode(ISA, IntVal, CntVal); 6368 if (CntVal != decode(ISA, IntVal)) { 6369 if (Saturate) { 6370 IntVal = encode(ISA, IntVal, -1); 6371 } else { 6372 Failed = true; 6373 } 6374 } 6375 return Failed; 6376 } 6377 6378 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6379 6380 SMLoc CntLoc = getLoc(); 6381 StringRef CntName = getTokenStr(); 6382 6383 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6384 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6385 return false; 6386 6387 int64_t CntVal; 6388 SMLoc ValLoc = getLoc(); 6389 if (!parseExpr(CntVal)) 6390 return false; 6391 6392 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6393 6394 bool Failed = true; 6395 bool Sat = CntName.endswith("_sat"); 6396 6397 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6398 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6399 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6400 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6401 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6402 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6403 } else { 6404 Error(CntLoc, "invalid counter name " + CntName); 6405 return false; 6406 } 6407 6408 if (Failed) { 6409 Error(ValLoc, "too large value for " + CntName); 6410 return false; 6411 } 6412 6413 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6414 return false; 6415 6416 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6417 if (isToken(AsmToken::EndOfStatement)) { 6418 Error(getLoc(), "expected a counter name"); 6419 return false; 6420 } 6421 } 6422 6423 return true; 6424 } 6425 6426 OperandMatchResultTy 6427 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6428 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6429 int64_t Waitcnt = getWaitcntBitMask(ISA); 6430 SMLoc S = getLoc(); 6431 6432 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6433 while (!isToken(AsmToken::EndOfStatement)) { 6434 if (!parseCnt(Waitcnt)) 6435 return MatchOperand_ParseFail; 6436 } 6437 } else { 6438 if (!parseExpr(Waitcnt)) 6439 return MatchOperand_ParseFail; 6440 } 6441 6442 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6443 return MatchOperand_Success; 6444 } 6445 6446 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6447 SMLoc FieldLoc = getLoc(); 6448 StringRef FieldName = getTokenStr(); 6449 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6450 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6451 return false; 6452 6453 SMLoc ValueLoc = getLoc(); 6454 StringRef ValueName = getTokenStr(); 6455 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6456 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6457 return false; 6458 6459 unsigned Shift; 6460 if (FieldName == "instid0") { 6461 Shift = 0; 6462 } else if (FieldName == "instskip") { 6463 Shift = 4; 6464 } else if (FieldName == "instid1") { 6465 Shift = 7; 6466 } else { 6467 Error(FieldLoc, "invalid field name " + FieldName); 6468 return false; 6469 } 6470 6471 int Value; 6472 if (Shift == 4) { 6473 // Parse values for instskip. 6474 Value = StringSwitch<int>(ValueName) 6475 .Case("SAME", 0) 6476 .Case("NEXT", 1) 6477 .Case("SKIP_1", 2) 6478 .Case("SKIP_2", 3) 6479 .Case("SKIP_3", 4) 6480 .Case("SKIP_4", 5) 6481 .Default(-1); 6482 } else { 6483 // Parse values for instid0 and instid1. 6484 Value = StringSwitch<int>(ValueName) 6485 .Case("NO_DEP", 0) 6486 .Case("VALU_DEP_1", 1) 6487 .Case("VALU_DEP_2", 2) 6488 .Case("VALU_DEP_3", 3) 6489 .Case("VALU_DEP_4", 4) 6490 .Case("TRANS32_DEP_1", 5) 6491 .Case("TRANS32_DEP_2", 6) 6492 .Case("TRANS32_DEP_3", 7) 6493 .Case("FMA_ACCUM_CYCLE_1", 8) 6494 .Case("SALU_CYCLE_1", 9) 6495 .Case("SALU_CYCLE_2", 10) 6496 .Case("SALU_CYCLE_3", 11) 6497 .Default(-1); 6498 } 6499 if (Value < 0) { 6500 Error(ValueLoc, "invalid value name " + ValueName); 6501 return false; 6502 } 6503 6504 Delay |= Value << Shift; 6505 return true; 6506 } 6507 6508 OperandMatchResultTy 6509 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6510 int64_t Delay = 0; 6511 SMLoc S = getLoc(); 6512 6513 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6514 do { 6515 if (!parseDelay(Delay)) 6516 return MatchOperand_ParseFail; 6517 } while (trySkipToken(AsmToken::Pipe)); 6518 } else { 6519 if (!parseExpr(Delay)) 6520 return MatchOperand_ParseFail; 6521 } 6522 6523 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6524 return MatchOperand_Success; 6525 } 6526 6527 bool 6528 AMDGPUOperand::isSWaitCnt() const { 6529 return isImm(); 6530 } 6531 6532 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6533 6534 //===----------------------------------------------------------------------===// 6535 // DepCtr 6536 //===----------------------------------------------------------------------===// 6537 6538 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6539 StringRef DepCtrName) { 6540 switch (ErrorId) { 6541 case OPR_ID_UNKNOWN: 6542 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6543 return; 6544 case OPR_ID_UNSUPPORTED: 6545 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6546 return; 6547 case OPR_ID_DUPLICATE: 6548 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6549 return; 6550 case OPR_VAL_INVALID: 6551 Error(Loc, Twine("invalid value for ", DepCtrName)); 6552 return; 6553 default: 6554 assert(false); 6555 } 6556 } 6557 6558 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6559 6560 using namespace llvm::AMDGPU::DepCtr; 6561 6562 SMLoc DepCtrLoc = getLoc(); 6563 StringRef DepCtrName = getTokenStr(); 6564 6565 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6566 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6567 return false; 6568 6569 int64_t ExprVal; 6570 if (!parseExpr(ExprVal)) 6571 return false; 6572 6573 unsigned PrevOprMask = UsedOprMask; 6574 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6575 6576 if (CntVal < 0) { 6577 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6578 return false; 6579 } 6580 6581 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6582 return false; 6583 6584 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6585 if (isToken(AsmToken::EndOfStatement)) { 6586 Error(getLoc(), "expected a counter name"); 6587 return false; 6588 } 6589 } 6590 6591 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6592 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6593 return true; 6594 } 6595 6596 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6597 using namespace llvm::AMDGPU::DepCtr; 6598 6599 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6600 SMLoc Loc = getLoc(); 6601 6602 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6603 unsigned UsedOprMask = 0; 6604 while (!isToken(AsmToken::EndOfStatement)) { 6605 if (!parseDepCtr(DepCtr, UsedOprMask)) 6606 return MatchOperand_ParseFail; 6607 } 6608 } else { 6609 if (!parseExpr(DepCtr)) 6610 return MatchOperand_ParseFail; 6611 } 6612 6613 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6614 return MatchOperand_Success; 6615 } 6616 6617 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6618 6619 //===----------------------------------------------------------------------===// 6620 // hwreg 6621 //===----------------------------------------------------------------------===// 6622 6623 bool 6624 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6625 OperandInfoTy &Offset, 6626 OperandInfoTy &Width) { 6627 using namespace llvm::AMDGPU::Hwreg; 6628 6629 // The register may be specified by name or using a numeric code 6630 HwReg.Loc = getLoc(); 6631 if (isToken(AsmToken::Identifier) && 6632 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6633 HwReg.IsSymbolic = true; 6634 lex(); // skip register name 6635 } else if (!parseExpr(HwReg.Id, "a register name")) { 6636 return false; 6637 } 6638 6639 if (trySkipToken(AsmToken::RParen)) 6640 return true; 6641 6642 // parse optional params 6643 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6644 return false; 6645 6646 Offset.Loc = getLoc(); 6647 if (!parseExpr(Offset.Id)) 6648 return false; 6649 6650 if (!skipToken(AsmToken::Comma, "expected a comma")) 6651 return false; 6652 6653 Width.Loc = getLoc(); 6654 return parseExpr(Width.Id) && 6655 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6656 } 6657 6658 bool 6659 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6660 const OperandInfoTy &Offset, 6661 const OperandInfoTy &Width) { 6662 6663 using namespace llvm::AMDGPU::Hwreg; 6664 6665 if (HwReg.IsSymbolic) { 6666 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6667 Error(HwReg.Loc, 6668 "specified hardware register is not supported on this GPU"); 6669 return false; 6670 } 6671 } else { 6672 if (!isValidHwreg(HwReg.Id)) { 6673 Error(HwReg.Loc, 6674 "invalid code of hardware register: only 6-bit values are legal"); 6675 return false; 6676 } 6677 } 6678 if (!isValidHwregOffset(Offset.Id)) { 6679 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6680 return false; 6681 } 6682 if (!isValidHwregWidth(Width.Id)) { 6683 Error(Width.Loc, 6684 "invalid bitfield width: only values from 1 to 32 are legal"); 6685 return false; 6686 } 6687 return true; 6688 } 6689 6690 OperandMatchResultTy 6691 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6692 using namespace llvm::AMDGPU::Hwreg; 6693 6694 int64_t ImmVal = 0; 6695 SMLoc Loc = getLoc(); 6696 6697 if (trySkipId("hwreg", AsmToken::LParen)) { 6698 
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6699 OperandInfoTy Offset(OFFSET_DEFAULT_); 6700 OperandInfoTy Width(WIDTH_DEFAULT_); 6701 if (parseHwregBody(HwReg, Offset, Width) && 6702 validateHwreg(HwReg, Offset, Width)) { 6703 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6704 } else { 6705 return MatchOperand_ParseFail; 6706 } 6707 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6708 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6709 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6710 return MatchOperand_ParseFail; 6711 } 6712 } else { 6713 return MatchOperand_ParseFail; 6714 } 6715 6716 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6717 return MatchOperand_Success; 6718 } 6719 6720 bool AMDGPUOperand::isHwreg() const { 6721 return isImmTy(ImmTyHwreg); 6722 } 6723 6724 //===----------------------------------------------------------------------===// 6725 // sendmsg 6726 //===----------------------------------------------------------------------===// 6727 6728 bool 6729 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6730 OperandInfoTy &Op, 6731 OperandInfoTy &Stream) { 6732 using namespace llvm::AMDGPU::SendMsg; 6733 6734 Msg.Loc = getLoc(); 6735 if (isToken(AsmToken::Identifier) && 6736 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6737 Msg.IsSymbolic = true; 6738 lex(); // skip message name 6739 } else if (!parseExpr(Msg.Id, "a message name")) { 6740 return false; 6741 } 6742 6743 if (trySkipToken(AsmToken::Comma)) { 6744 Op.IsDefined = true; 6745 Op.Loc = getLoc(); 6746 if (isToken(AsmToken::Identifier) && 6747 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6748 lex(); // skip operation name 6749 } else if (!parseExpr(Op.Id, "an operation name")) { 6750 return false; 6751 } 6752 6753 if (trySkipToken(AsmToken::Comma)) { 6754 Stream.IsDefined = true; 6755 Stream.Loc = getLoc(); 6756 if (!parseExpr(Stream.Id)) 6757 return false; 6758 } 6759 } 6760 6761 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6762 } 6763 6764 bool 6765 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6766 const OperandInfoTy &Op, 6767 const OperandInfoTy &Stream) { 6768 using namespace llvm::AMDGPU::SendMsg; 6769 6770 // Validation strictness depends on whether message is specified 6771 // in a symbolic or in a numeric form. In the latter case 6772 // only encoding possibility is checked. 
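// E.g. sendmsg(MSG_GS_DONE, GS_OP_NOP) is checked against the known message
// and operation tables, while a raw value such as sendmsg(3) only needs to
// fit the encoding.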
6773 bool Strict = Msg.IsSymbolic; 6774 6775 if (Strict) { 6776 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6777 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6778 return false; 6779 } 6780 } else { 6781 if (!isValidMsgId(Msg.Id, getSTI())) { 6782 Error(Msg.Loc, "invalid message id"); 6783 return false; 6784 } 6785 } 6786 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6787 if (Op.IsDefined) { 6788 Error(Op.Loc, "message does not support operations"); 6789 } else { 6790 Error(Msg.Loc, "missing message operation"); 6791 } 6792 return false; 6793 } 6794 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6795 Error(Op.Loc, "invalid operation id"); 6796 return false; 6797 } 6798 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6799 Stream.IsDefined) { 6800 Error(Stream.Loc, "message operation does not support streams"); 6801 return false; 6802 } 6803 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6804 Error(Stream.Loc, "invalid message stream id"); 6805 return false; 6806 } 6807 return true; 6808 } 6809 6810 OperandMatchResultTy 6811 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6812 using namespace llvm::AMDGPU::SendMsg; 6813 6814 int64_t ImmVal = 0; 6815 SMLoc Loc = getLoc(); 6816 6817 if (trySkipId("sendmsg", AsmToken::LParen)) { 6818 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6819 OperandInfoTy Op(OP_NONE_); 6820 OperandInfoTy Stream(STREAM_ID_NONE_); 6821 if (parseSendMsgBody(Msg, Op, Stream) && 6822 validateSendMsg(Msg, Op, Stream)) { 6823 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6824 } else { 6825 return MatchOperand_ParseFail; 6826 } 6827 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6828 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6829 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6830 return MatchOperand_ParseFail; 6831 } 6832 } else { 6833 return MatchOperand_ParseFail; 6834 } 6835 6836 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6837 return MatchOperand_Success; 6838 } 6839 6840 bool AMDGPUOperand::isSendMsg() const { 6841 return isImmTy(ImmTySendMsg); 6842 } 6843 6844 //===----------------------------------------------------------------------===// 6845 // v_interp 6846 //===----------------------------------------------------------------------===// 6847 6848 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6849 StringRef Str; 6850 SMLoc S = getLoc(); 6851 6852 if (!parseId(Str)) 6853 return MatchOperand_NoMatch; 6854 6855 int Slot = StringSwitch<int>(Str) 6856 .Case("p10", 0) 6857 .Case("p20", 1) 6858 .Case("p0", 2) 6859 .Default(-1); 6860 6861 if (Slot == -1) { 6862 Error(S, "invalid interpolation slot"); 6863 return MatchOperand_ParseFail; 6864 } 6865 6866 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6867 AMDGPUOperand::ImmTyInterpSlot)); 6868 return MatchOperand_Success; 6869 } 6870 6871 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6872 StringRef Str; 6873 SMLoc S = getLoc(); 6874 6875 if (!parseId(Str)) 6876 return MatchOperand_NoMatch; 6877 6878 if (!Str.startswith("attr")) { 6879 Error(S, "invalid interpolation attribute"); 6880 return MatchOperand_ParseFail; 6881 } 6882 6883 StringRef Chan = Str.take_back(2); 6884 int AttrChan = StringSwitch<int>(Chan) 6885 .Case(".x", 0) 6886 .Case(".y", 1) 6887 .Case(".z", 2) 6888 .Case(".w", 3) 6889 .Default(-1); 6890 if (AttrChan == -1) { 6891 Error(S, "invalid or missing interpolation attribute channel"); 
6892 return MatchOperand_ParseFail; 6893 } 6894 6895 Str = Str.drop_back(2).drop_front(4); 6896 6897 uint8_t Attr; 6898 if (Str.getAsInteger(10, Attr)) { 6899 Error(S, "invalid or missing interpolation attribute number"); 6900 return MatchOperand_ParseFail; 6901 } 6902 6903 if (Attr > 63) { 6904 Error(S, "out of bounds interpolation attribute number"); 6905 return MatchOperand_ParseFail; 6906 } 6907 6908 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6909 6910 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6911 AMDGPUOperand::ImmTyInterpAttr)); 6912 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6913 AMDGPUOperand::ImmTyAttrChan)); 6914 return MatchOperand_Success; 6915 } 6916 6917 //===----------------------------------------------------------------------===// 6918 // exp 6919 //===----------------------------------------------------------------------===// 6920 6921 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6922 using namespace llvm::AMDGPU::Exp; 6923 6924 StringRef Str; 6925 SMLoc S = getLoc(); 6926 6927 if (!parseId(Str)) 6928 return MatchOperand_NoMatch; 6929 6930 unsigned Id = getTgtId(Str); 6931 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6932 Error(S, (Id == ET_INVALID) ? 6933 "invalid exp target" : 6934 "exp target is not supported on this GPU"); 6935 return MatchOperand_ParseFail; 6936 } 6937 6938 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6939 AMDGPUOperand::ImmTyExpTgt)); 6940 return MatchOperand_Success; 6941 } 6942 6943 //===----------------------------------------------------------------------===// 6944 // parser helpers 6945 //===----------------------------------------------------------------------===// 6946 6947 bool 6948 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6949 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6950 } 6951 6952 bool 6953 AMDGPUAsmParser::isId(const StringRef Id) const { 6954 return isId(getToken(), Id); 6955 } 6956 6957 bool 6958 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6959 return getTokenKind() == Kind; 6960 } 6961 6962 bool 6963 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6964 if (isId(Id)) { 6965 lex(); 6966 return true; 6967 } 6968 return false; 6969 } 6970 6971 bool 6972 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6973 if (isToken(AsmToken::Identifier)) { 6974 StringRef Tok = getTokenStr(); 6975 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6976 lex(); 6977 return true; 6978 } 6979 } 6980 return false; 6981 } 6982 6983 bool 6984 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6985 if (isId(Id) && peekToken().is(Kind)) { 6986 lex(); 6987 lex(); 6988 return true; 6989 } 6990 return false; 6991 } 6992 6993 bool 6994 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6995 if (isToken(Kind)) { 6996 lex(); 6997 return true; 6998 } 6999 return false; 7000 } 7001 7002 bool 7003 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7004 const StringRef ErrMsg) { 7005 if (!trySkipToken(Kind)) { 7006 Error(getLoc(), ErrMsg); 7007 return false; 7008 } 7009 return true; 7010 } 7011 7012 bool 7013 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7014 SMLoc S = getLoc(); 7015 7016 const MCExpr *Expr; 7017 if (Parser.parseExpression(Expr)) 7018 return false; 7019 7020 if (Expr->evaluateAsAbsolute(Imm)) 7021 return true; 7022 7023 if (Expected.empty()) { 7024 Error(S, "expected 
absolute expression"); 7025 } else { 7026 Error(S, Twine("expected ", Expected) + 7027 Twine(" or an absolute expression")); 7028 } 7029 return false; 7030 } 7031 7032 bool 7033 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7034 SMLoc S = getLoc(); 7035 7036 const MCExpr *Expr; 7037 if (Parser.parseExpression(Expr)) 7038 return false; 7039 7040 int64_t IntVal; 7041 if (Expr->evaluateAsAbsolute(IntVal)) { 7042 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7043 } else { 7044 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7045 } 7046 return true; 7047 } 7048 7049 bool 7050 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7051 if (isToken(AsmToken::String)) { 7052 Val = getToken().getStringContents(); 7053 lex(); 7054 return true; 7055 } else { 7056 Error(getLoc(), ErrMsg); 7057 return false; 7058 } 7059 } 7060 7061 bool 7062 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7063 if (isToken(AsmToken::Identifier)) { 7064 Val = getTokenStr(); 7065 lex(); 7066 return true; 7067 } else { 7068 if (!ErrMsg.empty()) 7069 Error(getLoc(), ErrMsg); 7070 return false; 7071 } 7072 } 7073 7074 AsmToken 7075 AMDGPUAsmParser::getToken() const { 7076 return Parser.getTok(); 7077 } 7078 7079 AsmToken 7080 AMDGPUAsmParser::peekToken() { 7081 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7082 } 7083 7084 void 7085 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7086 auto TokCount = getLexer().peekTokens(Tokens); 7087 7088 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7089 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7090 } 7091 7092 AsmToken::TokenKind 7093 AMDGPUAsmParser::getTokenKind() const { 7094 return getLexer().getKind(); 7095 } 7096 7097 SMLoc 7098 AMDGPUAsmParser::getLoc() const { 7099 return getToken().getLoc(); 7100 } 7101 7102 StringRef 7103 AMDGPUAsmParser::getTokenStr() const { 7104 return getToken().getString(); 7105 } 7106 7107 void 7108 AMDGPUAsmParser::lex() { 7109 Parser.Lex(); 7110 } 7111 7112 SMLoc 7113 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7114 const OperandVector &Operands) const { 7115 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7116 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7117 if (Test(Op)) 7118 return Op.getStartLoc(); 7119 } 7120 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7121 } 7122 7123 SMLoc 7124 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7125 const OperandVector &Operands) const { 7126 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7127 return getOperandLoc(Test, Operands); 7128 } 7129 7130 SMLoc 7131 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7132 const OperandVector &Operands) const { 7133 auto Test = [=](const AMDGPUOperand& Op) { 7134 return Op.isRegKind() && Op.getReg() == Reg; 7135 }; 7136 return getOperandLoc(Test, Operands); 7137 } 7138 7139 SMLoc 7140 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7141 auto Test = [](const AMDGPUOperand& Op) { 7142 return Op.IsImmKindLiteral() || Op.isExpr(); 7143 }; 7144 return getOperandLoc(Test, Operands); 7145 } 7146 7147 SMLoc 7148 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7149 auto Test = [](const AMDGPUOperand& Op) { 7150 return Op.isImmKindConst(); 7151 }; 7152 return getOperandLoc(Test, Operands); 7153 } 7154 7155 //===----------------------------------------------------------------------===// 7156 // swizzle 7157 
//===----------------------------------------------------------------------===//

LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
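    // When it is present, the branch above accepts either a raw pattern or
    // the macro form, e.g. (illustrative only):
    //   offset:0xffff
    //   offset:swizzle(BROADCAST, 8, 0)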
7408 return parseOptionalOpr(Operands); 7409 } 7410 } 7411 7412 bool 7413 AMDGPUOperand::isSwizzle() const { 7414 return isImmTy(ImmTySwizzle); 7415 } 7416 7417 //===----------------------------------------------------------------------===// 7418 // VGPR Index Mode 7419 //===----------------------------------------------------------------------===// 7420 7421 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7422 7423 using namespace llvm::AMDGPU::VGPRIndexMode; 7424 7425 if (trySkipToken(AsmToken::RParen)) { 7426 return OFF; 7427 } 7428 7429 int64_t Imm = 0; 7430 7431 while (true) { 7432 unsigned Mode = 0; 7433 SMLoc S = getLoc(); 7434 7435 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7436 if (trySkipId(IdSymbolic[ModeId])) { 7437 Mode = 1 << ModeId; 7438 break; 7439 } 7440 } 7441 7442 if (Mode == 0) { 7443 Error(S, (Imm == 0)? 7444 "expected a VGPR index mode or a closing parenthesis" : 7445 "expected a VGPR index mode"); 7446 return UNDEF; 7447 } 7448 7449 if (Imm & Mode) { 7450 Error(S, "duplicate VGPR index mode"); 7451 return UNDEF; 7452 } 7453 Imm |= Mode; 7454 7455 if (trySkipToken(AsmToken::RParen)) 7456 break; 7457 if (!skipToken(AsmToken::Comma, 7458 "expected a comma or a closing parenthesis")) 7459 return UNDEF; 7460 } 7461 7462 return Imm; 7463 } 7464 7465 OperandMatchResultTy 7466 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7467 7468 using namespace llvm::AMDGPU::VGPRIndexMode; 7469 7470 int64_t Imm = 0; 7471 SMLoc S = getLoc(); 7472 7473 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7474 Imm = parseGPRIdxMacro(); 7475 if (Imm == UNDEF) 7476 return MatchOperand_ParseFail; 7477 } else { 7478 if (getParser().parseAbsoluteExpression(Imm)) 7479 return MatchOperand_ParseFail; 7480 if (Imm < 0 || !isUInt<4>(Imm)) { 7481 Error(S, "invalid immediate: only 4-bit values are legal"); 7482 return MatchOperand_ParseFail; 7483 } 7484 } 7485 7486 Operands.push_back( 7487 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7488 return MatchOperand_Success; 7489 } 7490 7491 bool AMDGPUOperand::isGPRIdxMode() const { 7492 return isImmTy(ImmTyGprIdxMode); 7493 } 7494 7495 //===----------------------------------------------------------------------===// 7496 // sopp branch targets 7497 //===----------------------------------------------------------------------===// 7498 7499 OperandMatchResultTy 7500 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7501 7502 // Make sure we are not parsing something 7503 // that looks like a label or an expression but is not. 7504 // This will improve error messages. 7505 if (isRegister() || isModifier()) 7506 return MatchOperand_NoMatch; 7507 7508 if (!parseExpr(Operands)) 7509 return MatchOperand_ParseFail; 7510 7511 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7512 assert(Opr.isImm() || Opr.isExpr()); 7513 SMLoc Loc = Opr.getStartLoc(); 7514 7515 // Currently we do not support arbitrary expressions as branch targets. 7516 // Only labels and absolute expressions are accepted. 
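  // For example (illustrative only):
  //   s_branch skip_block   ; label, kept as a symbol reference
  //   s_branch 0x3          ; absolute expression, validated as a 16-bit
  //                         ; offset below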
7517 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7518 Error(Loc, "expected an absolute expression or a label"); 7519 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7520 Error(Loc, "expected a 16-bit signed jump offset"); 7521 } 7522 7523 return MatchOperand_Success; 7524 } 7525 7526 //===----------------------------------------------------------------------===// 7527 // Boolean holding registers 7528 //===----------------------------------------------------------------------===// 7529 7530 OperandMatchResultTy 7531 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7532 return parseReg(Operands); 7533 } 7534 7535 //===----------------------------------------------------------------------===// 7536 // mubuf 7537 //===----------------------------------------------------------------------===// 7538 7539 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7540 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7541 } 7542 7543 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7544 const OperandVector &Operands, 7545 bool IsAtomic, 7546 bool IsLds) { 7547 OptionalImmIndexMap OptionalIdx; 7548 unsigned FirstOperandIdx = 1; 7549 bool IsAtomicReturn = false; 7550 7551 if (IsAtomic) { 7552 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7553 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7554 if (!Op.isCPol()) 7555 continue; 7556 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7557 break; 7558 } 7559 7560 if (!IsAtomicReturn) { 7561 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7562 if (NewOpc != -1) 7563 Inst.setOpcode(NewOpc); 7564 } 7565 7566 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7567 SIInstrFlags::IsAtomicRet; 7568 } 7569 7570 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7571 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7572 7573 // Add the register arguments 7574 if (Op.isReg()) { 7575 Op.addRegOperands(Inst, 1); 7576 // Insert a tied src for atomic return dst. 7577 // This cannot be postponed as subsequent calls to 7578 // addImmOperands rely on correct number of MC operands. 7579 if (IsAtomicReturn && i == FirstOperandIdx) 7580 Op.addRegOperands(Inst, 1); 7581 continue; 7582 } 7583 7584 // Handle the case where soffset is an immediate 7585 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7586 Op.addImmOperands(Inst, 1); 7587 continue; 7588 } 7589 7590 // Handle tokens like 'offen' which are sometimes hard-coded into the 7591 // asm string. There are no MCInst operands for these. 
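    // For instance (illustrative only), in an instruction such as
    //   buffer_load_dword v0, v1, s[4:7], s0 offen offset:16
    // 'offen' is matched as literal text of the instruction, so nothing is
    // appended to the MCInst for it here.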
7592 if (Op.isToken()) { 7593 continue; 7594 } 7595 assert(Op.isImm()); 7596 7597 // Handle optional arguments 7598 OptionalIdx[Op.getImmTy()] = i; 7599 } 7600 7601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7603 7604 if (!IsLds) { // tfe is not legal with lds opcodes 7605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7606 } 7607 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7608 } 7609 7610 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7611 OptionalImmIndexMap OptionalIdx; 7612 7613 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7614 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7615 7616 // Add the register arguments 7617 if (Op.isReg()) { 7618 Op.addRegOperands(Inst, 1); 7619 continue; 7620 } 7621 7622 // Handle the case where soffset is an immediate 7623 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7624 Op.addImmOperands(Inst, 1); 7625 continue; 7626 } 7627 7628 // Handle tokens like 'offen' which are sometimes hard-coded into the 7629 // asm string. There are no MCInst operands for these. 7630 if (Op.isToken()) { 7631 continue; 7632 } 7633 assert(Op.isImm()); 7634 7635 // Handle optional arguments 7636 OptionalIdx[Op.getImmTy()] = i; 7637 } 7638 7639 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7640 AMDGPUOperand::ImmTyOffset); 7641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7642 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7645 } 7646 7647 //===----------------------------------------------------------------------===// 7648 // mimg 7649 //===----------------------------------------------------------------------===// 7650 7651 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7652 bool IsAtomic) { 7653 unsigned I = 1; 7654 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7655 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7656 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7657 } 7658 7659 if (IsAtomic) { 7660 // Add src, same as dst 7661 assert(Desc.getNumDefs() == 1); 7662 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7663 } 7664 7665 OptionalImmIndexMap OptionalIdx; 7666 7667 for (unsigned E = Operands.size(); I != E; ++I) { 7668 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7669 7670 // Add the register arguments 7671 if (Op.isReg()) { 7672 Op.addRegOperands(Inst, 1); 7673 } else if (Op.isImmModifier()) { 7674 OptionalIdx[Op.getImmTy()] = I; 7675 } else if (!Op.isToken()) { 7676 llvm_unreachable("unexpected operand type"); 7677 } 7678 } 7679 7680 bool IsGFX10Plus = isGFX10Plus(); 7681 7682 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7683 if (IsGFX10Plus) 7684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7688 if (IsGFX10Plus) 7689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7690 
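  // Note: the addOptionalImmOperand() calls append to the MCInst in call
  // order, so the sequence here is expected to mirror the operand order of
  // the MIMG instruction definitions; operands that only exist on some
  // opcodes (tfe below, dim/a16 above) are guarded by named-operand or
  // subtarget checks.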
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7691 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7693 if (!IsGFX10Plus) 7694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7695 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7696 } 7697 7698 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7699 cvtMIMG(Inst, Operands, true); 7700 } 7701 7702 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7703 OptionalImmIndexMap OptionalIdx; 7704 bool IsAtomicReturn = false; 7705 7706 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7707 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7708 if (!Op.isCPol()) 7709 continue; 7710 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7711 break; 7712 } 7713 7714 if (!IsAtomicReturn) { 7715 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7716 if (NewOpc != -1) 7717 Inst.setOpcode(NewOpc); 7718 } 7719 7720 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7721 SIInstrFlags::IsAtomicRet; 7722 7723 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7724 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7725 7726 // Add the register arguments 7727 if (Op.isReg()) { 7728 Op.addRegOperands(Inst, 1); 7729 if (IsAtomicReturn && i == 1) 7730 Op.addRegOperands(Inst, 1); 7731 continue; 7732 } 7733 7734 // Handle the case where soffset is an immediate 7735 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7736 Op.addImmOperands(Inst, 1); 7737 continue; 7738 } 7739 7740 // Handle tokens like 'offen' which are sometimes hard-coded into the 7741 // asm string. There are no MCInst operands for these. 7742 if (Op.isToken()) { 7743 continue; 7744 } 7745 assert(Op.isImm()); 7746 7747 // Handle optional arguments 7748 OptionalIdx[Op.getImmTy()] = i; 7749 } 7750 7751 if ((int)Inst.getNumOperands() <= 7752 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7753 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7754 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7755 } 7756 7757 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7758 const OperandVector &Operands) { 7759 for (unsigned I = 1; I < Operands.size(); ++I) { 7760 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7761 if (Operand.isReg()) 7762 Operand.addRegOperands(Inst, 1); 7763 } 7764 7765 Inst.addOperand(MCOperand::createImm(1)); // a16 7766 } 7767 7768 //===----------------------------------------------------------------------===// 7769 // smrd 7770 //===----------------------------------------------------------------------===// 7771 7772 bool AMDGPUOperand::isSMRDOffset8() const { 7773 return isImm() && isUInt<8>(getImm()); 7774 } 7775 7776 bool AMDGPUOperand::isSMEMOffset() const { 7777 return isImmTy(ImmTyNone) || 7778 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7779 } 7780 7781 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7782 // 32-bit literals are only supported on CI and we only want to use them 7783 // when the offset is > 8-bits. 
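  // For example, an offset of 0xFF still fits the 8-bit form handled by
  // isSMRDOffset8() above, while 0x100 and above needs the CI-only 32-bit
  // literal form checked here.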
7784 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7785 } 7786 7787 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7788 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7789 } 7790 7791 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7792 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7793 } 7794 7795 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7796 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7797 } 7798 7799 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7800 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7801 } 7802 7803 //===----------------------------------------------------------------------===// 7804 // vop3 7805 //===----------------------------------------------------------------------===// 7806 7807 static bool ConvertOmodMul(int64_t &Mul) { 7808 if (Mul != 1 && Mul != 2 && Mul != 4) 7809 return false; 7810 7811 Mul >>= 1; 7812 return true; 7813 } 7814 7815 static bool ConvertOmodDiv(int64_t &Div) { 7816 if (Div == 1) { 7817 Div = 0; 7818 return true; 7819 } 7820 7821 if (Div == 2) { 7822 Div = 3; 7823 return true; 7824 } 7825 7826 return false; 7827 } 7828 7829 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7830 // This is intentional and ensures compatibility with sp3. 7831 // See bug 35397 for details. 7832 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7833 if (BoundCtrl == 0 || BoundCtrl == 1) { 7834 BoundCtrl = 1; 7835 return true; 7836 } 7837 return false; 7838 } 7839 7840 // Note: the order in this table matches the order of operands in AsmString. 7841 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7842 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7843 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7844 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7845 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7846 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7847 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7848 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7849 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7850 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7851 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7852 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7853 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7854 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7855 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7856 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7857 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7858 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7859 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7860 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7861 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7862 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7863 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7864 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7865 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7866 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7867 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7868 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7869 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7870 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7871 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7872 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7873 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7874 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7875 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7876 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7877 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7878 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7879 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7880 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7881 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7882 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7883 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7884 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7885 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7886 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7887 }; 7888 7889 void AMDGPUAsmParser::onBeginOfFile() { 7890 if (!getParser().getStreamer().getTargetStreamer() || 7891 getSTI().getTargetTriple().getArch() == Triple::r600) 7892 return; 7893 7894 if (!getTargetStreamer().getTargetID()) 7895 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7896 7897 if (isHsaAbiVersion3AndAbove(&getSTI())) 7898 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7899 } 7900 7901 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7902 7903 OperandMatchResultTy res = parseOptionalOpr(Operands); 7904 7905 // This is a hack to enable hardcoded mandatory operands which follow 7906 // optional operands. 7907 // 7908 // Current design assumes that all operands after the first optional operand 7909 // are also optional. However implementation of some instructions violates 7910 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7911 // 7912 // To alleviate this problem, we have to (implicitly) parse extra operands 7913 // to make sure autogenerated parser of custom operands never hit hardcoded 7914 // mandatory operands. 
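  //
  // Concretely, the loop below keeps calling parseOptionalOpr(), with an
  // optional comma in between, up to MAX_OPR_LOOKAHEAD times after each
  // successful parse, so that the autogenerated custom-operand parser is
  // never left positioned on a hard-coded mandatory operand such as 'glc'.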
7915 7916 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7917 if (res != MatchOperand_Success || 7918 isToken(AsmToken::EndOfStatement)) 7919 break; 7920 7921 trySkipToken(AsmToken::Comma); 7922 res = parseOptionalOpr(Operands); 7923 } 7924 7925 return res; 7926 } 7927 7928 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7929 OperandMatchResultTy res; 7930 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7931 // try to parse any optional operand here 7932 if (Op.IsBit) { 7933 res = parseNamedBit(Op.Name, Operands, Op.Type); 7934 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7935 res = parseOModOperand(Operands); 7936 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7937 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7938 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7939 res = parseSDWASel(Operands, Op.Name, Op.Type); 7940 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7941 res = parseSDWADstUnused(Operands); 7942 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7943 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7944 Op.Type == AMDGPUOperand::ImmTyNegLo || 7945 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7946 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7947 Op.ConvertResult); 7948 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7949 res = parseDim(Operands); 7950 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7951 res = parseCPol(Operands); 7952 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7953 res = parseDPP8(Operands); 7954 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7955 res = parseDPPCtrl(Operands); 7956 } else { 7957 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7958 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7959 res = parseOperandArrayWithPrefix("neg", Operands, 7960 AMDGPUOperand::ImmTyBLGP, 7961 nullptr); 7962 } 7963 } 7964 if (res != MatchOperand_NoMatch) { 7965 return res; 7966 } 7967 } 7968 return MatchOperand_NoMatch; 7969 } 7970 7971 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7972 StringRef Name = getTokenStr(); 7973 if (Name == "mul") { 7974 return parseIntWithPrefix("mul", Operands, 7975 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7976 } 7977 7978 if (Name == "div") { 7979 return parseIntWithPrefix("div", Operands, 7980 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7981 } 7982 7983 return MatchOperand_NoMatch; 7984 } 7985 7986 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7987 cvtVOP3P(Inst, Operands); 7988 7989 int Opc = Inst.getOpcode(); 7990 7991 int SrcNum; 7992 const int Ops[] = { AMDGPU::OpName::src0, 7993 AMDGPU::OpName::src1, 7994 AMDGPU::OpName::src2 }; 7995 for (SrcNum = 0; 7996 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7997 ++SrcNum); 7998 assert(SrcNum > 0); 7999 8000 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8001 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8002 8003 if ((OpSel & (1 << SrcNum)) != 0) { 8004 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8005 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8006 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8007 } 8008 } 8009 8010 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8011 // 1. This operand is input modifiers 8012 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8013 // 2. 
This is not last operand 8014 && Desc.NumOperands > (OpNum + 1) 8015 // 3. Next operand is register class 8016 && Desc.OpInfo[OpNum + 1].RegClass != -1 8017 // 4. Next register is not tied to any other operand 8018 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8019 } 8020 8021 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8022 { 8023 OptionalImmIndexMap OptionalIdx; 8024 unsigned Opc = Inst.getOpcode(); 8025 8026 unsigned I = 1; 8027 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8028 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8029 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8030 } 8031 8032 for (unsigned E = Operands.size(); I != E; ++I) { 8033 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8034 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8035 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8036 } else if (Op.isInterpSlot() || 8037 Op.isInterpAttr() || 8038 Op.isAttrChan()) { 8039 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8040 } else if (Op.isImmModifier()) { 8041 OptionalIdx[Op.getImmTy()] = I; 8042 } else { 8043 llvm_unreachable("unhandled operand type"); 8044 } 8045 } 8046 8047 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8049 } 8050 8051 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8053 } 8054 8055 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8057 } 8058 } 8059 8060 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8061 { 8062 OptionalImmIndexMap OptionalIdx; 8063 unsigned Opc = Inst.getOpcode(); 8064 8065 unsigned I = 1; 8066 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8067 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8068 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8069 } 8070 8071 for (unsigned E = Operands.size(); I != E; ++I) { 8072 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8073 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8074 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8075 } else if (Op.isImmModifier()) { 8076 OptionalIdx[Op.getImmTy()] = I; 8077 } else { 8078 llvm_unreachable("unhandled operand type"); 8079 } 8080 } 8081 8082 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8083 8084 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8085 if (OpSelIdx != -1) 8086 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8087 8088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8089 8090 if (OpSelIdx == -1) 8091 return; 8092 8093 const int Ops[] = { AMDGPU::OpName::src0, 8094 AMDGPU::OpName::src1, 8095 AMDGPU::OpName::src2 }; 8096 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8097 AMDGPU::OpName::src1_modifiers, 8098 AMDGPU::OpName::src2_modifiers }; 8099 8100 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8101 8102 for (int J = 0; J < 3; ++J) { 8103 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8104 if (OpIdx == -1) 8105 break; 8106 8107 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8108 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8109 8110 if ((OpSel & (1 << J)) != 0) 8111 ModVal |= 
SISrcMods::OP_SEL_0; 8112 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8113 (OpSel & (1 << 3)) != 0) 8114 ModVal |= SISrcMods::DST_OP_SEL; 8115 8116 Inst.getOperand(ModIdx).setImm(ModVal); 8117 } 8118 } 8119 8120 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8121 OptionalImmIndexMap &OptionalIdx) { 8122 unsigned Opc = Inst.getOpcode(); 8123 8124 unsigned I = 1; 8125 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8126 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8127 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8128 } 8129 8130 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8131 // This instruction has src modifiers 8132 for (unsigned E = Operands.size(); I != E; ++I) { 8133 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8134 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8135 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8136 } else if (Op.isImmModifier()) { 8137 OptionalIdx[Op.getImmTy()] = I; 8138 } else if (Op.isRegOrImm()) { 8139 Op.addRegOrImmOperands(Inst, 1); 8140 } else { 8141 llvm_unreachable("unhandled operand type"); 8142 } 8143 } 8144 } else { 8145 // No src modifiers 8146 for (unsigned E = Operands.size(); I != E; ++I) { 8147 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8148 if (Op.isMod()) { 8149 OptionalIdx[Op.getImmTy()] = I; 8150 } else { 8151 Op.addRegOrImmOperands(Inst, 1); 8152 } 8153 } 8154 } 8155 8156 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8158 } 8159 8160 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8162 } 8163 8164 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8165 // it has src2 register operand that is tied to dst operand 8166 // we don't allow modifiers for this operand in assembler so src2_modifiers 8167 // should be 0. 8168 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8169 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8170 Opc == AMDGPU::V_MAC_F32_e64_vi || 8171 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8172 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8173 Opc == AMDGPU::V_MAC_F16_e64_vi || 8174 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8175 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8176 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8177 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8178 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8179 auto it = Inst.begin(); 8180 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8181 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8182 ++it; 8183 // Copy the operand to ensure it's not invalidated when Inst grows. 8184 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8185 } 8186 } 8187 8188 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8189 OptionalImmIndexMap OptionalIdx; 8190 cvtVOP3(Inst, Operands, OptionalIdx); 8191 } 8192 8193 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8194 OptionalImmIndexMap &OptIdx) { 8195 const int Opc = Inst.getOpcode(); 8196 const MCInstrDesc &Desc = MII.get(Opc); 8197 8198 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8199 8200 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8201 assert(!IsPacked); 8202 Inst.addOperand(Inst.getOperand(0)); 8203 } 8204 8205 // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 8206 // instruction, and then figure out where to actually put the modifiers 8207 8208 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8209 if (OpSelIdx != -1) { 8210 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8211 } 8212 8213 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8214 if (OpSelHiIdx != -1) { 8215 int DefaultVal = IsPacked ? -1 : 0; 8216 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8217 DefaultVal); 8218 } 8219 8220 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8221 if (NegLoIdx != -1) { 8222 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8223 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8224 } 8225 8226 const int Ops[] = { AMDGPU::OpName::src0, 8227 AMDGPU::OpName::src1, 8228 AMDGPU::OpName::src2 }; 8229 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8230 AMDGPU::OpName::src1_modifiers, 8231 AMDGPU::OpName::src2_modifiers }; 8232 8233 unsigned OpSel = 0; 8234 unsigned OpSelHi = 0; 8235 unsigned NegLo = 0; 8236 unsigned NegHi = 0; 8237 8238 if (OpSelIdx != -1) 8239 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8240 8241 if (OpSelHiIdx != -1) 8242 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8243 8244 if (NegLoIdx != -1) { 8245 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8246 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8247 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8248 } 8249 8250 for (int J = 0; J < 3; ++J) { 8251 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8252 if (OpIdx == -1) 8253 break; 8254 8255 uint32_t ModVal = 0; 8256 8257 if ((OpSel & (1 << J)) != 0) 8258 ModVal |= SISrcMods::OP_SEL_0; 8259 8260 if ((OpSelHi & (1 << J)) != 0) 8261 ModVal |= SISrcMods::OP_SEL_1; 8262 8263 if ((NegLo & (1 << J)) != 0) 8264 ModVal |= SISrcMods::NEG; 8265 8266 if ((NegHi & (1 << J)) != 0) 8267 ModVal |= SISrcMods::NEG_HI; 8268 8269 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8270 8271 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8272 } 8273 } 8274 8275 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8276 OptionalImmIndexMap OptIdx; 8277 cvtVOP3(Inst, Operands, OptIdx); 8278 cvtVOP3P(Inst, Operands, OptIdx); 8279 } 8280 8281 //===----------------------------------------------------------------------===// 8282 // dpp 8283 //===----------------------------------------------------------------------===// 8284 8285 bool AMDGPUOperand::isDPP8() const { 8286 return isImmTy(ImmTyDPP8); 8287 } 8288 8289 bool AMDGPUOperand::isDPPCtrl() const { 8290 using namespace AMDGPU::DPP; 8291 8292 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8293 if (result) { 8294 int64_t Imm = getImm(); 8295 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8296 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8297 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8298 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8299 (Imm == DppCtrl::WAVE_SHL1) || 8300 (Imm == DppCtrl::WAVE_ROL1) || 8301 (Imm == DppCtrl::WAVE_SHR1) || 8302 (Imm == DppCtrl::WAVE_ROR1) || 8303 (Imm == DppCtrl::ROW_MIRROR) || 8304 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8305 (Imm == DppCtrl::BCAST15) || 8306 (Imm == DppCtrl::BCAST31) || 8307 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= 
DppCtrl::ROW_SHARE_LAST) || 8308 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8309 } 8310 return false; 8311 } 8312 8313 //===----------------------------------------------------------------------===// 8314 // mAI 8315 //===----------------------------------------------------------------------===// 8316 8317 bool AMDGPUOperand::isBLGP() const { 8318 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8319 } 8320 8321 bool AMDGPUOperand::isCBSZ() const { 8322 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8323 } 8324 8325 bool AMDGPUOperand::isABID() const { 8326 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8327 } 8328 8329 bool AMDGPUOperand::isS16Imm() const { 8330 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8331 } 8332 8333 bool AMDGPUOperand::isU16Imm() const { 8334 return isImm() && isUInt<16>(getImm()); 8335 } 8336 8337 //===----------------------------------------------------------------------===// 8338 // dim 8339 //===----------------------------------------------------------------------===// 8340 8341 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8342 // We want to allow "dim:1D" etc., 8343 // but the initial 1 is tokenized as an integer. 8344 std::string Token; 8345 if (isToken(AsmToken::Integer)) { 8346 SMLoc Loc = getToken().getEndLoc(); 8347 Token = std::string(getTokenStr()); 8348 lex(); 8349 if (getLoc() != Loc) 8350 return false; 8351 } 8352 8353 StringRef Suffix; 8354 if (!parseId(Suffix)) 8355 return false; 8356 Token += Suffix; 8357 8358 StringRef DimId = Token; 8359 if (DimId.startswith("SQ_RSRC_IMG_")) 8360 DimId = DimId.drop_front(12); 8361 8362 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8363 if (!DimInfo) 8364 return false; 8365 8366 Encoding = DimInfo->Encoding; 8367 return true; 8368 } 8369 8370 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8371 if (!isGFX10Plus()) 8372 return MatchOperand_NoMatch; 8373 8374 SMLoc S = getLoc(); 8375 8376 if (!trySkipId("dim", AsmToken::Colon)) 8377 return MatchOperand_NoMatch; 8378 8379 unsigned Encoding; 8380 SMLoc Loc = getLoc(); 8381 if (!parseDimId(Encoding)) { 8382 Error(Loc, "invalid dim value"); 8383 return MatchOperand_ParseFail; 8384 } 8385 8386 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8387 AMDGPUOperand::ImmTyDim)); 8388 return MatchOperand_Success; 8389 } 8390 8391 //===----------------------------------------------------------------------===// 8392 // dpp 8393 //===----------------------------------------------------------------------===// 8394 8395 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8396 SMLoc S = getLoc(); 8397 8398 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8399 return MatchOperand_NoMatch; 8400 8401 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8402 8403 int64_t Sels[8]; 8404 8405 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8406 return MatchOperand_ParseFail; 8407 8408 for (size_t i = 0; i < 8; ++i) { 8409 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8410 return MatchOperand_ParseFail; 8411 8412 SMLoc Loc = getLoc(); 8413 if (getParser().parseAbsoluteExpression(Sels[i])) 8414 return MatchOperand_ParseFail; 8415 if (0 > Sels[i] || 7 < Sels[i]) { 8416 Error(Loc, "expected a 3-bit value"); 8417 return MatchOperand_ParseFail; 8418 } 8419 } 8420 8421 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8422 return 
MatchOperand_ParseFail; 8423 8424 unsigned DPP8 = 0; 8425 for (size_t i = 0; i < 8; ++i) 8426 DPP8 |= (Sels[i] << (i * 3)); 8427 8428 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8429 return MatchOperand_Success; 8430 } 8431 8432 bool 8433 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8434 const OperandVector &Operands) { 8435 if (Ctrl == "row_newbcast") 8436 return isGFX90A(); 8437 8438 if (Ctrl == "row_share" || 8439 Ctrl == "row_xmask") 8440 return isGFX10Plus(); 8441 8442 if (Ctrl == "wave_shl" || 8443 Ctrl == "wave_shr" || 8444 Ctrl == "wave_rol" || 8445 Ctrl == "wave_ror" || 8446 Ctrl == "row_bcast") 8447 return isVI() || isGFX9(); 8448 8449 return Ctrl == "row_mirror" || 8450 Ctrl == "row_half_mirror" || 8451 Ctrl == "quad_perm" || 8452 Ctrl == "row_shl" || 8453 Ctrl == "row_shr" || 8454 Ctrl == "row_ror"; 8455 } 8456 8457 int64_t 8458 AMDGPUAsmParser::parseDPPCtrlPerm() { 8459 // quad_perm:[%d,%d,%d,%d] 8460 8461 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8462 return -1; 8463 8464 int64_t Val = 0; 8465 for (int i = 0; i < 4; ++i) { 8466 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8467 return -1; 8468 8469 int64_t Temp; 8470 SMLoc Loc = getLoc(); 8471 if (getParser().parseAbsoluteExpression(Temp)) 8472 return -1; 8473 if (Temp < 0 || Temp > 3) { 8474 Error(Loc, "expected a 2-bit value"); 8475 return -1; 8476 } 8477 8478 Val += (Temp << i * 2); 8479 } 8480 8481 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8482 return -1; 8483 8484 return Val; 8485 } 8486 8487 int64_t 8488 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8489 using namespace AMDGPU::DPP; 8490 8491 // sel:%d 8492 8493 int64_t Val; 8494 SMLoc Loc = getLoc(); 8495 8496 if (getParser().parseAbsoluteExpression(Val)) 8497 return -1; 8498 8499 struct DppCtrlCheck { 8500 int64_t Ctrl; 8501 int Lo; 8502 int Hi; 8503 }; 8504 8505 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8506 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8507 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8508 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8509 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8510 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8511 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8512 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8513 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8514 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8515 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8516 .Default({-1, 0, 0}); 8517 8518 bool Valid; 8519 if (Check.Ctrl == -1) { 8520 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8521 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8522 } else { 8523 Valid = Check.Lo <= Val && Val <= Check.Hi; 8524 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8525 } 8526 8527 if (!Valid) { 8528 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8529 return -1; 8530 } 8531 8532 return Val; 8533 } 8534 8535 OperandMatchResultTy 8536 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8537 using namespace AMDGPU::DPP; 8538 8539 if (!isToken(AsmToken::Identifier) || 8540 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8541 return MatchOperand_NoMatch; 8542 8543 SMLoc S = getLoc(); 8544 int64_t Val = -1; 8545 StringRef Ctrl; 8546 8547 parseId(Ctrl); 8548 8549 if (Ctrl == "row_mirror") { 8550 Val = DppCtrl::ROW_MIRROR; 8551 } else if (Ctrl == "row_half_mirror") { 8552 Val = DppCtrl::ROW_HALF_MIRROR; 8553 } else { 8554 if (skipToken(AsmToken::Colon, "expected a colon")) { 8555 if (Ctrl == "quad_perm") { 8556 Val = parseDPPCtrlPerm(); 8557 } else { 8558 Val = parseDPPCtrlSel(Ctrl); 8559 } 8560 } 8561 } 8562 8563 if (Val == -1) 8564 return MatchOperand_ParseFail; 8565 8566 Operands.push_back( 8567 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8568 return MatchOperand_Success; 8569 } 8570 8571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8572 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8573 } 8574 8575 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8576 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8577 } 8578 8579 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8580 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8581 } 8582 8583 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8584 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8585 } 8586 8587 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8588 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8589 } 8590 8591 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8592 OptionalImmIndexMap OptionalIdx; 8593 unsigned Opc = Inst.getOpcode(); 8594 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8595 unsigned I = 1; 8596 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8597 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8598 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8599 } 8600 8601 int Fi = 0; 8602 for (unsigned E = Operands.size(); I != E; ++I) { 8603 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8604 MCOI::TIED_TO); 8605 if (TiedTo != -1) { 8606 assert((unsigned)TiedTo < Inst.getNumOperands()); 8607 // handle tied old or src2 for MAC instructions 8608 Inst.addOperand(Inst.getOperand(TiedTo)); 8609 } 8610 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8611 // Add the register arguments 8612 if (IsDPP8 && Op.isFI()) { 8613 Fi = Op.getImm(); 8614 } else if (HasModifiers && 8615 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8616 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8617 } else if (Op.isReg()) { 8618 Op.addRegOperands(Inst, 1); 8619 } else if (Op.isImm() && 8620 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8621 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8622 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8623 Op.addImmOperands(Inst, 1); 8624 } else if (Op.isImm()) { 8625 OptionalIdx[Op.getImmTy()] = I; 8626 } else { 8627 llvm_unreachable("unhandled operand type"); 8628 } 8629 } 8630 if 
(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8632 } 8633 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8635 } 8636 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8638 } 8639 8640 if (IsDPP8) { 8641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8642 using namespace llvm::AMDGPU::DPP; 8643 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8644 } else { 8645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8647 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8649 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8651 } 8652 } 8653 } 8654 8655 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8656 OptionalImmIndexMap OptionalIdx; 8657 8658 unsigned Opc = Inst.getOpcode(); 8659 bool HasModifiers = 8660 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8661 unsigned I = 1; 8662 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8663 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8664 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8665 } 8666 8667 int Fi = 0; 8668 for (unsigned E = Operands.size(); I != E; ++I) { 8669 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8670 MCOI::TIED_TO); 8671 if (TiedTo != -1) { 8672 assert((unsigned)TiedTo < Inst.getNumOperands()); 8673 // handle tied old or src2 for MAC instructions 8674 Inst.addOperand(Inst.getOperand(TiedTo)); 8675 } 8676 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8677 // Add the register arguments 8678 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8679 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8680 // Skip it. 8681 continue; 8682 } 8683 8684 if (IsDPP8) { 8685 if (Op.isDPP8()) { 8686 Op.addImmOperands(Inst, 1); 8687 } else if (HasModifiers && 8688 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8689 Op.addRegWithFPInputModsOperands(Inst, 2); 8690 } else if (Op.isFI()) { 8691 Fi = Op.getImm(); 8692 } else if (Op.isReg()) { 8693 Op.addRegOperands(Inst, 1); 8694 } else { 8695 llvm_unreachable("Invalid operand type"); 8696 } 8697 } else { 8698 if (HasModifiers && 8699 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8700 Op.addRegWithFPInputModsOperands(Inst, 2); 8701 } else if (Op.isReg()) { 8702 Op.addRegOperands(Inst, 1); 8703 } else if (Op.isDPPCtrl()) { 8704 Op.addImmOperands(Inst, 1); 8705 } else if (Op.isImm()) { 8706 // Handle optional arguments 8707 OptionalIdx[Op.getImmTy()] = I; 8708 } else { 8709 llvm_unreachable("Invalid operand type"); 8710 } 8711 } 8712 } 8713 8714 if (IsDPP8) { 8715 using namespace llvm::AMDGPU::DPP; 8716 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8717 } else { 8718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8721 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8723 } 8724 } 8725 } 8726 8727 //===----------------------------------------------------------------------===// 8728 // sdwa 8729 //===----------------------------------------------------------------------===// 8730 8731 OperandMatchResultTy 8732 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8733 AMDGPUOperand::ImmTy Type) { 8734 using namespace llvm::AMDGPU::SDWA; 8735 8736 SMLoc S = getLoc(); 8737 StringRef Value; 8738 OperandMatchResultTy res; 8739 8740 SMLoc StringLoc; 8741 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8742 if (res != MatchOperand_Success) { 8743 return res; 8744 } 8745 8746 int64_t Int; 8747 Int = StringSwitch<int64_t>(Value) 8748 .Case("BYTE_0", SdwaSel::BYTE_0) 8749 .Case("BYTE_1", SdwaSel::BYTE_1) 8750 .Case("BYTE_2", SdwaSel::BYTE_2) 8751 .Case("BYTE_3", SdwaSel::BYTE_3) 8752 .Case("WORD_0", SdwaSel::WORD_0) 8753 .Case("WORD_1", SdwaSel::WORD_1) 8754 .Case("DWORD", SdwaSel::DWORD) 8755 .Default(0xffffffff); 8756 8757 if (Int == 0xffffffff) { 8758 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8759 return MatchOperand_ParseFail; 8760 } 8761 8762 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8763 return MatchOperand_Success; 8764 } 8765 8766 OperandMatchResultTy 8767 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8768 using namespace llvm::AMDGPU::SDWA; 8769 8770 SMLoc S = getLoc(); 8771 StringRef Value; 8772 OperandMatchResultTy res; 8773 8774 SMLoc StringLoc; 8775 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8776 if (res != MatchOperand_Success) { 8777 return res; 8778 } 8779 8780 int64_t Int; 8781 Int = StringSwitch<int64_t>(Value) 8782 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8783 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8784 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8785 .Default(0xffffffff); 8786 8787 if (Int == 0xffffffff) { 8788 Error(StringLoc, "invalid dst_unused value"); 8789 return MatchOperand_ParseFail; 8790 } 8791 8792 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8793 return MatchOperand_Success; 8794 } 8795 8796 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8797 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8798 } 8799 8800 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8801 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8802 } 8803 8804 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8805 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8806 } 8807 8808 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8809 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8810 } 8811 8812 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8813 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8814 } 8815 8816 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8817 uint64_t BasicInstType, 8818 bool SkipDstVcc, 8819 bool 
SkipSrcVcc) { 8820 using namespace llvm::AMDGPU::SDWA; 8821 8822 OptionalImmIndexMap OptionalIdx; 8823 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8824 bool SkippedVcc = false; 8825 8826 unsigned I = 1; 8827 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8828 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8829 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8830 } 8831 8832 for (unsigned E = Operands.size(); I != E; ++I) { 8833 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8834 if (SkipVcc && !SkippedVcc && Op.isReg() && 8835 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8836 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8837 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8838 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8839 // Skip VCC only if we didn't skip it on previous iteration. 8840 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8841 if (BasicInstType == SIInstrFlags::VOP2 && 8842 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8843 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8844 SkippedVcc = true; 8845 continue; 8846 } else if (BasicInstType == SIInstrFlags::VOPC && 8847 Inst.getNumOperands() == 0) { 8848 SkippedVcc = true; 8849 continue; 8850 } 8851 } 8852 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8853 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8854 } else if (Op.isImm()) { 8855 // Handle optional arguments 8856 OptionalIdx[Op.getImmTy()] = I; 8857 } else { 8858 llvm_unreachable("Invalid operand type"); 8859 } 8860 SkippedVcc = false; 8861 } 8862 8863 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8864 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8865 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8866 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8867 switch (BasicInstType) { 8868 case SIInstrFlags::VOP1: 8869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8870 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8872 } 8873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8876 break; 8877 8878 case SIInstrFlags::VOP2: 8879 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8880 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8882 } 8883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8886 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8887 break; 8888 8889 case SIInstrFlags::VOPC: 8890 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8891 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8892 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8894 break; 8895 8896 default: 8897 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 8898 } 8899 } 8900 8901 // special case v_mac_{f16, f32}: 8902 // it has src2 register operand that is tied to dst operand 8903 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8904 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8905 auto it = Inst.begin(); 8906 std::advance( 8907 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8908 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8909 } 8910 } 8911 8912 //===----------------------------------------------------------------------===// 8913 // mAI 8914 //===----------------------------------------------------------------------===// 8915 8916 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8917 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8918 } 8919 8920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8921 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8922 } 8923 8924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8925 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8926 } 8927 8928 /// Force static initialization. 8929 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8930 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8931 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8932 } 8933 8934 #define GET_REGISTER_MATCHER 8935 #define GET_MATCHER_IMPLEMENTATION 8936 #define GET_MNEMONIC_SPELL_CHECKER 8937 #define GET_MNEMONIC_CHECKER 8938 #include "AMDGPUGenAsmMatcher.inc" 8939 8940 // This function should be defined after auto-generated include so that we have 8941 // MatchClassKind enum defined 8942 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8943 unsigned Kind) { 8944 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8945 // But MatchInstructionImpl() expects to meet token and fails to validate 8946 // operand. This method checks if we are given immediate operand but expect to 8947 // get corresponding token. 8948 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8949 switch (Kind) { 8950 case MCK_addr64: 8951 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8952 case MCK_gds: 8953 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8954 case MCK_lds: 8955 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8956 case MCK_idxen: 8957 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8958 case MCK_offen: 8959 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8960 case MCK_SSrcB32: 8961 // When operands have expression values, they will return true for isToken, 8962 // because it is not possible to distinguish between a token and an 8963 // expression at parse time. MatchInstructionImpl() will always try to 8964 // match an operand as a token, when isToken returns true, and when the 8965 // name of the expression is not a valid token, the match will fail, 8966 // so we need to handle it here. 8967 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8968 case MCK_SSrcF32: 8969 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8970 case MCK_SoppBrTarget: 8971 return Operand.isSoppBrTarget() ? 
Match_Success : Match_InvalidOperand; 8972 case MCK_VReg32OrOff: 8973 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8974 case MCK_InterpSlot: 8975 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 8976 case MCK_Attr: 8977 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8978 case MCK_AttrChan: 8979 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8980 case MCK_ImmSMEMOffset: 8981 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8982 case MCK_SReg_64: 8983 case MCK_SReg_64_XEXEC: 8984 // Null is defined as a 32-bit register but 8985 // it should also be enabled with 64-bit operands. 8986 // The following code enables it for SReg_64 operands 8987 // used as source and destination. Remaining source 8988 // operands are handled in isInlinableImm. 8989 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8990 default: 8991 return Match_InvalidOperand; 8992 } 8993 } 8994 8995 //===----------------------------------------------------------------------===// 8996 // endpgm 8997 //===----------------------------------------------------------------------===// 8998 8999 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 9000 SMLoc S = getLoc(); 9001 int64_t Imm = 0; 9002 9003 if (!parseExpr(Imm)) { 9004 // The operand is optional, if not present default to 0 9005 Imm = 0; 9006 } 9007 9008 if (!isUInt<16>(Imm)) { 9009 Error(S, "expected a 16-bit value"); 9010 return MatchOperand_ParseFail; 9011 } 9012 9013 Operands.push_back( 9014 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 9015 return MatchOperand_Success; 9016 } 9017 9018 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 9019 9020 //===----------------------------------------------------------------------===// 9021 // LDSDIR 9022 //===----------------------------------------------------------------------===// 9023 9024 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { 9025 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); 9026 } 9027 9028 bool AMDGPUOperand::isWaitVDST() const { 9029 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 9030 } 9031 9032 //===----------------------------------------------------------------------===// 9033 // VINTERP 9034 //===----------------------------------------------------------------------===// 9035 9036 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { 9037 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); 9038 } 9039 9040 bool AMDGPUOperand::isWaitEXP() const { 9041 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 9042 } 9043