1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/TargetParser.h" 40 41 using namespace llvm; 42 using namespace llvm::AMDGPU; 43 using namespace llvm::amdhsa; 44 45 namespace { 46 47 class AMDGPUAsmParser; 48 49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 50 51 //===----------------------------------------------------------------------===// 52 // Operand 53 //===----------------------------------------------------------------------===// 54 55 class AMDGPUOperand : public MCParsedAsmOperand { 56 enum KindTy { 57 Token, 58 Immediate, 59 Register, 60 Expression 61 } Kind; 62 63 SMLoc StartLoc, EndLoc; 64 const AMDGPUAsmParser *AsmParser; 65 66 public: 67 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 68 : Kind(Kind_), AsmParser(AsmParser_) {} 69 70 using Ptr = std::unique_ptr<AMDGPUOperand>; 71 72 struct Modifiers { 73 bool Abs = false; 74 bool Neg = false; 75 bool Sext = false; 76 77 bool hasFPModifiers() const { return Abs || Neg; } 78 bool hasIntModifiers() const { return Sext; } 79 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 80 81 int64_t getFPModifiersOperand() const { 82 int64_t Operand = 0; 83 Operand |= Abs ? SISrcMods::ABS : 0u; 84 Operand |= Neg ? SISrcMods::NEG : 0u; 85 return Operand; 86 } 87 88 int64_t getIntModifiersOperand() const { 89 int64_t Operand = 0; 90 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 91 return Operand; 92 } 93 94 int64_t getModifiersOperand() const { 95 assert(!(hasFPModifiers() && hasIntModifiers()) 96 && "fp and int modifiers should not be used simultaneously"); 97 if (hasFPModifiers()) { 98 return getFPModifiersOperand(); 99 } else if (hasIntModifiers()) { 100 return getIntModifiersOperand(); 101 } else { 102 return 0; 103 } 104 } 105 106 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 107 }; 108 109 enum ImmTy { 110 ImmTyNone, 111 ImmTyGDS, 112 ImmTyLDS, 113 ImmTyOffen, 114 ImmTyIdxen, 115 ImmTyAddr64, 116 ImmTyOffset, 117 ImmTyInstOffset, 118 ImmTyOffset0, 119 ImmTyOffset1, 120 ImmTyCPol, 121 ImmTySWZ, 122 ImmTyTFE, 123 ImmTyD16, 124 ImmTyClampSI, 125 ImmTyOModSI, 126 ImmTyDPP8, 127 ImmTyDppCtrl, 128 ImmTyDppRowMask, 129 ImmTyDppBankMask, 130 ImmTyDppBoundCtrl, 131 ImmTyDppFi, 132 ImmTySdwaDstSel, 133 ImmTySdwaSrc0Sel, 134 ImmTySdwaSrc1Sel, 135 ImmTySdwaDstUnused, 136 ImmTyDMask, 137 ImmTyDim, 138 ImmTyUNorm, 139 ImmTyDA, 140 ImmTyR128A16, 141 ImmTyA16, 142 ImmTyLWE, 143 ImmTyExpTgt, 144 ImmTyExpCompr, 145 ImmTyExpVM, 146 ImmTyFORMAT, 147 ImmTyHwreg, 148 ImmTyOff, 149 ImmTySendMsg, 150 ImmTyInterpSlot, 151 ImmTyInterpAttr, 152 ImmTyAttrChan, 153 ImmTyOpSel, 154 ImmTyOpSelHi, 155 ImmTyNegLo, 156 ImmTyNegHi, 157 ImmTySwizzle, 158 ImmTyGprIdxMode, 159 ImmTyHigh, 160 ImmTyBLGP, 161 ImmTyCBSZ, 162 ImmTyABID, 163 ImmTyEndpgm, 164 ImmTyWaitVDST, 165 ImmTyWaitEXP, 166 }; 167 168 enum ImmKindTy { 169 ImmKindTyNone, 170 ImmKindTyLiteral, 171 ImmKindTyConst, 172 }; 173 174 private: 175 struct TokOp { 176 const char *Data; 177 unsigned Length; 178 }; 179 180 struct ImmOp { 181 int64_t Val; 182 ImmTy Type; 183 bool IsFPImm; 184 mutable ImmKindTy Kind; 185 Modifiers Mods; 186 }; 187 188 struct RegOp { 189 unsigned RegNo; 190 Modifiers Mods; 191 }; 192 193 union { 194 TokOp Tok; 195 ImmOp Imm; 196 RegOp Reg; 197 const MCExpr *Expr; 198 }; 199 200 public: 201 bool isToken() const override { 202 if (Kind == Token) 203 return true; 204 205 // When parsing operands, we can't always tell if something was meant to be 206 // a token, like 'gds', or an expression that references a global variable. 207 // In this case, we assume the string is an expression, and if we need to 208 // interpret is a token, then we treat the symbol name as the token. 
209 return isSymbolRefExpr(); 210 } 211 212 bool isSymbolRefExpr() const { 213 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 214 } 215 216 bool isImm() const override { 217 return Kind == Immediate; 218 } 219 220 void setImmKindNone() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyNone; 223 } 224 225 void setImmKindLiteral() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyLiteral; 228 } 229 230 void setImmKindConst() const { 231 assert(isImm()); 232 Imm.Kind = ImmKindTyConst; 233 } 234 235 bool IsImmKindLiteral() const { 236 return isImm() && Imm.Kind == ImmKindTyLiteral; 237 } 238 239 bool isImmKindConst() const { 240 return isImm() && Imm.Kind == ImmKindTyConst; 241 } 242 243 bool isInlinableImm(MVT type) const; 244 bool isLiteralImm(MVT type) const; 245 246 bool isRegKind() const { 247 return Kind == Register; 248 } 249 250 bool isReg() const override { 251 return isRegKind() && !hasModifiers(); 252 } 253 254 bool isRegOrInline(unsigned RCID, MVT type) const { 255 return isRegClass(RCID) || isInlinableImm(type); 256 } 257 258 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 259 return isRegOrInline(RCID, type) || isLiteralImm(type); 260 } 261 262 bool isRegOrImmWithInt16InputMods() const { 263 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 264 } 265 266 bool isRegOrImmWithInt32InputMods() const { 267 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 268 } 269 270 bool isRegOrImmWithInt64InputMods() const { 271 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 272 } 273 274 bool isRegOrImmWithFP16InputMods() const { 275 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 276 } 277 278 bool isRegOrImmWithFP32InputMods() const { 279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 280 } 281 282 bool isRegOrImmWithFP64InputMods() const { 283 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 284 } 285 286 bool isVReg() const { 287 return isRegClass(AMDGPU::VGPR_32RegClassID) || 288 isRegClass(AMDGPU::VReg_64RegClassID) || 289 isRegClass(AMDGPU::VReg_96RegClassID) || 290 isRegClass(AMDGPU::VReg_128RegClassID) || 291 isRegClass(AMDGPU::VReg_160RegClassID) || 292 isRegClass(AMDGPU::VReg_192RegClassID) || 293 isRegClass(AMDGPU::VReg_256RegClassID) || 294 isRegClass(AMDGPU::VReg_512RegClassID) || 295 isRegClass(AMDGPU::VReg_1024RegClassID); 296 } 297 298 bool isVReg32() const { 299 return isRegClass(AMDGPU::VGPR_32RegClassID); 300 } 301 302 bool isVReg32OrOff() const { 303 return isOff() || isVReg32(); 304 } 305 306 bool isNull() const { 307 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 308 } 309 310 bool isVRegWithInputMods() const; 311 312 bool isSDWAOperand(MVT type) const; 313 bool isSDWAFP16Operand() const; 314 bool isSDWAFP32Operand() const; 315 bool isSDWAInt16Operand() const; 316 bool isSDWAInt32Operand() const; 317 318 bool isImmTy(ImmTy ImmT) const { 319 return isImm() && Imm.Type == ImmT; 320 } 321 322 bool isImmModifier() const { 323 return isImm() && Imm.Type != ImmTyNone; 324 } 325 326 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 327 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 328 bool isDMask() const { return isImmTy(ImmTyDMask); } 329 bool isDim() const { return isImmTy(ImmTyDim); } 330 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 331 bool isDA() const { return isImmTy(ImmTyDA); } 332 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 333 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 334 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 335 bool isOff() const { return isImmTy(ImmTyOff); } 336 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 337 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 338 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 339 bool isOffen() const { return isImmTy(ImmTyOffen); } 340 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 341 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 342 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 343 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 344 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 345 346 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 347 bool isGDS() const { return isImmTy(ImmTyGDS); } 348 bool isLDS() const { return isImmTy(ImmTyLDS); } 349 bool isCPol() const { return isImmTy(ImmTyCPol); } 350 bool isSWZ() const { return isImmTy(ImmTySWZ); } 351 bool isTFE() const { return isImmTy(ImmTyTFE); } 352 bool isD16() const { return isImmTy(ImmTyD16); } 353 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 354 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 355 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 356 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 357 bool isFI() const { return isImmTy(ImmTyDppFi); } 358 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 359 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 360 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 361 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 362 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 363 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 364 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 365 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 366 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 367 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 368 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 369 bool isHigh() const { return isImmTy(ImmTyHigh); } 370 371 bool isMod() const { 372 return isClampSI() || isOModSI(); 373 } 374 375 bool isRegOrImm() const { 376 return isReg() || isImm(); 377 } 378 379 bool isRegClass(unsigned RCID) const; 380 381 bool isInlineValue() const; 382 383 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 384 return isRegOrInline(RCID, type) && !hasModifiers(); 385 } 386 387 bool isSCSrcB16() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 389 } 390 391 bool isSCSrcV2B16() const { 392 return isSCSrcB16(); 393 } 394 395 bool isSCSrcB32() const { 396 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 397 } 398 399 bool isSCSrcB64() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 401 } 402 403 bool isBoolReg() const; 404 405 bool isSCSrcF16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 407 } 408 409 bool isSCSrcV2F16() const { 410 return isSCSrcF16(); 411 } 412 413 bool isSCSrcF32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 415 } 416 417 bool isSCSrcF64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 419 } 420 421 bool isSSrcB32() const { 422 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 423 } 424 425 bool isSSrcB16() const { 426 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 427 } 428 429 bool isSSrcV2B16() const { 430 llvm_unreachable("cannot happen"); 431 return isSSrcB16(); 432 } 433 434 bool isSSrcB64() const { 435 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 436 // See isVSrc64(). 437 return isSCSrcB64() || isLiteralImm(MVT::i64); 438 } 439 440 bool isSSrcF32() const { 441 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 442 } 443 444 bool isSSrcF64() const { 445 return isSCSrcB64() || isLiteralImm(MVT::f64); 446 } 447 448 bool isSSrcF16() const { 449 return isSCSrcB16() || isLiteralImm(MVT::f16); 450 } 451 452 bool isSSrcV2F16() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF16(); 455 } 456 457 bool isSSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSSrcF32(); 460 } 461 462 bool isSCSrcV2FP32() const { 463 llvm_unreachable("cannot happen"); 464 return isSCSrcF32(); 465 } 466 467 bool isSSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSSrcB32(); 470 } 471 472 bool isSCSrcV2INT32() const { 473 llvm_unreachable("cannot happen"); 474 return isSCSrcB32(); 475 } 476 477 bool isSSrcOrLdsB32() const { 478 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 479 isLiteralImm(MVT::i32) || isExpr(); 480 } 481 482 bool isVCSrcB32() const { 483 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 484 } 485 486 bool isVCSrcB64() const { 487 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 488 } 489 490 bool isVCSrcB16() const { 491 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 492 } 493 494 bool isVCSrcV2B16() const { 495 return isVCSrcB16(); 496 } 497 498 bool isVCSrcF32() const { 499 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 500 } 501 502 bool isVCSrcF64() const { 503 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 504 } 505 506 bool isVCSrcF16() const { 507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 508 } 509 510 bool isVCSrcV2F16() const { 511 return isVCSrcF16(); 512 } 513 514 bool isVSrcB32() const { 515 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 516 } 517 518 bool isVSrcB64() const { 519 return isVCSrcF64() || isLiteralImm(MVT::i64); 520 } 521 522 bool isVSrcB16() const { 523 return isVCSrcB16() || isLiteralImm(MVT::i16); 524 } 525 526 bool isVSrcV2B16() const { 527 return isVSrcB16() || isLiteralImm(MVT::v2i16); 528 } 529 530 bool isVCSrcV2FP32() const { 531 return isVCSrcF64(); 532 } 533 534 bool isVSrcV2FP32() const { 535 return isVSrcF64() || isLiteralImm(MVT::v2f32); 536 } 537 538 bool isVCSrcV2INT32() const { 539 return isVCSrcB64(); 540 } 541 542 bool isVSrcV2INT32() const { 543 return isVSrcB64() || isLiteralImm(MVT::v2i32); 544 } 545 546 bool isVSrcF32() const { 547 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 548 } 549 550 bool isVSrcF64() const { 551 return isVCSrcF64() || isLiteralImm(MVT::f64); 552 } 553 554 bool isVSrcF16() const { 555 return isVCSrcF16() || isLiteralImm(MVT::f16); 556 } 557 558 bool isVSrcV2F16() const { 559 return isVSrcF16() || isLiteralImm(MVT::v2f16); 560 } 561 562 bool isVISrcB32() const { 563 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 564 } 565 566 bool isVISrcB16() const { 567 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 568 } 569 570 bool isVISrcV2B16() const { 571 return isVISrcB16(); 572 } 573 574 bool isVISrcF32() const { 575 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 576 } 577 578 
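  // Naming note for the is*Src* predicates in this section (they are the
  // checks referenced by the auto-generated asm matcher): isSCSrc*/isSSrc*
  // take scalar (SReg) operands, isVCSrc*/isVSrc* take VGPRs or SGPRs,
  // isVISrc* take VGPRs only and isAISrc* take AGPRs. The "C" variants and
  // the VI/AI variants accept only inline-constant immediates, while
  // isSSrc*/isVSrc* also accept literals. The B<N>/F<N>/V2 suffixes give the
  // integer/float/packed element type. A hypothetical AGPR-or-literal
  // variant would follow the same pattern (illustrative only, not part of
  // this file):
  //   bool isASrcB32() const {
  //     return isAISrcB32() || isLiteralImm(MVT::i32);
  //   }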
bool isVISrcF16() const { 579 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 580 } 581 582 bool isVISrcV2F16() const { 583 return isVISrcF16() || isVISrcB32(); 584 } 585 586 bool isVISrc_64B64() const { 587 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 588 } 589 590 bool isVISrc_64F64() const { 591 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 592 } 593 594 bool isVISrc_64V2FP32() const { 595 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 596 } 597 598 bool isVISrc_64V2INT32() const { 599 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 600 } 601 602 bool isVISrc_256B64() const { 603 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 604 } 605 606 bool isVISrc_256F64() const { 607 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 608 } 609 610 bool isVISrc_128B16() const { 611 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 612 } 613 614 bool isVISrc_128V2B16() const { 615 return isVISrc_128B16(); 616 } 617 618 bool isVISrc_128B32() const { 619 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 620 } 621 622 bool isVISrc_128F32() const { 623 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 624 } 625 626 bool isVISrc_256V2FP32() const { 627 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 628 } 629 630 bool isVISrc_256V2INT32() const { 631 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 632 } 633 634 bool isVISrc_512B32() const { 635 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 636 } 637 638 bool isVISrc_512B16() const { 639 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 640 } 641 642 bool isVISrc_512V2B16() const { 643 return isVISrc_512B16(); 644 } 645 646 bool isVISrc_512F32() const { 647 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 648 } 649 650 bool isVISrc_512F16() const { 651 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 652 } 653 654 bool isVISrc_512V2F16() const { 655 return isVISrc_512F16() || isVISrc_512B32(); 656 } 657 658 bool isVISrc_1024B32() const { 659 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 660 } 661 662 bool isVISrc_1024B16() const { 663 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 664 } 665 666 bool isVISrc_1024V2B16() const { 667 return isVISrc_1024B16(); 668 } 669 670 bool isVISrc_1024F32() const { 671 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 672 } 673 674 bool isVISrc_1024F16() const { 675 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 676 } 677 678 bool isVISrc_1024V2F16() const { 679 return isVISrc_1024F16() || isVISrc_1024B32(); 680 } 681 682 bool isAISrcB32() const { 683 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 684 } 685 686 bool isAISrcB16() const { 687 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 688 } 689 690 bool isAISrcV2B16() const { 691 return isAISrcB16(); 692 } 693 694 bool isAISrcF32() const { 695 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 696 } 697 698 bool isAISrcF16() const { 699 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 700 } 701 702 bool isAISrcV2F16() const { 703 return isAISrcF16() || isAISrcB32(); 704 } 705 706 bool isAISrc_64B64() const { 707 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 708 } 709 710 bool isAISrc_64F64() const { 711 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 712 } 713 714 bool isAISrc_128B32() const { 715 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 716 } 717 718 bool isAISrc_128B16() const { 719 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 720 } 721 722 bool isAISrc_128V2B16() const { 723 return isAISrc_128B16(); 724 } 725 726 bool isAISrc_128F32() const { 727 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 728 } 729 730 bool isAISrc_128F16() const { 731 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 732 } 733 734 bool isAISrc_128V2F16() const { 735 return isAISrc_128F16() || isAISrc_128B32(); 736 } 737 738 bool isVISrc_128F16() const { 739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 740 } 741 742 bool isVISrc_128V2F16() const { 743 return isVISrc_128F16() || isVISrc_128B32(); 744 } 745 746 bool isAISrc_256B64() const { 747 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 748 } 749 750 bool isAISrc_256F64() const { 751 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 752 } 753 754 bool isAISrc_512B32() const { 755 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 756 } 757 758 bool isAISrc_512B16() const { 759 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 760 } 761 762 bool isAISrc_512V2B16() const { 763 return isAISrc_512B16(); 764 } 765 766 bool isAISrc_512F32() const { 767 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 768 } 769 770 bool isAISrc_512F16() const { 771 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 772 } 773 774 bool isAISrc_512V2F16() const { 775 return isAISrc_512F16() || isAISrc_512B32(); 776 } 777 778 bool isAISrc_1024B32() const { 779 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 780 } 781 782 bool isAISrc_1024B16() const { 783 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 784 } 785 786 bool isAISrc_1024V2B16() const { 787 return isAISrc_1024B16(); 788 } 789 790 bool isAISrc_1024F32() const { 791 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 792 } 793 794 bool isAISrc_1024F16() const { 795 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 796 } 797 798 bool isAISrc_1024V2F16() const { 799 return isAISrc_1024F16() || isAISrc_1024B32(); 800 } 801 802 bool isKImmFP32() const { 803 return isLiteralImm(MVT::f32); 804 } 805 806 bool isKImmFP16() const { 807 return isLiteralImm(MVT::f16); 808 } 809 810 bool isMem() const override { 811 return false; 812 } 813 814 bool isExpr() const { 815 return Kind == Expression; 816 } 817 818 bool isSoppBrTarget() const { 819 return isExpr() || isImm(); 820 } 821 822 bool isSWaitCnt() const; 823 bool isDepCtr() const; 824 bool isSDelayAlu() const; 825 bool isHwreg() const; 826 bool isSendMsg() const; 827 bool isSwizzle() const; 828 bool isSMRDOffset8() const; 829 bool isSMEMOffset() const; 830 bool isSMRDLiteralOffset() const; 831 bool isDPP8() const; 832 bool isDPPCtrl() const; 833 bool isBLGP() const; 834 bool isCBSZ() const; 835 bool isABID() const; 836 bool isGPRIdxMode() const; 837 bool isS16Imm() const; 838 bool isU16Imm() const; 839 bool isEndpgm() const; 840 bool isWaitVDST() const; 841 bool isWaitEXP() const; 842 843 StringRef getExpressionAsToken() const { 844 assert(isExpr()); 845 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 846 return S->getSymbol().getName(); 847 } 848 849 StringRef getToken() const { 850 assert(isToken()); 851 
852 if (Kind == Expression) 853 return getExpressionAsToken(); 854 855 return StringRef(Tok.Data, Tok.Length); 856 } 857 858 int64_t getImm() const { 859 assert(isImm()); 860 return Imm.Val; 861 } 862 863 void setImm(int64_t Val) { 864 assert(isImm()); 865 Imm.Val = Val; 866 } 867 868 ImmTy getImmTy() const { 869 assert(isImm()); 870 return Imm.Type; 871 } 872 873 unsigned getReg() const override { 874 assert(isRegKind()); 875 return Reg.RegNo; 876 } 877 878 SMLoc getStartLoc() const override { 879 return StartLoc; 880 } 881 882 SMLoc getEndLoc() const override { 883 return EndLoc; 884 } 885 886 SMRange getLocRange() const { 887 return SMRange(StartLoc, EndLoc); 888 } 889 890 Modifiers getModifiers() const { 891 assert(isRegKind() || isImmTy(ImmTyNone)); 892 return isRegKind() ? Reg.Mods : Imm.Mods; 893 } 894 895 void setModifiers(Modifiers Mods) { 896 assert(isRegKind() || isImmTy(ImmTyNone)); 897 if (isRegKind()) 898 Reg.Mods = Mods; 899 else 900 Imm.Mods = Mods; 901 } 902 903 bool hasModifiers() const { 904 return getModifiers().hasModifiers(); 905 } 906 907 bool hasFPModifiers() const { 908 return getModifiers().hasFPModifiers(); 909 } 910 911 bool hasIntModifiers() const { 912 return getModifiers().hasIntModifiers(); 913 } 914 915 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 916 917 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 918 919 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 920 921 template <unsigned Bitwidth> 922 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 923 924 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 925 addKImmFPOperands<16>(Inst, N); 926 } 927 928 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 929 addKImmFPOperands<32>(Inst, N); 930 } 931 932 void addRegOperands(MCInst &Inst, unsigned N) const; 933 934 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 935 addRegOperands(Inst, N); 936 } 937 938 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 939 if (isRegKind()) 940 addRegOperands(Inst, N); 941 else if (isExpr()) 942 Inst.addOperand(MCOperand::createExpr(Expr)); 943 else 944 addImmOperands(Inst, N); 945 } 946 947 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 948 Modifiers Mods = getModifiers(); 949 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 950 if (isRegKind()) { 951 addRegOperands(Inst, N); 952 } else { 953 addImmOperands(Inst, N, false); 954 } 955 } 956 957 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 958 assert(!hasIntModifiers()); 959 addRegOrImmWithInputModsOperands(Inst, N); 960 } 961 962 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 963 assert(!hasFPModifiers()); 964 addRegOrImmWithInputModsOperands(Inst, N); 965 } 966 967 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 968 Modifiers Mods = getModifiers(); 969 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 970 assert(isRegKind()); 971 addRegOperands(Inst, N); 972 } 973 974 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 975 assert(!hasIntModifiers()); 976 addRegWithInputModsOperands(Inst, N); 977 } 978 979 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 980 assert(!hasFPModifiers()); 981 addRegWithInputModsOperands(Inst, N); 982 } 983 984 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 985 if (isImm()) 986 addImmOperands(Inst, N); 987 else { 988 assert(isExpr()); 
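      // Branch targets parsed as expressions (e.g. label references) are
      // emitted as MCExpr operands here; the actual offset is expected to be
      // resolved later via fixups/relocations rather than at parse time.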
989 Inst.addOperand(MCOperand::createExpr(Expr)); 990 } 991 } 992 993 static void printImmTy(raw_ostream& OS, ImmTy Type) { 994 switch (Type) { 995 case ImmTyNone: OS << "None"; break; 996 case ImmTyGDS: OS << "GDS"; break; 997 case ImmTyLDS: OS << "LDS"; break; 998 case ImmTyOffen: OS << "Offen"; break; 999 case ImmTyIdxen: OS << "Idxen"; break; 1000 case ImmTyAddr64: OS << "Addr64"; break; 1001 case ImmTyOffset: OS << "Offset"; break; 1002 case ImmTyInstOffset: OS << "InstOffset"; break; 1003 case ImmTyOffset0: OS << "Offset0"; break; 1004 case ImmTyOffset1: OS << "Offset1"; break; 1005 case ImmTyCPol: OS << "CPol"; break; 1006 case ImmTySWZ: OS << "SWZ"; break; 1007 case ImmTyTFE: OS << "TFE"; break; 1008 case ImmTyD16: OS << "D16"; break; 1009 case ImmTyFORMAT: OS << "FORMAT"; break; 1010 case ImmTyClampSI: OS << "ClampSI"; break; 1011 case ImmTyOModSI: OS << "OModSI"; break; 1012 case ImmTyDPP8: OS << "DPP8"; break; 1013 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1014 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1015 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1016 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1017 case ImmTyDppFi: OS << "FI"; break; 1018 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1019 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1020 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1021 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1022 case ImmTyDMask: OS << "DMask"; break; 1023 case ImmTyDim: OS << "Dim"; break; 1024 case ImmTyUNorm: OS << "UNorm"; break; 1025 case ImmTyDA: OS << "DA"; break; 1026 case ImmTyR128A16: OS << "R128A16"; break; 1027 case ImmTyA16: OS << "A16"; break; 1028 case ImmTyLWE: OS << "LWE"; break; 1029 case ImmTyOff: OS << "Off"; break; 1030 case ImmTyExpTgt: OS << "ExpTgt"; break; 1031 case ImmTyExpCompr: OS << "ExpCompr"; break; 1032 case ImmTyExpVM: OS << "ExpVM"; break; 1033 case ImmTyHwreg: OS << "Hwreg"; break; 1034 case ImmTySendMsg: OS << "SendMsg"; break; 1035 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1036 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1037 case ImmTyAttrChan: OS << "AttrChan"; break; 1038 case ImmTyOpSel: OS << "OpSel"; break; 1039 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1040 case ImmTyNegLo: OS << "NegLo"; break; 1041 case ImmTyNegHi: OS << "NegHi"; break; 1042 case ImmTySwizzle: OS << "Swizzle"; break; 1043 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1044 case ImmTyHigh: OS << "High"; break; 1045 case ImmTyBLGP: OS << "BLGP"; break; 1046 case ImmTyCBSZ: OS << "CBSZ"; break; 1047 case ImmTyABID: OS << "ABID"; break; 1048 case ImmTyEndpgm: OS << "Endpgm"; break; 1049 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1050 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1051 } 1052 } 1053 1054 void print(raw_ostream &OS) const override { 1055 switch (Kind) { 1056 case Register: 1057 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1058 break; 1059 case Immediate: 1060 OS << '<' << getImm(); 1061 if (getImmTy() != ImmTyNone) { 1062 OS << " type: "; printImmTy(OS, getImmTy()); 1063 } 1064 OS << " mods: " << Imm.Mods << '>'; 1065 break; 1066 case Token: 1067 OS << '\'' << getToken() << '\''; 1068 break; 1069 case Expression: 1070 OS << "<expr " << *Expr << '>'; 1071 break; 1072 } 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1076 int64_t Val, SMLoc Loc, 1077 ImmTy Type = ImmTyNone, 1078 bool IsFPImm = false) { 1079 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1080 Op->Imm.Val = Val; 1081 
Op->Imm.IsFPImm = IsFPImm; 1082 Op->Imm.Kind = ImmKindTyNone; 1083 Op->Imm.Type = Type; 1084 Op->Imm.Mods = Modifiers(); 1085 Op->StartLoc = Loc; 1086 Op->EndLoc = Loc; 1087 return Op; 1088 } 1089 1090 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1091 StringRef Str, SMLoc Loc, 1092 bool HasExplicitEncodingSize = true) { 1093 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1094 Res->Tok.Data = Str.data(); 1095 Res->Tok.Length = Str.size(); 1096 Res->StartLoc = Loc; 1097 Res->EndLoc = Loc; 1098 return Res; 1099 } 1100 1101 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1102 unsigned RegNo, SMLoc S, 1103 SMLoc E) { 1104 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1105 Op->Reg.RegNo = RegNo; 1106 Op->Reg.Mods = Modifiers(); 1107 Op->StartLoc = S; 1108 Op->EndLoc = E; 1109 return Op; 1110 } 1111 1112 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1113 const class MCExpr *Expr, SMLoc S) { 1114 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1115 Op->Expr = Expr; 1116 Op->StartLoc = S; 1117 Op->EndLoc = S; 1118 return Op; 1119 } 1120 }; 1121 1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1123 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1124 return OS; 1125 } 1126 1127 //===----------------------------------------------------------------------===// 1128 // AsmParser 1129 //===----------------------------------------------------------------------===// 1130 1131 // Holds info related to the current kernel, e.g. count of SGPRs used. 1132 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1133 // .amdgpu_hsa_kernel or at EOF. 1134 class KernelScopeInfo { 1135 int SgprIndexUnusedMin = -1; 1136 int VgprIndexUnusedMin = -1; 1137 int AgprIndexUnusedMin = -1; 1138 MCContext *Ctx = nullptr; 1139 MCSubtargetInfo const *MSTI = nullptr; 1140 1141 void usesSgprAt(int i) { 1142 if (i >= SgprIndexUnusedMin) { 1143 SgprIndexUnusedMin = ++i; 1144 if (Ctx) { 1145 MCSymbol* const Sym = 1146 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1147 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1148 } 1149 } 1150 } 1151 1152 void usesVgprAt(int i) { 1153 if (i >= VgprIndexUnusedMin) { 1154 VgprIndexUnusedMin = ++i; 1155 if (Ctx) { 1156 MCSymbol* const Sym = 1157 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1158 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1159 VgprIndexUnusedMin); 1160 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1161 } 1162 } 1163 } 1164 1165 void usesAgprAt(int i) { 1166 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1167 if (!hasMAIInsts(*MSTI)) 1168 return; 1169 1170 if (i >= AgprIndexUnusedMin) { 1171 AgprIndexUnusedMin = ++i; 1172 if (Ctx) { 1173 MCSymbol* const Sym = 1174 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1175 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1176 1177 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1178 MCSymbol* const vSym = 1179 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1180 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1181 VgprIndexUnusedMin); 1182 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1183 } 1184 } 1185 } 1186 1187 public: 1188 KernelScopeInfo() = default; 1189 1190 void initialize(MCContext &Context) { 1191 Ctx = &Context; 1192 MSTI = 
Ctx->getSubtargetInfo(); 1193 1194 usesSgprAt(SgprIndexUnusedMin = -1); 1195 usesVgprAt(VgprIndexUnusedMin = -1); 1196 if (hasMAIInsts(*MSTI)) { 1197 usesAgprAt(AgprIndexUnusedMin = -1); 1198 } 1199 } 1200 1201 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1202 unsigned RegWidth) { 1203 switch (RegKind) { 1204 case IS_SGPR: 1205 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1206 break; 1207 case IS_AGPR: 1208 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1209 break; 1210 case IS_VGPR: 1211 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1212 break; 1213 default: 1214 break; 1215 } 1216 } 1217 }; 1218 1219 class AMDGPUAsmParser : public MCTargetAsmParser { 1220 MCAsmParser &Parser; 1221 1222 // Number of extra operands parsed after the first optional operand. 1223 // This may be necessary to skip hardcoded mandatory operands. 1224 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1225 1226 unsigned ForcedEncodingSize = 0; 1227 bool ForcedDPP = false; 1228 bool ForcedSDWA = false; 1229 KernelScopeInfo KernelScope; 1230 unsigned CPolSeen; 1231 1232 /// @name Auto-generated Match Functions 1233 /// { 1234 1235 #define GET_ASSEMBLER_HEADER 1236 #include "AMDGPUGenAsmMatcher.inc" 1237 1238 /// } 1239 1240 private: 1241 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1242 bool OutOfRangeError(SMRange Range); 1243 /// Calculate VGPR/SGPR blocks required for given target, reserved 1244 /// registers, and user-specified NextFreeXGPR values. 1245 /// 1246 /// \param Features [in] Target features, used for bug corrections. 1247 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1248 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1249 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1250 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1251 /// descriptor field, if valid. 1252 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1253 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1254 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1255 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1256 /// \param VGPRBlocks [out] Result VGPR block count. 1257 /// \param SGPRBlocks [out] Result SGPR block count. 1258 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1259 bool FlatScrUsed, bool XNACKUsed, 1260 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1261 SMRange VGPRRange, unsigned NextFreeSGPR, 1262 SMRange SGPRRange, unsigned &VGPRBlocks, 1263 unsigned &SGPRBlocks); 1264 bool ParseDirectiveAMDGCNTarget(); 1265 bool ParseDirectiveAMDHSAKernel(); 1266 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1267 bool ParseDirectiveHSACodeObjectVersion(); 1268 bool ParseDirectiveHSACodeObjectISA(); 1269 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1270 bool ParseDirectiveAMDKernelCodeT(); 1271 // TODO: Possibly make subtargetHasRegister const. 1272 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1273 bool ParseDirectiveAMDGPUHsaKernel(); 1274 1275 bool ParseDirectiveISAVersion(); 1276 bool ParseDirectiveHSAMetadata(); 1277 bool ParseDirectivePALMetadataBegin(); 1278 bool ParseDirectivePALMetadata(); 1279 bool ParseDirectiveAMDGPULDS(); 1280 1281 /// Common code to parse out a block of text (typically YAML) between start and 1282 /// end directives. 
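  /// For illustration, the HSA metadata directive handler is assumed to use
  /// this roughly as:
  /// \code
  ///   std::string YamlString;
  ///   ParseToEndDirective(HSAMD::AssemblerDirectiveBegin,
  ///                       HSAMD::AssemblerDirectiveEnd, YamlString);
  /// \endcode
  /// collecting everything up to the matching end directive into the string.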
bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for
      // this. MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
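      // The symbols defined below expose the target's ISA version to the
      // assembly source: the .amdgcn.gfx_generation_* names are used for the
      // HSA V3+ ABI and the .option.machine_version_* names otherwise.
      // Illustrative use from assembly (assumed, not taken from this file):
      //   .if .amdgcn.gfx_generation_number >= 9
      //   .endif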
1349 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1350 MCContext &Ctx = getContext(); 1351 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1352 MCSymbol *Sym = 1353 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1354 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1355 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1356 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1357 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1358 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1359 } else { 1360 MCSymbol *Sym = 1361 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1362 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1363 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1364 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1365 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1366 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1367 } 1368 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1369 initializeGprCountSymbol(IS_VGPR); 1370 initializeGprCountSymbol(IS_SGPR); 1371 } else 1372 KernelScope.initialize(getContext()); 1373 } 1374 } 1375 1376 bool hasMIMG_R128() const { 1377 return AMDGPU::hasMIMG_R128(getSTI()); 1378 } 1379 1380 bool hasPackedD16() const { 1381 return AMDGPU::hasPackedD16(getSTI()); 1382 } 1383 1384 bool hasGFX10A16() const { 1385 return AMDGPU::hasGFX10A16(getSTI()); 1386 } 1387 1388 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1389 1390 bool isSI() const { 1391 return AMDGPU::isSI(getSTI()); 1392 } 1393 1394 bool isCI() const { 1395 return AMDGPU::isCI(getSTI()); 1396 } 1397 1398 bool isVI() const { 1399 return AMDGPU::isVI(getSTI()); 1400 } 1401 1402 bool isGFX9() const { 1403 return AMDGPU::isGFX9(getSTI()); 1404 } 1405 1406 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
1407 bool isGFX90A() const { 1408 return AMDGPU::isGFX90A(getSTI()); 1409 } 1410 1411 bool isGFX940() const { 1412 return AMDGPU::isGFX940(getSTI()); 1413 } 1414 1415 bool isGFX9Plus() const { 1416 return AMDGPU::isGFX9Plus(getSTI()); 1417 } 1418 1419 bool isGFX10() const { 1420 return AMDGPU::isGFX10(getSTI()); 1421 } 1422 1423 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1424 1425 bool isGFX11() const { 1426 return AMDGPU::isGFX11(getSTI()); 1427 } 1428 1429 bool isGFX11Plus() const { 1430 return AMDGPU::isGFX11Plus(getSTI()); 1431 } 1432 1433 bool isGFX10_BEncoding() const { 1434 return AMDGPU::isGFX10_BEncoding(getSTI()); 1435 } 1436 1437 bool hasInv2PiInlineImm() const { 1438 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1439 } 1440 1441 bool hasFlatOffsets() const { 1442 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1443 } 1444 1445 bool hasArchitectedFlatScratch() const { 1446 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1447 } 1448 1449 bool hasSGPR102_SGPR103() const { 1450 return !isVI() && !isGFX9(); 1451 } 1452 1453 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1454 1455 bool hasIntClamp() const { 1456 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1457 } 1458 1459 AMDGPUTargetStreamer &getTargetStreamer() { 1460 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1461 return static_cast<AMDGPUTargetStreamer &>(TS); 1462 } 1463 1464 const MCRegisterInfo *getMRI() const { 1465 // We need this const_cast because for some reason getContext() is not const 1466 // in MCAsmParser. 1467 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1468 } 1469 1470 const MCInstrInfo *getMII() const { 1471 return &MII; 1472 } 1473 1474 const FeatureBitset &getFeatureBits() const { 1475 return getSTI().getFeatureBits(); 1476 } 1477 1478 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1479 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1480 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1481 1482 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1483 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1484 bool isForcedDPP() const { return ForcedDPP; } 1485 bool isForcedSDWA() const { return ForcedSDWA; } 1486 ArrayRef<unsigned> getMatchedVariants() const; 1487 StringRef getMatchedVariantName() const; 1488 1489 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1490 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1491 bool RestoreOnFailure); 1492 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1493 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1494 SMLoc &EndLoc) override; 1495 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1496 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1497 unsigned Kind) override; 1498 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1499 OperandVector &Operands, MCStreamer &Out, 1500 uint64_t &ErrorInfo, 1501 bool MatchingInlineAsm) override; 1502 bool ParseDirective(AsmToken DirectiveID) override; 1503 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1504 OperandMode Mode = OperandMode_Default); 1505 StringRef parseMnemonicSuffix(StringRef Name); 1506 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1507 SMLoc NameLoc, OperandVector &Operands) override; 1508 //bool 
ProcessInstruction(MCInst &Inst); 1509 1510 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1511 1512 OperandMatchResultTy 1513 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1514 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1515 bool (*ConvertResult)(int64_t &) = nullptr); 1516 1517 OperandMatchResultTy 1518 parseOperandArrayWithPrefix(const char *Prefix, 1519 OperandVector &Operands, 1520 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1521 bool (*ConvertResult)(int64_t&) = nullptr); 1522 1523 OperandMatchResultTy 1524 parseNamedBit(StringRef Name, OperandVector &Operands, 1525 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1526 OperandMatchResultTy parseCPol(OperandVector &Operands); 1527 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1528 StringRef &Value, 1529 SMLoc &StringLoc); 1530 1531 bool isModifier(); 1532 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1533 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1534 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1535 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1536 bool parseSP3NegModifier(); 1537 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1538 OperandMatchResultTy parseReg(OperandVector &Operands); 1539 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1540 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1541 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1542 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1543 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1544 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1545 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1546 OperandMatchResultTy parseUfmt(int64_t &Format); 1547 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1548 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1549 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1550 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1551 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1552 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1553 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1554 1555 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1556 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1557 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1558 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1559 1560 bool parseCnt(int64_t &IntVal); 1561 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1562 1563 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1564 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1565 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1566 1567 bool parseDelay(int64_t &Delay); 1568 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1569 1570 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1571 1572 private: 1573 struct OperandInfoTy { 1574 SMLoc Loc; 1575 int64_t Id; 
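    // Numeric value of the parsed sub-operand (e.g. the hwreg or sendmsg id
    // consumed by parseHwregBody / parseSendMsgBody below).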
1576 bool IsSymbolic = false; 1577 bool IsDefined = false; 1578 1579 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1580 }; 1581 1582 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1583 bool validateSendMsg(const OperandInfoTy &Msg, 1584 const OperandInfoTy &Op, 1585 const OperandInfoTy &Stream); 1586 1587 bool parseHwregBody(OperandInfoTy &HwReg, 1588 OperandInfoTy &Offset, 1589 OperandInfoTy &Width); 1590 bool validateHwreg(const OperandInfoTy &HwReg, 1591 const OperandInfoTy &Offset, 1592 const OperandInfoTy &Width); 1593 1594 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1595 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1596 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1597 1598 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1599 const OperandVector &Operands) const; 1600 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1601 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1602 SMLoc getLitLoc(const OperandVector &Operands) const; 1603 SMLoc getConstLoc(const OperandVector &Operands) const; 1604 1605 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1606 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1607 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1608 bool validateSOPLiteral(const MCInst &Inst) const; 1609 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1610 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1611 bool validateIntClampSupported(const MCInst &Inst); 1612 bool validateMIMGAtomicDMask(const MCInst &Inst); 1613 bool validateMIMGGatherDMask(const MCInst &Inst); 1614 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1615 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1616 bool validateMIMGAddrSize(const MCInst &Inst); 1617 bool validateMIMGD16(const MCInst &Inst); 1618 bool validateMIMGDim(const MCInst &Inst); 1619 bool validateMIMGMSAA(const MCInst &Inst); 1620 bool validateOpSel(const MCInst &Inst); 1621 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1622 bool validateVccOperand(unsigned Reg) const; 1623 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1624 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateAGPRLdSt(const MCInst &Inst) const; 1627 bool validateVGPRAlign(const MCInst &Inst) const; 1628 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1630 bool validateDivScale(const MCInst &Inst); 1631 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1632 const SMLoc &IDLoc); 1633 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1634 const SMLoc &IDLoc); 1635 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1636 unsigned getConstantBusLimit(unsigned Opcode) const; 1637 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1638 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1639 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1640 1641 bool isSupportedMnemo(StringRef Mnemo, 1642 const FeatureBitset &FBS); 1643 bool isSupportedMnemo(StringRef Mnemo, 1644 
const FeatureBitset &FBS, 1645 ArrayRef<unsigned> Variants); 1646 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1647 1648 bool isId(const StringRef Id) const; 1649 bool isId(const AsmToken &Token, const StringRef Id) const; 1650 bool isToken(const AsmToken::TokenKind Kind) const; 1651 bool trySkipId(const StringRef Id); 1652 bool trySkipId(const StringRef Pref, const StringRef Id); 1653 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1654 bool trySkipToken(const AsmToken::TokenKind Kind); 1655 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1656 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1657 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1658 1659 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1660 AsmToken::TokenKind getTokenKind() const; 1661 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1662 bool parseExpr(OperandVector &Operands); 1663 StringRef getTokenStr() const; 1664 AsmToken peekToken(); 1665 AsmToken getToken() const; 1666 SMLoc getLoc() const; 1667 void lex(); 1668 1669 public: 1670 void onBeginOfFile() override; 1671 1672 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1673 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1674 1675 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1676 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1677 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1678 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1679 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1680 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1681 1682 bool parseSwizzleOperand(int64_t &Op, 1683 const unsigned MinVal, 1684 const unsigned MaxVal, 1685 const StringRef ErrMsg, 1686 SMLoc &Loc); 1687 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1688 const unsigned MinVal, 1689 const unsigned MaxVal, 1690 const StringRef ErrMsg); 1691 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1692 bool parseSwizzleOffset(int64_t &Imm); 1693 bool parseSwizzleMacro(int64_t &Imm); 1694 bool parseSwizzleQuadPerm(int64_t &Imm); 1695 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1696 bool parseSwizzleBroadcast(int64_t &Imm); 1697 bool parseSwizzleSwap(int64_t &Imm); 1698 bool parseSwizzleReverse(int64_t &Imm); 1699 1700 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1701 int64_t parseGPRIdxMacro(); 1702 1703 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1704 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1705 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1706 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1707 1708 AMDGPUOperand::Ptr defaultCPol() const; 1709 1710 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1711 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1712 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1713 AMDGPUOperand::Ptr defaultFlatOffset() const; 1714 1715 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1716 1717 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1718 OptionalImmIndexMap &OptionalIdx); 1719 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1720 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1721 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1722 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1723 OptionalImmIndexMap &OptionalIdx); 1724 1725 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1726 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1727 1728 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1729 bool IsAtomic = false); 1730 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1731 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1732 1733 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1734 1735 bool parseDimId(unsigned &Encoding); 1736 OperandMatchResultTy parseDim(OperandVector &Operands); 1737 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1738 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1739 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1740 int64_t parseDPPCtrlSel(StringRef Ctrl); 1741 int64_t parseDPPCtrlPerm(); 1742 AMDGPUOperand::Ptr defaultRowMask() const; 1743 AMDGPUOperand::Ptr defaultBankMask() const; 1744 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1745 AMDGPUOperand::Ptr defaultFI() const; 1746 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1747 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1748 1749 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1750 AMDGPUOperand::ImmTy Type); 1751 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1752 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1753 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1754 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1755 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1756 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1757 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1758 uint64_t BasicInstType, 1759 bool SkipDstVcc = false, 1760 bool SkipSrcVcc = false); 1761 1762 AMDGPUOperand::Ptr defaultBLGP() const; 1763 AMDGPUOperand::Ptr defaultCBSZ() const; 1764 AMDGPUOperand::Ptr defaultABID() const; 1765 1766 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1767 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1768 1769 AMDGPUOperand::Ptr defaultWaitVDST() const; 1770 AMDGPUOperand::Ptr defaultWaitEXP() const; 1771 }; 1772 1773 struct OptionalOperand { 1774 const char *Name; 1775 AMDGPUOperand::ImmTy Type; 1776 bool IsBit; 1777 bool (*ConvertResult)(int64_t&); 1778 }; 1779 1780 } // end anonymous namespace 1781 1782 // May be called with integer type with equivalent bitwidth. 
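// For example, a 4-byte request (MVT::f32 or MVT::i32) maps to
// &APFloat::IEEEsingle() and a 2-byte request to &APFloat::IEEEhalf();
// only the size in bytes matters here, not the float/int distinction.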
1783 static const fltSemantics *getFltSemantics(unsigned Size) { 1784 switch (Size) { 1785 case 4: 1786 return &APFloat::IEEEsingle(); 1787 case 8: 1788 return &APFloat::IEEEdouble(); 1789 case 2: 1790 return &APFloat::IEEEhalf(); 1791 default: 1792 llvm_unreachable("unsupported fp type"); 1793 } 1794 } 1795 1796 static const fltSemantics *getFltSemantics(MVT VT) { 1797 return getFltSemantics(VT.getSizeInBits() / 8); 1798 } 1799 1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1801 switch (OperandType) { 1802 case AMDGPU::OPERAND_REG_IMM_INT32: 1803 case AMDGPU::OPERAND_REG_IMM_FP32: 1804 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1805 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1806 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1808 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1810 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1811 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1812 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1813 case AMDGPU::OPERAND_KIMM32: 1814 return &APFloat::IEEEsingle(); 1815 case AMDGPU::OPERAND_REG_IMM_INT64: 1816 case AMDGPU::OPERAND_REG_IMM_FP64: 1817 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1818 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1820 return &APFloat::IEEEdouble(); 1821 case AMDGPU::OPERAND_REG_IMM_INT16: 1822 case AMDGPU::OPERAND_REG_IMM_FP16: 1823 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1824 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1828 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1829 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1830 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1831 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1832 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1833 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1834 case AMDGPU::OPERAND_KIMM16: 1835 return &APFloat::IEEEhalf(); 1836 default: 1837 llvm_unreachable("unsupported fp type"); 1838 } 1839 } 1840 1841 //===----------------------------------------------------------------------===// 1842 // Operand 1843 //===----------------------------------------------------------------------===// 1844 1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1846 bool Lost; 1847 1848 // Convert literal to single precision 1849 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1850 APFloat::rmNearestTiesToEven, 1851 &Lost); 1852 // We allow precision lost but not overflow or underflow 1853 if (Status != APFloat::opOK && 1854 Lost && 1855 ((Status & APFloat::opOverflow) != 0 || 1856 (Status & APFloat::opUnderflow) != 0)) { 1857 return false; 1858 } 1859 1860 return true; 1861 } 1862 1863 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1864 return isUIntN(Size, Val) || isIntN(Size, Val); 1865 } 1866 1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1868 if (VT.getScalarType() == MVT::i16) { 1869 // FP immediate values are broken. 1870 return isInlinableIntLiteral(Val); 1871 } 1872 1873 // f16/v2f16 operands work correctly for all values. 1874 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1875 } 1876 1877 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1878 1879 // This is a hack to enable named inline values like 1880 // shared_base with both 32-bit and 64-bit operands. 1881 // Note that these values are defined as 1882 // 32-bit operands only. 
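// For plain immediates, the checks below accept only the hardware inline
// constants: integers in [-16, 64] and the floats 0.0, +-0.5, +-1.0, +-2.0,
// +-4.0 (plus 1/(2*pi) on targets with hasInv2PiInlineImm()). For example,
// "1.0" can be encoded for free while "1.5" must be emitted as a literal.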
1883 if (isInlineValue()) { 1884 return true; 1885 } 1886 1887 if (!isImmTy(ImmTyNone)) { 1888 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1889 return false; 1890 } 1891 // TODO: We should avoid using host float here. It would be better to 1892 // check the float bit values which is what a few other places do. 1893 // We've had bot failures before due to weird NaN support on mips hosts. 1894 1895 APInt Literal(64, Imm.Val); 1896 1897 if (Imm.IsFPImm) { // We got fp literal token 1898 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1899 return AMDGPU::isInlinableLiteral64(Imm.Val, 1900 AsmParser->hasInv2PiInlineImm()); 1901 } 1902 1903 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1904 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1905 return false; 1906 1907 if (type.getScalarSizeInBits() == 16) { 1908 return isInlineableLiteralOp16( 1909 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1910 type, AsmParser->hasInv2PiInlineImm()); 1911 } 1912 1913 // Check if single precision literal is inlinable 1914 return AMDGPU::isInlinableLiteral32( 1915 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1916 AsmParser->hasInv2PiInlineImm()); 1917 } 1918 1919 // We got int literal token. 1920 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1921 return AMDGPU::isInlinableLiteral64(Imm.Val, 1922 AsmParser->hasInv2PiInlineImm()); 1923 } 1924 1925 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1926 return false; 1927 } 1928 1929 if (type.getScalarSizeInBits() == 16) { 1930 return isInlineableLiteralOp16( 1931 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1932 type, AsmParser->hasInv2PiInlineImm()); 1933 } 1934 1935 return AMDGPU::isInlinableLiteral32( 1936 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1937 AsmParser->hasInv2PiInlineImm()); 1938 } 1939 1940 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1941 // Check that this immediate can be added as literal 1942 if (!isImmTy(ImmTyNone)) { 1943 return false; 1944 } 1945 1946 if (!Imm.IsFPImm) { 1947 // We got int literal token. 1948 1949 if (type == MVT::f64 && hasFPModifiers()) { 1950 // Cannot apply fp modifiers to int literals preserving the same semantics 1951 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1952 // disable these cases. 1953 return false; 1954 } 1955 1956 unsigned Size = type.getSizeInBits(); 1957 if (Size == 64) 1958 Size = 32; 1959 1960 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1961 // types. 1962 return isSafeTruncation(Imm.Val, Size); 1963 } 1964 1965 // We got fp literal token 1966 if (type == MVT::f64) { // Expected 64-bit fp operand 1967 // We would set low 64-bits of literal to zeroes but we accept this literals 1968 return true; 1969 } 1970 1971 if (type == MVT::i64) { // Expected 64-bit int operand 1972 // We don't allow fp literals in 64-bit integer instructions. It is 1973 // unclear how we should encode them. 1974 return false; 1975 } 1976 1977 // We allow fp literals with f16x2 operands assuming that the specified 1978 // literal goes into the lower half and the upper half is zero. We also 1979 // require that the literal may be losslessly converted to f16. 1980 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1981 (type == MVT::v2i16)? MVT::i16 : 1982 (type == MVT::v2f32)? 
MVT::f32 : type; 1983 1984 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1985 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1986 } 1987 1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1989 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1990 } 1991 1992 bool AMDGPUOperand::isVRegWithInputMods() const { 1993 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1994 // GFX90A allows DPP on 64-bit operands. 1995 (isRegClass(AMDGPU::VReg_64RegClassID) && 1996 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1997 } 1998 1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2000 if (AsmParser->isVI()) 2001 return isVReg32(); 2002 else if (AsmParser->isGFX9Plus()) 2003 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2004 else 2005 return false; 2006 } 2007 2008 bool AMDGPUOperand::isSDWAFP16Operand() const { 2009 return isSDWAOperand(MVT::f16); 2010 } 2011 2012 bool AMDGPUOperand::isSDWAFP32Operand() const { 2013 return isSDWAOperand(MVT::f32); 2014 } 2015 2016 bool AMDGPUOperand::isSDWAInt16Operand() const { 2017 return isSDWAOperand(MVT::i16); 2018 } 2019 2020 bool AMDGPUOperand::isSDWAInt32Operand() const { 2021 return isSDWAOperand(MVT::i32); 2022 } 2023 2024 bool AMDGPUOperand::isBoolReg() const { 2025 auto FB = AsmParser->getFeatureBits(); 2026 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2027 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2028 } 2029 2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2031 { 2032 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2033 assert(Size == 2 || Size == 4 || Size == 8); 2034 2035 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2036 2037 if (Imm.Mods.Abs) { 2038 Val &= ~FpSignMask; 2039 } 2040 if (Imm.Mods.Neg) { 2041 Val ^= FpSignMask; 2042 } 2043 2044 return Val; 2045 } 2046 2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2048 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2049 Inst.getNumOperands())) { 2050 addLiteralImmOperand(Inst, Imm.Val, 2051 ApplyModifiers & 2052 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2053 } else { 2054 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2055 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2056 setImmKindNone(); 2057 } 2058 } 2059 2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2061 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2062 auto OpNum = Inst.getNumOperands(); 2063 // Check that this operand accepts literals 2064 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2065 2066 if (ApplyModifiers) { 2067 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2068 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2069 Val = applyInputFPModifiers(Val, Size); 2070 } 2071 2072 APInt Literal(64, Val); 2073 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2074 2075 if (Imm.IsFPImm) { // We got fp literal token 2076 switch (OpTy) { 2077 case AMDGPU::OPERAND_REG_IMM_INT64: 2078 case AMDGPU::OPERAND_REG_IMM_FP64: 2079 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2080 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2082 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2083 AsmParser->hasInv2PiInlineImm())) { 2084 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2085 setImmKindConst(); 2086 return; 2087 } 2088 2089 // Non-inlineable 2090 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2091 // For fp operands we check if low 32 bits are zeros 2092 if (Literal.getLoBits(32) != 0) { 2093 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2094 "Can't encode literal as exact 64-bit floating-point operand. " 2095 "Low 32-bits will be set to zero"); 2096 } 2097 2098 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2099 setImmKindLiteral(); 2100 return; 2101 } 2102 2103 // We don't allow fp literals in 64-bit integer instructions. It is 2104 // unclear how we should encode them. This case should be checked earlier 2105 // in predicate methods (isLiteralImm()) 2106 llvm_unreachable("fp literal in 64-bit integer instruction."); 2107 2108 case AMDGPU::OPERAND_REG_IMM_INT32: 2109 case AMDGPU::OPERAND_REG_IMM_FP32: 2110 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2111 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2112 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2115 case AMDGPU::OPERAND_REG_IMM_INT16: 2116 case AMDGPU::OPERAND_REG_IMM_FP16: 2117 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2118 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2119 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2124 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2126 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2127 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2128 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2129 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2130 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2131 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2132 case AMDGPU::OPERAND_KIMM32: 2133 case AMDGPU::OPERAND_KIMM16: { 2134 bool lost; 2135 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2136 // Convert literal to single precision 2137 FPLiteral.convert(*getOpFltSemantics(OpTy), 2138 APFloat::rmNearestTiesToEven, &lost); 2139 // We allow precision lost but not overflow or underflow. This should be 2140 // checked earlier in isLiteralImm() 2141 2142 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2143 Inst.addOperand(MCOperand::createImm(ImmVal)); 2144 setImmKindLiteral(); 2145 return; 2146 } 2147 default: 2148 llvm_unreachable("invalid operand size"); 2149 } 2150 2151 return; 2152 } 2153 2154 // We got int literal token. 2155 // Only sign extend inline immediates. 
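// Sketch of the 32-bit case below: if the value is one of the inline
// constants it is emitted as-is (setImmKindConst), otherwise only its low
// 32 bits are emitted as a literal (setImmKindLiteral), e.g.
//   64         -> inline constant
//   0x12345678 -> 32-bit literal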
2156 switch (OpTy) { 2157 case AMDGPU::OPERAND_REG_IMM_INT32: 2158 case AMDGPU::OPERAND_REG_IMM_FP32: 2159 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2163 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2164 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2166 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2167 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2168 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2169 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2170 if (isSafeTruncation(Val, 32) && 2171 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2172 AsmParser->hasInv2PiInlineImm())) { 2173 Inst.addOperand(MCOperand::createImm(Val)); 2174 setImmKindConst(); 2175 return; 2176 } 2177 2178 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2179 setImmKindLiteral(); 2180 return; 2181 2182 case AMDGPU::OPERAND_REG_IMM_INT64: 2183 case AMDGPU::OPERAND_REG_IMM_FP64: 2184 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2185 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2186 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2187 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2188 Inst.addOperand(MCOperand::createImm(Val)); 2189 setImmKindConst(); 2190 return; 2191 } 2192 2193 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2194 setImmKindLiteral(); 2195 return; 2196 2197 case AMDGPU::OPERAND_REG_IMM_INT16: 2198 case AMDGPU::OPERAND_REG_IMM_FP16: 2199 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2200 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2201 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2202 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2203 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2204 if (isSafeTruncation(Val, 16) && 2205 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2206 AsmParser->hasInv2PiInlineImm())) { 2207 Inst.addOperand(MCOperand::createImm(Val)); 2208 setImmKindConst(); 2209 return; 2210 } 2211 2212 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2213 setImmKindLiteral(); 2214 return; 2215 2216 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2217 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2218 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2219 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2220 assert(isSafeTruncation(Val, 16)); 2221 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2222 AsmParser->hasInv2PiInlineImm())); 2223 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 return; 2226 } 2227 case AMDGPU::OPERAND_KIMM32: 2228 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2229 setImmKindNone(); 2230 return; 2231 case AMDGPU::OPERAND_KIMM16: 2232 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2233 setImmKindNone(); 2234 return; 2235 default: 2236 llvm_unreachable("invalid operand size"); 2237 } 2238 } 2239 2240 template <unsigned Bitwidth> 2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2242 APInt Literal(64, Imm.Val); 2243 setImmKindNone(); 2244 2245 if (!Imm.IsFPImm) { 2246 // We got int literal token. 
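// KImm operands (e.g. the trailing constant of v_madmk/v_fmaak style
// opcodes) have no inline-constant form, so an integer token is simply
// truncated to the requested bit width here; a floating-point token is
// converted to the operand's format below.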
2247 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2248 return; 2249 } 2250 2251 bool Lost; 2252 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2253 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2254 APFloat::rmNearestTiesToEven, &Lost); 2255 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2256 } 2257 2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2259 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2260 } 2261 2262 static bool isInlineValue(unsigned Reg) { 2263 switch (Reg) { 2264 case AMDGPU::SRC_SHARED_BASE: 2265 case AMDGPU::SRC_SHARED_LIMIT: 2266 case AMDGPU::SRC_PRIVATE_BASE: 2267 case AMDGPU::SRC_PRIVATE_LIMIT: 2268 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2269 return true; 2270 case AMDGPU::SRC_VCCZ: 2271 case AMDGPU::SRC_EXECZ: 2272 case AMDGPU::SRC_SCC: 2273 return true; 2274 case AMDGPU::SGPR_NULL: 2275 return true; 2276 default: 2277 return false; 2278 } 2279 } 2280 2281 bool AMDGPUOperand::isInlineValue() const { 2282 return isRegKind() && ::isInlineValue(getReg()); 2283 } 2284 2285 //===----------------------------------------------------------------------===// 2286 // AsmParser 2287 //===----------------------------------------------------------------------===// 2288 2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2290 if (Is == IS_VGPR) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::VGPR_32RegClassID; 2295 case 64: 2296 return AMDGPU::VReg_64RegClassID; 2297 case 96: 2298 return AMDGPU::VReg_96RegClassID; 2299 case 128: 2300 return AMDGPU::VReg_128RegClassID; 2301 case 160: 2302 return AMDGPU::VReg_160RegClassID; 2303 case 192: 2304 return AMDGPU::VReg_192RegClassID; 2305 case 224: 2306 return AMDGPU::VReg_224RegClassID; 2307 case 256: 2308 return AMDGPU::VReg_256RegClassID; 2309 case 512: 2310 return AMDGPU::VReg_512RegClassID; 2311 case 1024: 2312 return AMDGPU::VReg_1024RegClassID; 2313 } 2314 } else if (Is == IS_TTMP) { 2315 switch (RegWidth) { 2316 default: return -1; 2317 case 32: 2318 return AMDGPU::TTMP_32RegClassID; 2319 case 64: 2320 return AMDGPU::TTMP_64RegClassID; 2321 case 128: 2322 return AMDGPU::TTMP_128RegClassID; 2323 case 256: 2324 return AMDGPU::TTMP_256RegClassID; 2325 case 512: 2326 return AMDGPU::TTMP_512RegClassID; 2327 } 2328 } else if (Is == IS_SGPR) { 2329 switch (RegWidth) { 2330 default: return -1; 2331 case 32: 2332 return AMDGPU::SGPR_32RegClassID; 2333 case 64: 2334 return AMDGPU::SGPR_64RegClassID; 2335 case 96: 2336 return AMDGPU::SGPR_96RegClassID; 2337 case 128: 2338 return AMDGPU::SGPR_128RegClassID; 2339 case 160: 2340 return AMDGPU::SGPR_160RegClassID; 2341 case 192: 2342 return AMDGPU::SGPR_192RegClassID; 2343 case 224: 2344 return AMDGPU::SGPR_224RegClassID; 2345 case 256: 2346 return AMDGPU::SGPR_256RegClassID; 2347 case 512: 2348 return AMDGPU::SGPR_512RegClassID; 2349 } 2350 } else if (Is == IS_AGPR) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::AGPR_32RegClassID; 2355 case 64: 2356 return AMDGPU::AReg_64RegClassID; 2357 case 96: 2358 return AMDGPU::AReg_96RegClassID; 2359 case 128: 2360 return AMDGPU::AReg_128RegClassID; 2361 case 160: 2362 return AMDGPU::AReg_160RegClassID; 2363 case 192: 2364 return AMDGPU::AReg_192RegClassID; 2365 case 224: 2366 return AMDGPU::AReg_224RegClassID; 2367 case 256: 2368 return AMDGPU::AReg_256RegClassID; 2369 case 512: 2370 return AMDGPU::AReg_512RegClassID; 
2371 case 1024: 2372 return AMDGPU::AReg_1024RegClassID; 2373 } 2374 } 2375 return -1; 2376 } 2377 2378 static unsigned getSpecialRegForName(StringRef RegName) { 2379 return StringSwitch<unsigned>(RegName) 2380 .Case("exec", AMDGPU::EXEC) 2381 .Case("vcc", AMDGPU::VCC) 2382 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2383 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2384 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2385 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2386 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2387 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2388 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2389 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2390 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2391 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2392 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2393 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2394 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2395 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2396 .Case("m0", AMDGPU::M0) 2397 .Case("vccz", AMDGPU::SRC_VCCZ) 2398 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2399 .Case("execz", AMDGPU::SRC_EXECZ) 2400 .Case("src_execz", AMDGPU::SRC_EXECZ) 2401 .Case("scc", AMDGPU::SRC_SCC) 2402 .Case("src_scc", AMDGPU::SRC_SCC) 2403 .Case("tba", AMDGPU::TBA) 2404 .Case("tma", AMDGPU::TMA) 2405 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2406 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2407 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2408 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2409 .Case("vcc_lo", AMDGPU::VCC_LO) 2410 .Case("vcc_hi", AMDGPU::VCC_HI) 2411 .Case("exec_lo", AMDGPU::EXEC_LO) 2412 .Case("exec_hi", AMDGPU::EXEC_HI) 2413 .Case("tma_lo", AMDGPU::TMA_LO) 2414 .Case("tma_hi", AMDGPU::TMA_HI) 2415 .Case("tba_lo", AMDGPU::TBA_LO) 2416 .Case("tba_hi", AMDGPU::TBA_HI) 2417 .Case("pc", AMDGPU::PC_REG) 2418 .Case("null", AMDGPU::SGPR_NULL) 2419 .Default(AMDGPU::NoRegister); 2420 } 2421 2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2423 SMLoc &EndLoc, bool RestoreOnFailure) { 2424 auto R = parseRegister(); 2425 if (!R) return true; 2426 assert(R->isReg()); 2427 RegNo = R->getReg(); 2428 StartLoc = R->getStartLoc(); 2429 EndLoc = R->getEndLoc(); 2430 return false; 2431 } 2432 2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2434 SMLoc &EndLoc) { 2435 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2436 } 2437 2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2439 SMLoc &StartLoc, 2440 SMLoc &EndLoc) { 2441 bool Result = 2442 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2443 bool PendingErrors = getParser().hasPendingError(); 2444 getParser().clearPendingErrors(); 2445 if (PendingErrors) 2446 return MatchOperand_ParseFail; 2447 if (Result) 2448 return MatchOperand_NoMatch; 2449 return MatchOperand_Success; 2450 } 2451 2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2453 RegisterKind RegKind, unsigned Reg1, 2454 SMLoc Loc) { 2455 switch (RegKind) { 2456 case IS_SPECIAL: 2457 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2458 Reg = AMDGPU::EXEC; 2459 RegWidth = 64; 2460 return true; 2461 } 2462 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2463 Reg = AMDGPU::FLAT_SCR; 2464 RegWidth = 64; 2465 return true; 2466 } 2467 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2468 Reg = AMDGPU::XNACK_MASK; 2469 RegWidth = 64; 
2470 return true; 2471 } 2472 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2473 Reg = AMDGPU::VCC; 2474 RegWidth = 64; 2475 return true; 2476 } 2477 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2478 Reg = AMDGPU::TBA; 2479 RegWidth = 64; 2480 return true; 2481 } 2482 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2483 Reg = AMDGPU::TMA; 2484 RegWidth = 64; 2485 return true; 2486 } 2487 Error(Loc, "register does not fit in the list"); 2488 return false; 2489 case IS_VGPR: 2490 case IS_SGPR: 2491 case IS_AGPR: 2492 case IS_TTMP: 2493 if (Reg1 != Reg + RegWidth / 32) { 2494 Error(Loc, "registers in a list must have consecutive indices"); 2495 return false; 2496 } 2497 RegWidth += 32; 2498 return true; 2499 default: 2500 llvm_unreachable("unexpected register kind"); 2501 } 2502 } 2503 2504 struct RegInfo { 2505 StringLiteral Name; 2506 RegisterKind Kind; 2507 }; 2508 2509 static constexpr RegInfo RegularRegisters[] = { 2510 {{"v"}, IS_VGPR}, 2511 {{"s"}, IS_SGPR}, 2512 {{"ttmp"}, IS_TTMP}, 2513 {{"acc"}, IS_AGPR}, 2514 {{"a"}, IS_AGPR}, 2515 }; 2516 2517 static bool isRegularReg(RegisterKind Kind) { 2518 return Kind == IS_VGPR || 2519 Kind == IS_SGPR || 2520 Kind == IS_TTMP || 2521 Kind == IS_AGPR; 2522 } 2523 2524 static const RegInfo* getRegularRegInfo(StringRef Str) { 2525 for (const RegInfo &Reg : RegularRegisters) 2526 if (Str.startswith(Reg.Name)) 2527 return &Reg; 2528 return nullptr; 2529 } 2530 2531 static bool getRegNum(StringRef Str, unsigned& Num) { 2532 return !Str.getAsInteger(10, Num); 2533 } 2534 2535 bool 2536 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2537 const AsmToken &NextToken) const { 2538 2539 // A list of consecutive registers: [s0,s1,s2,s3] 2540 if (Token.is(AsmToken::LBrac)) 2541 return true; 2542 2543 if (!Token.is(AsmToken::Identifier)) 2544 return false; 2545 2546 // A single register like s0 or a range of registers like s[0:1] 2547 2548 StringRef Str = Token.getString(); 2549 const RegInfo *Reg = getRegularRegInfo(Str); 2550 if (Reg) { 2551 StringRef RegName = Reg->Name; 2552 StringRef RegSuffix = Str.substr(RegName.size()); 2553 if (!RegSuffix.empty()) { 2554 unsigned Num; 2555 // A single register with an index: rXX 2556 if (getRegNum(RegSuffix, Num)) 2557 return true; 2558 } else { 2559 // A range of registers: r[XX:YY]. 2560 if (NextToken.is(AsmToken::LBrac)) 2561 return true; 2562 } 2563 } 2564 2565 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2566 } 2567 2568 bool 2569 AMDGPUAsmParser::isRegister() 2570 { 2571 return isRegister(getToken(), peekToken()); 2572 } 2573 2574 unsigned 2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2576 unsigned RegNum, 2577 unsigned RegWidth, 2578 SMLoc Loc) { 2579 2580 assert(isRegularReg(RegKind)); 2581 2582 unsigned AlignSize = 1; 2583 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2584 // SGPR and TTMP registers must be aligned. 2585 // Max required alignment is 4 dwords. 
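// For example: a 64-bit SGPR pair must start at an even index and a 128-bit
// tuple at a multiple of four, so s[2:3] and s[4:7] are accepted while
// s[3:4] and s[6:9] are rejected with "invalid register alignment".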
2586 AlignSize = std::min(RegWidth / 32, 4u); 2587 } 2588 2589 if (RegNum % AlignSize != 0) { 2590 Error(Loc, "invalid register alignment"); 2591 return AMDGPU::NoRegister; 2592 } 2593 2594 unsigned RegIdx = RegNum / AlignSize; 2595 int RCID = getRegClass(RegKind, RegWidth); 2596 if (RCID == -1) { 2597 Error(Loc, "invalid or unsupported register size"); 2598 return AMDGPU::NoRegister; 2599 } 2600 2601 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2602 const MCRegisterClass RC = TRI->getRegClass(RCID); 2603 if (RegIdx >= RC.getNumRegs()) { 2604 Error(Loc, "register index is out of range"); 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 return RC.getRegister(RegIdx); 2609 } 2610 2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2612 int64_t RegLo, RegHi; 2613 if (!skipToken(AsmToken::LBrac, "missing register index")) 2614 return false; 2615 2616 SMLoc FirstIdxLoc = getLoc(); 2617 SMLoc SecondIdxLoc; 2618 2619 if (!parseExpr(RegLo)) 2620 return false; 2621 2622 if (trySkipToken(AsmToken::Colon)) { 2623 SecondIdxLoc = getLoc(); 2624 if (!parseExpr(RegHi)) 2625 return false; 2626 } else { 2627 RegHi = RegLo; 2628 } 2629 2630 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2631 return false; 2632 2633 if (!isUInt<32>(RegLo)) { 2634 Error(FirstIdxLoc, "invalid register index"); 2635 return false; 2636 } 2637 2638 if (!isUInt<32>(RegHi)) { 2639 Error(SecondIdxLoc, "invalid register index"); 2640 return false; 2641 } 2642 2643 if (RegLo > RegHi) { 2644 Error(FirstIdxLoc, "first register index should not exceed second index"); 2645 return false; 2646 } 2647 2648 Num = static_cast<unsigned>(RegLo); 2649 RegWidth = 32 * ((RegHi - RegLo) + 1); 2650 return true; 2651 } 2652 2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2654 unsigned &RegNum, unsigned &RegWidth, 2655 SmallVectorImpl<AsmToken> &Tokens) { 2656 assert(isToken(AsmToken::Identifier)); 2657 unsigned Reg = getSpecialRegForName(getTokenStr()); 2658 if (Reg) { 2659 RegNum = 0; 2660 RegWidth = 32; 2661 RegKind = IS_SPECIAL; 2662 Tokens.push_back(getToken()); 2663 lex(); // skip register name 2664 } 2665 return Reg; 2666 } 2667 2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2669 unsigned &RegNum, unsigned &RegWidth, 2670 SmallVectorImpl<AsmToken> &Tokens) { 2671 assert(isToken(AsmToken::Identifier)); 2672 StringRef RegName = getTokenStr(); 2673 auto Loc = getLoc(); 2674 2675 const RegInfo *RI = getRegularRegInfo(RegName); 2676 if (!RI) { 2677 Error(Loc, "invalid register name"); 2678 return AMDGPU::NoRegister; 2679 } 2680 2681 Tokens.push_back(getToken()); 2682 lex(); // skip register name 2683 2684 RegKind = RI->Kind; 2685 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2686 if (!RegSuffix.empty()) { 2687 // Single 32-bit register: vXX. 2688 if (!getRegNum(RegSuffix, RegNum)) { 2689 Error(Loc, "invalid register index"); 2690 return AMDGPU::NoRegister; 2691 } 2692 RegWidth = 32; 2693 } else { 2694 // Range of registers: v[XX:YY]. ":YY" is optional. 
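// e.g. "v[0:3]" yields RegNum 0 / RegWidth 128 and "v[5]" yields RegNum 5 /
// RegWidth 32 (see ParseRegRange).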
2695 if (!ParseRegRange(RegNum, RegWidth)) 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2700 } 2701 2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2703 unsigned &RegWidth, 2704 SmallVectorImpl<AsmToken> &Tokens) { 2705 unsigned Reg = AMDGPU::NoRegister; 2706 auto ListLoc = getLoc(); 2707 2708 if (!skipToken(AsmToken::LBrac, 2709 "expected a register or a list of registers")) { 2710 return AMDGPU::NoRegister; 2711 } 2712 2713 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2714 2715 auto Loc = getLoc(); 2716 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2717 return AMDGPU::NoRegister; 2718 if (RegWidth != 32) { 2719 Error(Loc, "expected a single 32-bit register"); 2720 return AMDGPU::NoRegister; 2721 } 2722 2723 for (; trySkipToken(AsmToken::Comma); ) { 2724 RegisterKind NextRegKind; 2725 unsigned NextReg, NextRegNum, NextRegWidth; 2726 Loc = getLoc(); 2727 2728 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2729 NextRegNum, NextRegWidth, 2730 Tokens)) { 2731 return AMDGPU::NoRegister; 2732 } 2733 if (NextRegWidth != 32) { 2734 Error(Loc, "expected a single 32-bit register"); 2735 return AMDGPU::NoRegister; 2736 } 2737 if (NextRegKind != RegKind) { 2738 Error(Loc, "registers in a list must be of the same kind"); 2739 return AMDGPU::NoRegister; 2740 } 2741 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2742 return AMDGPU::NoRegister; 2743 } 2744 2745 if (!skipToken(AsmToken::RBrac, 2746 "expected a comma or a closing square bracket")) { 2747 return AMDGPU::NoRegister; 2748 } 2749 2750 if (isRegularReg(RegKind)) 2751 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2752 2753 return Reg; 2754 } 2755 2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2757 unsigned &RegNum, unsigned &RegWidth, 2758 SmallVectorImpl<AsmToken> &Tokens) { 2759 auto Loc = getLoc(); 2760 Reg = AMDGPU::NoRegister; 2761 2762 if (isToken(AsmToken::Identifier)) { 2763 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2764 if (Reg == AMDGPU::NoRegister) 2765 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2766 } else { 2767 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2768 } 2769 2770 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2771 if (Reg == AMDGPU::NoRegister) { 2772 assert(Parser.hasPendingError()); 2773 return false; 2774 } 2775 2776 if (!subtargetHasRegister(*TRI, Reg)) { 2777 if (Reg == AMDGPU::SGPR_NULL) { 2778 Error(Loc, "'null' operand is not supported on this GPU"); 2779 } else { 2780 Error(Loc, "register not available on this GPU"); 2781 } 2782 return false; 2783 } 2784 2785 return true; 2786 } 2787 2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2789 unsigned &RegNum, unsigned &RegWidth, 2790 bool RestoreOnFailure /*=false*/) { 2791 Reg = AMDGPU::NoRegister; 2792 2793 SmallVector<AsmToken, 1> Tokens; 2794 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2795 if (RestoreOnFailure) { 2796 while (!Tokens.empty()) { 2797 getLexer().UnLex(Tokens.pop_back_val()); 2798 } 2799 } 2800 return true; 2801 } 2802 return false; 2803 } 2804 2805 Optional<StringRef> 2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2807 switch (RegKind) { 2808 case IS_VGPR: 2809 return StringRef(".amdgcn.next_free_vgpr"); 2810 case IS_SGPR: 2811 return StringRef(".amdgcn.next_free_sgpr"); 2812 default: 2813 return None; 2814 } 2815 } 2816 2817 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2818 auto SymbolName = getGprCountSymbolName(RegKind); 2819 assert(SymbolName && "initializing invalid register kind"); 2820 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2821 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2822 } 2823 2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2825 unsigned DwordRegIndex, 2826 unsigned RegWidth) { 2827 // Symbols are only defined for GCN targets 2828 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2829 return true; 2830 2831 auto SymbolName = getGprCountSymbolName(RegKind); 2832 if (!SymbolName) 2833 return true; 2834 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2835 2836 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2837 int64_t OldCount; 2838 2839 if (!Sym->isVariable()) 2840 return !Error(getLoc(), 2841 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2842 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2843 return !Error( 2844 getLoc(), 2845 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2846 2847 if (OldCount <= NewMax) 2848 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2849 2850 return true; 2851 } 2852 2853 std::unique_ptr<AMDGPUOperand> 2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2855 const auto &Tok = getToken(); 2856 SMLoc StartLoc = Tok.getLoc(); 2857 SMLoc EndLoc = Tok.getEndLoc(); 2858 RegisterKind RegKind; 2859 unsigned Reg, RegNum, RegWidth; 2860 2861 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2862 return nullptr; 2863 } 2864 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2865 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2866 return nullptr; 2867 } else 2868 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2869 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2870 } 2871 2872 OperandMatchResultTy 2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2874 // TODO: add syntactic sugar for 1/(2*PI) 2875 2876 assert(!isRegister()); 2877 assert(!isModifier()); 2878 2879 const auto& Tok = getToken(); 2880 const auto& NextTok = peekToken(); 2881 bool IsReal = Tok.is(AsmToken::Real); 2882 SMLoc S = getLoc(); 2883 bool Negate = false; 2884 2885 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2886 lex(); 2887 IsReal = true; 2888 Negate = true; 2889 } 2890 2891 if (IsReal) { 2892 // Floating-point expressions are not supported. 2893 // Can only allow floating-point literals with an 2894 // optional sign. 2895 2896 StringRef Num = getTokenStr(); 2897 lex(); 2898 2899 APFloat RealVal(APFloat::IEEEdouble()); 2900 auto roundMode = APFloat::rmNearestTiesToEven; 2901 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2902 return MatchOperand_ParseFail; 2903 } 2904 if (Negate) 2905 RealVal.changeSign(); 2906 2907 Operands.push_back( 2908 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2909 AMDGPUOperand::ImmTyNone, true)); 2910 2911 return MatchOperand_Success; 2912 2913 } else { 2914 int64_t IntVal; 2915 const MCExpr *Expr; 2916 SMLoc S = getLoc(); 2917 2918 if (HasSP3AbsModifier) { 2919 // This is a workaround for handling expressions 2920 // as arguments of SP3 'abs' modifier, for example: 2921 // |1.0| 2922 // |-1| 2923 // |1+x| 2924 // This syntax is not compatible with syntax of standard 2925 // MC expressions (due to the trailing '|'). 
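// Note (informal): the generic parseExpression() would try to fold the
// trailing '|' into the expression as a bitwise-OR operator, so a more
// restricted primary-expression parse is used for this case instead.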
2926 SMLoc EndLoc; 2927 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2928 return MatchOperand_ParseFail; 2929 } else { 2930 if (Parser.parseExpression(Expr)) 2931 return MatchOperand_ParseFail; 2932 } 2933 2934 if (Expr->evaluateAsAbsolute(IntVal)) { 2935 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2936 } else { 2937 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2938 } 2939 2940 return MatchOperand_Success; 2941 } 2942 2943 return MatchOperand_NoMatch; 2944 } 2945 2946 OperandMatchResultTy 2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2948 if (!isRegister()) 2949 return MatchOperand_NoMatch; 2950 2951 if (auto R = parseRegister()) { 2952 assert(R->isReg()); 2953 Operands.push_back(std::move(R)); 2954 return MatchOperand_Success; 2955 } 2956 return MatchOperand_ParseFail; 2957 } 2958 2959 OperandMatchResultTy 2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2961 auto res = parseReg(Operands); 2962 if (res != MatchOperand_NoMatch) { 2963 return res; 2964 } else if (isModifier()) { 2965 return MatchOperand_NoMatch; 2966 } else { 2967 return parseImm(Operands, HasSP3AbsMod); 2968 } 2969 } 2970 2971 bool 2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2973 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2974 const auto &str = Token.getString(); 2975 return str == "abs" || str == "neg" || str == "sext"; 2976 } 2977 return false; 2978 } 2979 2980 bool 2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2982 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2983 } 2984 2985 bool 2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2987 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2988 } 2989 2990 bool 2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2992 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2993 } 2994 2995 // Check if this is an operand modifier or an opcode modifier 2996 // which may look like an expression but it is not. We should 2997 // avoid parsing these modifiers as expressions. Currently 2998 // recognized sequences are: 2999 // |...| 3000 // abs(...) 3001 // neg(...) 3002 // sext(...) 3003 // -reg 3004 // -|...| 3005 // -abs(...) 3006 // name:... 3007 // Note that simple opcode modifiers like 'gds' may be parsed as 3008 // expressions; this is a special case. See getExpressionAsToken. 3009 // 3010 bool 3011 AMDGPUAsmParser::isModifier() { 3012 3013 AsmToken Tok = getToken(); 3014 AsmToken NextToken[2]; 3015 peekTokens(NextToken); 3016 3017 return isOperandModifier(Tok, NextToken[0]) || 3018 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3019 isOpcodeModifierWithVal(Tok, NextToken[0]); 3020 } 3021 3022 // Check if the current token is an SP3 'neg' modifier. 3023 // Currently this modifier is allowed in the following context: 3024 // 3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3026 // 2. Before an 'abs' modifier: -abs(...) 3027 // 3. Before an SP3 'abs' modifier: -|...| 3028 // 3029 // In all other cases "-" is handled as a part 3030 // of an expression that follows the sign. 
3031 // 3032 // Note: When "-" is followed by an integer literal, 3033 // this is interpreted as integer negation rather 3034 // than a floating-point NEG modifier applied to N. 3035 // Beside being contr-intuitive, such use of floating-point 3036 // NEG modifier would have resulted in different meaning 3037 // of integer literals used with VOP1/2/C and VOP3, 3038 // for example: 3039 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3040 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3041 // Negative fp literals with preceding "-" are 3042 // handled likewise for uniformity 3043 // 3044 bool 3045 AMDGPUAsmParser::parseSP3NegModifier() { 3046 3047 AsmToken NextToken[2]; 3048 peekTokens(NextToken); 3049 3050 if (isToken(AsmToken::Minus) && 3051 (isRegister(NextToken[0], NextToken[1]) || 3052 NextToken[0].is(AsmToken::Pipe) || 3053 isId(NextToken[0], "abs"))) { 3054 lex(); 3055 return true; 3056 } 3057 3058 return false; 3059 } 3060 3061 OperandMatchResultTy 3062 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3063 bool AllowImm) { 3064 bool Neg, SP3Neg; 3065 bool Abs, SP3Abs; 3066 SMLoc Loc; 3067 3068 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3069 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3070 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3071 return MatchOperand_ParseFail; 3072 } 3073 3074 SP3Neg = parseSP3NegModifier(); 3075 3076 Loc = getLoc(); 3077 Neg = trySkipId("neg"); 3078 if (Neg && SP3Neg) { 3079 Error(Loc, "expected register or immediate"); 3080 return MatchOperand_ParseFail; 3081 } 3082 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3083 return MatchOperand_ParseFail; 3084 3085 Abs = trySkipId("abs"); 3086 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3087 return MatchOperand_ParseFail; 3088 3089 Loc = getLoc(); 3090 SP3Abs = trySkipToken(AsmToken::Pipe); 3091 if (Abs && SP3Abs) { 3092 Error(Loc, "expected register or immediate"); 3093 return MatchOperand_ParseFail; 3094 } 3095 3096 OperandMatchResultTy Res; 3097 if (AllowImm) { 3098 Res = parseRegOrImm(Operands, SP3Abs); 3099 } else { 3100 Res = parseReg(Operands); 3101 } 3102 if (Res != MatchOperand_Success) { 3103 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 3104 } 3105 3106 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3107 return MatchOperand_ParseFail; 3108 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3109 return MatchOperand_ParseFail; 3110 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3111 return MatchOperand_ParseFail; 3112 3113 AMDGPUOperand::Modifiers Mods; 3114 Mods.Abs = Abs || SP3Abs; 3115 Mods.Neg = Neg || SP3Neg; 3116 3117 if (Mods.hasFPModifiers()) { 3118 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3119 if (Op.isExpr()) { 3120 Error(Op.getStartLoc(), "expected an absolute expression"); 3121 return MatchOperand_ParseFail; 3122 } 3123 Op.setModifiers(Mods); 3124 } 3125 return MatchOperand_Success; 3126 } 3127 3128 OperandMatchResultTy 3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3130 bool AllowImm) { 3131 bool Sext = trySkipId("sext"); 3132 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3133 return MatchOperand_ParseFail; 3134 3135 OperandMatchResultTy Res; 3136 if (AllowImm) { 3137 Res = parseRegOrImm(Operands); 3138 } else { 3139 Res = parseReg(Operands); 3140 } 3141 if (Res != MatchOperand_Success) { 3142 return Sext? MatchOperand_ParseFail : Res; 3143 } 3144 3145 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3146 return MatchOperand_ParseFail; 3147 3148 AMDGPUOperand::Modifiers Mods; 3149 Mods.Sext = Sext; 3150 3151 if (Mods.hasIntModifiers()) { 3152 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3153 if (Op.isExpr()) { 3154 Error(Op.getStartLoc(), "expected an absolute expression"); 3155 return MatchOperand_ParseFail; 3156 } 3157 Op.setModifiers(Mods); 3158 } 3159 3160 return MatchOperand_Success; 3161 } 3162 3163 OperandMatchResultTy 3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3165 return parseRegOrImmWithFPInputMods(Operands, false); 3166 } 3167 3168 OperandMatchResultTy 3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3170 return parseRegOrImmWithIntInputMods(Operands, false); 3171 } 3172 3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3174 auto Loc = getLoc(); 3175 if (trySkipId("off")) { 3176 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3177 AMDGPUOperand::ImmTyOff, false)); 3178 return MatchOperand_Success; 3179 } 3180 3181 if (!isRegister()) 3182 return MatchOperand_NoMatch; 3183 3184 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3185 if (Reg) { 3186 Operands.push_back(std::move(Reg)); 3187 return MatchOperand_Success; 3188 } 3189 3190 return MatchOperand_ParseFail; 3191 3192 } 3193 3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3195 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3196 3197 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3198 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3199 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3200 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3201 return Match_InvalidOperand; 3202 3203 if ((TSFlags & SIInstrFlags::VOP3) && 3204 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3205 getForcedEncodingSize() != 64) 3206 return Match_PreferE32; 3207 3208 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3209 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3210 // v_mac_f32/16 allow only dst_sel == DWORD; 3211 auto OpNum = 3212 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3213 const auto &Op = Inst.getOperand(OpNum); 3214 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3215 return Match_InvalidOperand; 3216 } 3217 } 3218 3219 return Match_Success; 3220 } 3221 3222 static ArrayRef<unsigned> getAllVariants() { 3223 static const unsigned Variants[] = { 3224 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3225 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3226 }; 3227 3228 return makeArrayRef(Variants); 3229 } 3230 3231 // What asm variants we should check 3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3233 if (getForcedEncodingSize() == 32) { 3234 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3235 return makeArrayRef(Variants); 3236 } 3237 3238 if (isForcedVOP3()) { 3239 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3240 return makeArrayRef(Variants); 3241 } 3242 3243 if (isForcedSDWA()) { 3244 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3245 AMDGPUAsmVariants::SDWA9}; 3246 return makeArrayRef(Variants); 3247 } 3248 3249 if (isForcedDPP()) { 3250 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3251 return makeArrayRef(Variants); 3252 } 3253 3254 return getAllVariants(); 3255 } 3256 3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3258 if (getForcedEncodingSize() == 32) 3259 return "e32"; 3260 3261 if (isForcedVOP3()) 3262 return "e64"; 3263 3264 if (isForcedSDWA()) 3265 return "sdwa"; 3266 3267 if (isForcedDPP()) 3268 return "dpp"; 3269 3270 return ""; 3271 } 3272 3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 const unsigned Num = Desc.getNumImplicitUses(); 3276 for (unsigned i = 0; i < Num; ++i) { 3277 unsigned Reg = Desc.ImplicitUses[i]; 3278 switch (Reg) { 3279 case AMDGPU::FLAT_SCR: 3280 case AMDGPU::VCC: 3281 case AMDGPU::VCC_LO: 3282 case AMDGPU::VCC_HI: 3283 case AMDGPU::M0: 3284 return Reg; 3285 default: 3286 break; 3287 } 3288 } 3289 return AMDGPU::NoRegister; 3290 } 3291 3292 // NB: This code is correct only when used to check constant 3293 // bus limitations because GFX7 support no f16 inline constants. 3294 // Note that there are no cases when a GFX7 opcode violates 3295 // constant bus limitations due to the use of an f16 constant. 
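// Constant bus background (informal): a VALU instruction may read only a
// limited number of scalar values (SGPRs, literals and certain special
// registers) - one on most targets, two on GFX10+ (see getConstantBusLimit).
// For example, "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and is
// rejected on GFX9 with "invalid operand (violates constant bus
// restrictions)", while "v_add_f32_e64 v0, s0, v1" is accepted.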
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3297 unsigned OpIdx) const { 3298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3299 3300 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3301 return false; 3302 } 3303 3304 const MCOperand &MO = Inst.getOperand(OpIdx); 3305 3306 int64_t Val = MO.getImm(); 3307 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3308 3309 switch (OpSize) { // expected operand size 3310 case 8: 3311 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3312 case 4: 3313 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3314 case 2: { 3315 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3316 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3317 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3318 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3319 return AMDGPU::isInlinableIntLiteral(Val); 3320 3321 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3322 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3323 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3324 return AMDGPU::isInlinableIntLiteralV216(Val); 3325 3326 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3327 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3328 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3329 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3330 3331 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3332 } 3333 default: 3334 llvm_unreachable("invalid operand size"); 3335 } 3336 } 3337 3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3339 if (!isGFX10Plus()) 3340 return 1; 3341 3342 switch (Opcode) { 3343 // 64-bit shift instructions can use only one scalar value input 3344 case AMDGPU::V_LSHLREV_B64_e64: 3345 case AMDGPU::V_LSHLREV_B64_gfx10: 3346 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3347 case AMDGPU::V_LSHRREV_B64_e64: 3348 case AMDGPU::V_LSHRREV_B64_gfx10: 3349 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3350 case AMDGPU::V_ASHRREV_I64_e64: 3351 case AMDGPU::V_ASHRREV_I64_gfx10: 3352 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3353 case AMDGPU::V_LSHL_B64_e64: 3354 case AMDGPU::V_LSHR_B64_e64: 3355 case AMDGPU::V_ASHR_I64_e64: 3356 return 1; 3357 default: 3358 return 2; 3359 } 3360 } 3361 3362 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3363 const MCOperand &MO = Inst.getOperand(OpIdx); 3364 if (MO.isImm()) { 3365 return !isInlineConstant(Inst, OpIdx); 3366 } else if (MO.isReg()) { 3367 auto Reg = MO.getReg(); 3368 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3369 auto PReg = mc2PseudoReg(Reg); 3370 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3371 } else { 3372 return true; 3373 } 3374 } 3375 3376 bool 3377 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3378 const OperandVector &Operands) { 3379 const unsigned Opcode = Inst.getOpcode(); 3380 const MCInstrDesc &Desc = MII.get(Opcode); 3381 unsigned LastSGPR = AMDGPU::NoRegister; 3382 unsigned ConstantBusUseCount = 0; 3383 unsigned NumLiterals = 0; 3384 unsigned LiteralSize; 3385 3386 if (Desc.TSFlags & 3387 (SIInstrFlags::VOPC | 3388 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3389 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3390 SIInstrFlags::SDWA)) { 3391 // Check special imm operands (used by madmk, etc) 3392 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3393 ++NumLiterals; 3394 LiteralSize = 4; 3395 } 3396 3397 SmallDenseSet<unsigned> SGPRsUsed; 3398 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3399 if (SGPRUsed != AMDGPU::NoRegister) { 3400 SGPRsUsed.insert(SGPRUsed); 3401 ++ConstantBusUseCount; 3402 } 3403 3404 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3405 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3406 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3407 3408 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3409 3410 for (int OpIdx : OpIndices) { 3411 if (OpIdx == -1) break; 3412 3413 const MCOperand &MO = Inst.getOperand(OpIdx); 3414 if (usesConstantBus(Inst, OpIdx)) { 3415 if (MO.isReg()) { 3416 LastSGPR = mc2PseudoReg(MO.getReg()); 3417 // Pairs of registers with a partial intersections like these 3418 // s0, s[0:1] 3419 // flat_scratch_lo, flat_scratch 3420 // flat_scratch_lo, flat_scratch_hi 3421 // are theoretically valid but they are disabled anyway. 3422 // Note that this code mimics SIInstrInfo::verifyInstruction 3423 if (!SGPRsUsed.count(LastSGPR)) { 3424 SGPRsUsed.insert(LastSGPR); 3425 ++ConstantBusUseCount; 3426 } 3427 } else { // Expression or a literal 3428 3429 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3430 continue; // special operand like VINTERP attr_chan 3431 3432 // An instruction may use only one literal. 3433 // This has been validated on the previous step. 3434 // See validateVOPLiteral. 3435 // This literal may be used as more than one operand. 3436 // If all these operands are of the same size, 3437 // this literal counts as one scalar value. 3438 // Otherwise it counts as 2 scalar values. 3439 // See "GFX10 Shader Programming", section 3.6.2.3. 3440 3441 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3442 if (Size < 4) Size = 4; 3443 3444 if (NumLiterals == 0) { 3445 NumLiterals = 1; 3446 LiteralSize = Size; 3447 } else if (LiteralSize != Size) { 3448 NumLiterals = 2; 3449 } 3450 } 3451 } 3452 } 3453 } 3454 ConstantBusUseCount += NumLiterals; 3455 3456 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3457 return true; 3458 3459 SMLoc LitLoc = getLitLoc(Operands); 3460 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3461 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3462 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3463 return false; 3464 } 3465 3466 bool 3467 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3468 const OperandVector &Operands) { 3469 const unsigned Opcode = Inst.getOpcode(); 3470 const MCInstrDesc &Desc = MII.get(Opcode); 3471 3472 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3473 if (DstIdx == -1 || 3474 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3475 return true; 3476 } 3477 3478 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3479 3480 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3481 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3482 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3483 3484 assert(DstIdx != -1); 3485 const MCOperand &Dst = Inst.getOperand(DstIdx); 3486 assert(Dst.isReg()); 3487 3488 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3489 3490 for (int SrcIdx : SrcIndices) { 3491 if (SrcIdx == -1) break; 3492 const MCOperand &Src = Inst.getOperand(SrcIdx); 3493 if (Src.isReg()) { 3494 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3495 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3496 Error(getRegLoc(SrcReg, Operands), 3497 "destination must be different than all sources"); 3498 return false; 3499 } 3500 } 3501 } 3502 3503 return true; 3504 } 3505 3506 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3507 3508 const unsigned Opc = Inst.getOpcode(); 3509 const MCInstrDesc &Desc = MII.get(Opc); 3510 3511 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3512 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3513 assert(ClampIdx != -1); 3514 return Inst.getOperand(ClampIdx).getImm() == 0; 3515 } 3516 3517 return true; 3518 } 3519 3520 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3521 3522 const unsigned Opc = Inst.getOpcode(); 3523 const MCInstrDesc &Desc = MII.get(Opc); 3524 3525 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3526 return None; 3527 3528 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3529 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3530 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3531 3532 assert(VDataIdx != -1); 3533 3534 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3535 return None; 3536 3537 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3538 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3539 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3540 if (DMask == 0) 3541 DMask = 1; 3542 3543 bool isPackedD16 = false; 3544 unsigned DataSize = 3545 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3546 if (hasPackedD16()) { 3547 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3548 isPackedD16 = D16Idx >= 0; 3549 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3550 DataSize = (DataSize + 1) / 2; 3551 } 3552 3553 if ((VDataSize / 4) == DataSize + TFESize) 3554 return None; 3555 3556 return StringRef(isPackedD16 3557 ? 
"image data size does not match dmask, d16 and tfe" 3558 : "image data size does not match dmask and tfe"); 3559 } 3560 3561 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3562 const unsigned Opc = Inst.getOpcode(); 3563 const MCInstrDesc &Desc = MII.get(Opc); 3564 3565 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3566 return true; 3567 3568 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3569 3570 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3571 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3572 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3573 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3574 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3575 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3576 3577 assert(VAddr0Idx != -1); 3578 assert(SrsrcIdx != -1); 3579 assert(SrsrcIdx > VAddr0Idx); 3580 3581 if (DimIdx == -1) 3582 return true; // intersect_ray 3583 3584 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3585 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3586 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3587 unsigned ActualAddrSize = 3588 IsNSA ? SrsrcIdx - VAddr0Idx 3589 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3590 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3591 3592 unsigned ExpectedAddrSize = 3593 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3594 3595 if (!IsNSA) { 3596 if (ExpectedAddrSize > 8) 3597 ExpectedAddrSize = 16; 3598 3599 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3600 // This provides backward compatibility for assembly created 3601 // before 160b/192b/224b types were directly supported. 3602 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3603 return true; 3604 } 3605 3606 return ActualAddrSize == ExpectedAddrSize; 3607 } 3608 3609 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3610 3611 const unsigned Opc = Inst.getOpcode(); 3612 const MCInstrDesc &Desc = MII.get(Opc); 3613 3614 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3615 return true; 3616 if (!Desc.mayLoad() || !Desc.mayStore()) 3617 return true; // Not atomic 3618 3619 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3620 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3621 3622 // This is an incomplete check because image_atomic_cmpswap 3623 // may only use 0x3 and 0xf while other atomic operations 3624 // may use 0x1 and 0x3. However these limitations are 3625 // verified when we check that dmask matches dst size. 3626 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3627 } 3628 3629 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3630 3631 const unsigned Opc = Inst.getOpcode(); 3632 const MCInstrDesc &Desc = MII.get(Opc); 3633 3634 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3635 return true; 3636 3637 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3638 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3639 3640 // GATHER4 instructions use dmask in a different fashion compared to 3641 // other MIMG instructions. The only useful DMASK values are 3642 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3643 // (red,red,red,red) etc.) The ISA document doesn't mention 3644 // this. 
3645 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3646 } 3647 3648 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3649 const unsigned Opc = Inst.getOpcode(); 3650 const MCInstrDesc &Desc = MII.get(Opc); 3651 3652 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3653 return true; 3654 3655 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3656 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3657 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3658 3659 if (!BaseOpcode->MSAA) 3660 return true; 3661 3662 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3663 assert(DimIdx != -1); 3664 3665 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3666 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3667 3668 return DimInfo->MSAA; 3669 } 3670 3671 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3672 { 3673 switch (Opcode) { 3674 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3675 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3676 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3677 return true; 3678 default: 3679 return false; 3680 } 3681 } 3682 3683 // movrels* opcodes should only allow VGPRS as src0. 3684 // This is specified in .td description for vop1/vop3, 3685 // but sdwa is handled differently. See isSDWAOperand. 3686 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3687 const OperandVector &Operands) { 3688 3689 const unsigned Opc = Inst.getOpcode(); 3690 const MCInstrDesc &Desc = MII.get(Opc); 3691 3692 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3693 return true; 3694 3695 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3696 assert(Src0Idx != -1); 3697 3698 SMLoc ErrLoc; 3699 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3700 if (Src0.isReg()) { 3701 auto Reg = mc2PseudoReg(Src0.getReg()); 3702 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3703 if (!isSGPR(Reg, TRI)) 3704 return true; 3705 ErrLoc = getRegLoc(Reg, Operands); 3706 } else { 3707 ErrLoc = getConstLoc(Operands); 3708 } 3709 3710 Error(ErrLoc, "source operand must be a VGPR"); 3711 return false; 3712 } 3713 3714 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3715 const OperandVector &Operands) { 3716 3717 const unsigned Opc = Inst.getOpcode(); 3718 3719 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3720 return true; 3721 3722 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3723 assert(Src0Idx != -1); 3724 3725 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3726 if (!Src0.isReg()) 3727 return true; 3728 3729 auto Reg = mc2PseudoReg(Src0.getReg()); 3730 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3731 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3732 Error(getRegLoc(Reg, Operands), 3733 "source operand must be either a VGPR or an inline constant"); 3734 return false; 3735 } 3736 3737 return true; 3738 } 3739 3740 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3741 const OperandVector &Operands) { 3742 const unsigned Opc = Inst.getOpcode(); 3743 const MCInstrDesc &Desc = MII.get(Opc); 3744 3745 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3746 return true; 3747 3748 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3749 if (Src2Idx == -1) 3750 return true; 3751 3752 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3753 if (!Src2.isReg()) 3754 return true; 3755 3756 MCRegister Src2Reg = Src2.getReg(); 3757 MCRegister DstReg = Inst.getOperand(0).getReg(); 3758 if (Src2Reg == DstReg) 3759 return 
true; 3760 3761 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3762 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3763 return true; 3764 3765 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3766 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3767 "source 2 operand must not partially overlap with dst"); 3768 return false; 3769 } 3770 3771 return true; 3772 } 3773 3774 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3775 switch (Inst.getOpcode()) { 3776 default: 3777 return true; 3778 case V_DIV_SCALE_F32_gfx6_gfx7: 3779 case V_DIV_SCALE_F32_vi: 3780 case V_DIV_SCALE_F32_gfx10: 3781 case V_DIV_SCALE_F64_gfx6_gfx7: 3782 case V_DIV_SCALE_F64_vi: 3783 case V_DIV_SCALE_F64_gfx10: 3784 break; 3785 } 3786 3787 // TODO: Check that src0 = src1 or src2. 3788 3789 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3790 AMDGPU::OpName::src2_modifiers, 3791 AMDGPU::OpName::src2_modifiers}) { 3792 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3793 .getImm() & 3794 SISrcMods::ABS) { 3795 return false; 3796 } 3797 } 3798 3799 return true; 3800 } 3801 3802 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3803 3804 const unsigned Opc = Inst.getOpcode(); 3805 const MCInstrDesc &Desc = MII.get(Opc); 3806 3807 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3808 return true; 3809 3810 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3811 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3812 if (isCI() || isSI()) 3813 return false; 3814 } 3815 3816 return true; 3817 } 3818 3819 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3820 const unsigned Opc = Inst.getOpcode(); 3821 const MCInstrDesc &Desc = MII.get(Opc); 3822 3823 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3824 return true; 3825 3826 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3827 if (DimIdx < 0) 3828 return true; 3829 3830 long Imm = Inst.getOperand(DimIdx).getImm(); 3831 if (Imm < 0 || Imm >= 8) 3832 return false; 3833 3834 return true; 3835 } 3836 3837 static bool IsRevOpcode(const unsigned Opcode) 3838 { 3839 switch (Opcode) { 3840 case AMDGPU::V_SUBREV_F32_e32: 3841 case AMDGPU::V_SUBREV_F32_e64: 3842 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3843 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3844 case AMDGPU::V_SUBREV_F32_e32_vi: 3845 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3846 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3847 case AMDGPU::V_SUBREV_F32_e64_vi: 3848 3849 case AMDGPU::V_SUBREV_CO_U32_e32: 3850 case AMDGPU::V_SUBREV_CO_U32_e64: 3851 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3852 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3853 3854 case AMDGPU::V_SUBBREV_U32_e32: 3855 case AMDGPU::V_SUBBREV_U32_e64: 3856 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3857 case AMDGPU::V_SUBBREV_U32_e32_vi: 3858 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3859 case AMDGPU::V_SUBBREV_U32_e64_vi: 3860 3861 case AMDGPU::V_SUBREV_U32_e32: 3862 case AMDGPU::V_SUBREV_U32_e64: 3863 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3864 case AMDGPU::V_SUBREV_U32_e32_vi: 3865 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3866 case AMDGPU::V_SUBREV_U32_e64_vi: 3867 3868 case AMDGPU::V_SUBREV_F16_e32: 3869 case AMDGPU::V_SUBREV_F16_e64: 3870 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3871 case AMDGPU::V_SUBREV_F16_e32_vi: 3872 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3873 case AMDGPU::V_SUBREV_F16_e64_vi: 3874 3875 case AMDGPU::V_SUBREV_U16_e32: 3876 case AMDGPU::V_SUBREV_U16_e64: 3877 case AMDGPU::V_SUBREV_U16_e32_vi: 3878 case AMDGPU::V_SUBREV_U16_e64_vi: 3879 3880 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3881 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3882 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3883 3884 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3885 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3886 3887 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3888 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3889 3890 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3891 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3892 3893 case AMDGPU::V_LSHRREV_B32_e32: 3894 case AMDGPU::V_LSHRREV_B32_e64: 3895 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3896 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3897 case AMDGPU::V_LSHRREV_B32_e32_vi: 3898 case AMDGPU::V_LSHRREV_B32_e64_vi: 3899 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3900 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3901 3902 case AMDGPU::V_ASHRREV_I32_e32: 3903 case AMDGPU::V_ASHRREV_I32_e64: 3904 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3905 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3906 case AMDGPU::V_ASHRREV_I32_e32_vi: 3907 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3908 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3909 case AMDGPU::V_ASHRREV_I32_e64_vi: 3910 3911 case AMDGPU::V_LSHLREV_B32_e32: 3912 case AMDGPU::V_LSHLREV_B32_e64: 3913 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3914 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3915 case AMDGPU::V_LSHLREV_B32_e32_vi: 3916 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3917 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3918 case AMDGPU::V_LSHLREV_B32_e64_vi: 3919 3920 case AMDGPU::V_LSHLREV_B16_e32: 3921 case AMDGPU::V_LSHLREV_B16_e64: 3922 case AMDGPU::V_LSHLREV_B16_e32_vi: 3923 case AMDGPU::V_LSHLREV_B16_e64_vi: 3924 case AMDGPU::V_LSHLREV_B16_gfx10: 3925 3926 case AMDGPU::V_LSHRREV_B16_e32: 3927 case AMDGPU::V_LSHRREV_B16_e64: 3928 case AMDGPU::V_LSHRREV_B16_e32_vi: 3929 case AMDGPU::V_LSHRREV_B16_e64_vi: 3930 case AMDGPU::V_LSHRREV_B16_gfx10: 3931 3932 case AMDGPU::V_ASHRREV_I16_e32: 3933 case AMDGPU::V_ASHRREV_I16_e64: 3934 case AMDGPU::V_ASHRREV_I16_e32_vi: 3935 case AMDGPU::V_ASHRREV_I16_e64_vi: 3936 case AMDGPU::V_ASHRREV_I16_gfx10: 3937 3938 case AMDGPU::V_LSHLREV_B64_e64: 3939 case AMDGPU::V_LSHLREV_B64_gfx10: 3940 case AMDGPU::V_LSHLREV_B64_vi: 3941 3942 case AMDGPU::V_LSHRREV_B64_e64: 3943 case AMDGPU::V_LSHRREV_B64_gfx10: 3944 case AMDGPU::V_LSHRREV_B64_vi: 3945 3946 case AMDGPU::V_ASHRREV_I64_e64: 3947 case AMDGPU::V_ASHRREV_I64_gfx10: 3948 case AMDGPU::V_ASHRREV_I64_vi: 3949 3950 case AMDGPU::V_PK_LSHLREV_B16: 3951 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3952 case AMDGPU::V_PK_LSHLREV_B16_vi: 3953 3954 case AMDGPU::V_PK_LSHRREV_B16: 3955 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3956 case AMDGPU::V_PK_LSHRREV_B16_vi: 3957 case AMDGPU::V_PK_ASHRREV_I16: 3958 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3959 case AMDGPU::V_PK_ASHRREV_I16_vi: 3960 return true; 3961 default: 3962 return false; 3963 } 3964 } 3965 3966 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3967 3968 using namespace SIInstrFlags; 3969 const unsigned Opcode = Inst.getOpcode(); 3970 const MCInstrDesc &Desc = MII.get(Opcode); 3971 3972 // lds_direct register is defined so that it can be used 3973 // with 9-bit operands only. Ignore encodings which do not accept these. 
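// For example (illustrative): on targets that support it, lds_direct used as
// src0 of a plain VOP1/VOP2 instruction passes the checks below, while SDWA
// forms, *rev opcodes, and any use outside src0 are rejected.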
3974 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3975 if ((Desc.TSFlags & Enc) == 0) 3976 return None; 3977 3978 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3979 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3980 if (SrcIdx == -1) 3981 break; 3982 const auto &Src = Inst.getOperand(SrcIdx); 3983 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3984 3985 if (isGFX90A() || isGFX11Plus()) 3986 return StringRef("lds_direct is not supported on this GPU"); 3987 3988 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3989 return StringRef("lds_direct cannot be used with this instruction"); 3990 3991 if (SrcName != OpName::src0) 3992 return StringRef("lds_direct may be used as src0 only"); 3993 } 3994 } 3995 3996 return None; 3997 } 3998 3999 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4000 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4001 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4002 if (Op.isFlatOffset()) 4003 return Op.getStartLoc(); 4004 } 4005 return getLoc(); 4006 } 4007 4008 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4009 const OperandVector &Operands) { 4010 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4011 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4012 return true; 4013 4014 auto Opcode = Inst.getOpcode(); 4015 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4016 assert(OpNum != -1); 4017 4018 const auto &Op = Inst.getOperand(OpNum); 4019 if (!hasFlatOffsets() && Op.getImm() != 0) { 4020 Error(getFlatOffsetLoc(Operands), 4021 "flat offset modifier is not supported on this GPU"); 4022 return false; 4023 } 4024 4025 // For FLAT segment the offset must be positive; 4026 // MSB is ignored and forced to zero. 4027 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4028 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4029 if (!isIntN(OffsetSize, Op.getImm())) { 4030 Error(getFlatOffsetLoc(Operands), 4031 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4032 return false; 4033 } 4034 } else { 4035 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4036 if (!isUIntN(OffsetSize, Op.getImm())) { 4037 Error(getFlatOffsetLoc(Operands), 4038 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4039 return false; 4040 } 4041 } 4042 4043 return true; 4044 } 4045 4046 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4047 // Start with second operand because SMEM Offset cannot be dst or src0. 
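// (Illustrative: Operands[0] holds the mnemonic token and Operands[1] the
// destination or first source, so the earliest slot an SMEM offset can
// occupy is Operands[2].)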
4048 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4049 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4050 if (Op.isSMEMOffset()) 4051 return Op.getStartLoc(); 4052 } 4053 return getLoc(); 4054 } 4055 4056 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4057 const OperandVector &Operands) { 4058 if (isCI() || isSI()) 4059 return true; 4060 4061 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4062 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4063 return true; 4064 4065 auto Opcode = Inst.getOpcode(); 4066 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4067 if (OpNum == -1) 4068 return true; 4069 4070 const auto &Op = Inst.getOperand(OpNum); 4071 if (!Op.isImm()) 4072 return true; 4073 4074 uint64_t Offset = Op.getImm(); 4075 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4076 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4077 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4078 return true; 4079 4080 Error(getSMEMOffsetLoc(Operands), 4081 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4082 "expected a 21-bit signed offset"); 4083 4084 return false; 4085 } 4086 4087 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4088 unsigned Opcode = Inst.getOpcode(); 4089 const MCInstrDesc &Desc = MII.get(Opcode); 4090 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4091 return true; 4092 4093 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4094 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4095 4096 const int OpIndices[] = { Src0Idx, Src1Idx }; 4097 4098 unsigned NumExprs = 0; 4099 unsigned NumLiterals = 0; 4100 uint32_t LiteralValue; 4101 4102 for (int OpIdx : OpIndices) { 4103 if (OpIdx == -1) break; 4104 4105 const MCOperand &MO = Inst.getOperand(OpIdx); 4106 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4107 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4108 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4109 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4110 if (NumLiterals == 0 || LiteralValue != Value) { 4111 LiteralValue = Value; 4112 ++NumLiterals; 4113 } 4114 } else if (MO.isExpr()) { 4115 ++NumExprs; 4116 } 4117 } 4118 } 4119 4120 return NumLiterals + NumExprs <= 1; 4121 } 4122 4123 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4124 const unsigned Opc = Inst.getOpcode(); 4125 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4126 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4127 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4128 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4129 4130 if (OpSel & ~3) 4131 return false; 4132 } 4133 4134 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4135 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4136 if (OpSelIdx != -1) { 4137 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4138 return false; 4139 } 4140 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4141 if (OpSelHiIdx != -1) { 4142 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4143 return false; 4144 } 4145 } 4146 4147 return true; 4148 } 4149 4150 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4151 const OperandVector &Operands) { 4152 const unsigned Opc = Inst.getOpcode(); 4153 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4154 if (DppCtrlIdx < 0) 4155 return true; 4156 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4157 4158 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4159 // DPP64 is supported for row_newbcast only. 4160 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4161 if (Src0Idx >= 0 && 4162 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4163 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4164 Error(S, "64 bit dpp only supports row_newbcast"); 4165 return false; 4166 } 4167 } 4168 4169 return true; 4170 } 4171 4172 // Check if VCC register matches wavefront size 4173 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4174 auto FB = getFeatureBits(); 4175 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4176 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4177 } 4178 4179 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4180 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4181 const OperandVector &Operands) { 4182 unsigned Opcode = Inst.getOpcode(); 4183 const MCInstrDesc &Desc = MII.get(Opcode); 4184 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4185 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4186 ImmIdx == -1) 4187 return true; 4188 4189 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4190 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4191 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4192 4193 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4194 4195 unsigned NumExprs = 0; 4196 unsigned NumLiterals = 0; 4197 uint32_t LiteralValue; 4198 4199 for (int OpIdx : OpIndices) { 4200 if (OpIdx == -1) 4201 continue; 4202 4203 const MCOperand &MO = Inst.getOperand(OpIdx); 4204 if (!MO.isImm() && !MO.isExpr()) 4205 continue; 4206 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4207 continue; 4208 4209 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4210 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4211 Error(getConstLoc(Operands), 4212 "inline constants are not allowed for this operand"); 4213 return false; 4214 } 4215 4216 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4217 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4218 if (NumLiterals == 0 || LiteralValue != Value) { 4219 LiteralValue = Value; 4220 ++NumLiterals; 4221 } 4222 } else if (MO.isExpr()) { 4223 ++NumExprs; 4224 } 4225 } 4226 NumLiterals += NumExprs; 4227 4228 if (!NumLiterals) 4229 return true; 4230 4231 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4232 Error(getLitLoc(Operands), "literal operands are not supported"); 4233 return false; 4234 } 4235 4236 if (NumLiterals > 1) { 4237 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4238 return false; 4239 } 4240 4241 return true; 4242 } 4243 4244 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4245 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4246 const MCRegisterInfo *MRI) { 4247 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4248 if (OpIdx < 0) 4249 return -1; 4250 4251 const MCOperand &Op = Inst.getOperand(OpIdx); 4252 if (!Op.isReg()) 4253 return -1; 4254 4255 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4256 auto Reg = Sub ? Sub : Op.getReg(); 4257 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4258 return AGPR32.contains(Reg) ? 
1 : 0; 4259 } 4260 4261 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4262 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4263 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4264 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4265 SIInstrFlags::DS)) == 0) 4266 return true; 4267 4268 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4269 : AMDGPU::OpName::vdata; 4270 4271 const MCRegisterInfo *MRI = getMRI(); 4272 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4273 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4274 4275 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4276 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4277 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4278 return false; 4279 } 4280 4281 auto FB = getFeatureBits(); 4282 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4283 if (DataAreg < 0 || DstAreg < 0) 4284 return true; 4285 return DstAreg == DataAreg; 4286 } 4287 4288 return DstAreg < 1 && DataAreg < 1; 4289 } 4290 4291 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4292 auto FB = getFeatureBits(); 4293 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4294 return true; 4295 4296 const MCRegisterInfo *MRI = getMRI(); 4297 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4298 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4299 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4300 const MCOperand &Op = Inst.getOperand(I); 4301 if (!Op.isReg()) 4302 continue; 4303 4304 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4305 if (!Sub) 4306 continue; 4307 4308 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4309 return false; 4310 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4311 return false; 4312 } 4313 4314 return true; 4315 } 4316 4317 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4318 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4319 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4320 if (Op.isBLGP()) 4321 return Op.getStartLoc(); 4322 } 4323 return SMLoc(); 4324 } 4325 4326 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4327 const OperandVector &Operands) { 4328 unsigned Opc = Inst.getOpcode(); 4329 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4330 if (BlgpIdx == -1) 4331 return true; 4332 SMLoc BLGPLoc = getBLGPLoc(Operands); 4333 if (!BLGPLoc.isValid()) 4334 return true; 4335 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4336 auto FB = getFeatureBits(); 4337 bool UsesNeg = false; 4338 if (FB[AMDGPU::FeatureGFX940Insts]) { 4339 switch (Opc) { 4340 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4341 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4342 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4343 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4344 UsesNeg = true; 4345 } 4346 } 4347 4348 if (IsNeg == UsesNeg) 4349 return true; 4350 4351 Error(BLGPLoc, 4352 UsesNeg ? "invalid modifier: blgp is not supported" 4353 : "invalid modifier: neg is not supported"); 4354 4355 return false; 4356 } 4357 4358 // gfx90a has an undocumented limitation: 4359 // DS_GWS opcodes must use even aligned registers. 
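// (Illustrative: on gfx90a a ds_gws_init whose data operand is v2 is accepted
// by the check below, while the same instruction with v3 is rejected with
// "vgpr must be even aligned".)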
4360 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4361 const OperandVector &Operands) { 4362 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4363 return true; 4364 4365 int Opc = Inst.getOpcode(); 4366 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4367 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4368 return true; 4369 4370 const MCRegisterInfo *MRI = getMRI(); 4371 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4372 int Data0Pos = 4373 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4374 assert(Data0Pos != -1); 4375 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4376 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4377 if (RegIdx & 1) { 4378 SMLoc RegLoc = getRegLoc(Reg, Operands); 4379 Error(RegLoc, "vgpr must be even aligned"); 4380 return false; 4381 } 4382 4383 return true; 4384 } 4385 4386 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4387 const OperandVector &Operands, 4388 const SMLoc &IDLoc) { 4389 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4390 AMDGPU::OpName::cpol); 4391 if (CPolPos == -1) 4392 return true; 4393 4394 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4395 4396 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4397 if (TSFlags & SIInstrFlags::SMRD) { 4398 if (CPol && (isSI() || isCI())) { 4399 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4400 Error(S, "cache policy is not supported for SMRD instructions"); 4401 return false; 4402 } 4403 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4404 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4405 return false; 4406 } 4407 } 4408 4409 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4410 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4411 StringRef CStr(S.getPointer()); 4412 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4413 Error(S, "scc is not supported on this GPU"); 4414 return false; 4415 } 4416 4417 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4418 return true; 4419 4420 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4421 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4422 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4423 : "instruction must use glc"); 4424 return false; 4425 } 4426 } else { 4427 if (CPol & CPol::GLC) { 4428 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4429 StringRef CStr(S.getPointer()); 4430 S = SMLoc::getFromPointer( 4431 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4432 Error(S, isGFX940() ? "instruction must not use sc0" 4433 : "instruction must not use glc"); 4434 return false; 4435 } 4436 } 4437 4438 return true; 4439 } 4440 4441 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4442 const OperandVector &Operands, 4443 const SMLoc &IDLoc) { 4444 if (isGFX940()) 4445 return true; 4446 4447 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4448 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4449 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4450 return true; 4451 // This is FLAT LDS DMA. 4452 4453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4454 StringRef CStr(S.getPointer()); 4455 if (!CStr.startswith("lds")) { 4456 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4457 // The LDS version should have the 'lds' modifier, but it follows optional 4458 // operands so its absence is ignored by the matcher.
4459 Error(IDLoc, "invalid operands for instruction"); 4460 return false; 4461 } 4462 4463 return true; 4464 } 4465 4466 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4467 const SMLoc &IDLoc, 4468 const OperandVector &Operands) { 4469 if (auto ErrMsg = validateLdsDirect(Inst)) { 4470 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4471 return false; 4472 } 4473 if (!validateSOPLiteral(Inst)) { 4474 Error(getLitLoc(Operands), 4475 "only one literal operand is allowed"); 4476 return false; 4477 } 4478 if (!validateVOPLiteral(Inst, Operands)) { 4479 return false; 4480 } 4481 if (!validateConstantBusLimitations(Inst, Operands)) { 4482 return false; 4483 } 4484 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4485 return false; 4486 } 4487 if (!validateIntClampSupported(Inst)) { 4488 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4489 "integer clamping is not supported on this GPU"); 4490 return false; 4491 } 4492 if (!validateOpSel(Inst)) { 4493 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4494 "invalid op_sel operand"); 4495 return false; 4496 } 4497 if (!validateDPP(Inst, Operands)) { 4498 return false; 4499 } 4500 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4501 if (!validateMIMGD16(Inst)) { 4502 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4503 "d16 modifier is not supported on this GPU"); 4504 return false; 4505 } 4506 if (!validateMIMGDim(Inst)) { 4507 Error(IDLoc, "dim modifier is required on this GPU"); 4508 return false; 4509 } 4510 if (!validateMIMGMSAA(Inst)) { 4511 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4512 "invalid dim; must be MSAA type"); 4513 return false; 4514 } 4515 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4516 Error(IDLoc, *ErrMsg); 4517 return false; 4518 } 4519 if (!validateMIMGAddrSize(Inst)) { 4520 Error(IDLoc, 4521 "image address size does not match dim and a16"); 4522 return false; 4523 } 4524 if (!validateMIMGAtomicDMask(Inst)) { 4525 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4526 "invalid atomic image dmask"); 4527 return false; 4528 } 4529 if (!validateMIMGGatherDMask(Inst)) { 4530 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4531 "invalid image_gather dmask: only one bit must be set"); 4532 return false; 4533 } 4534 if (!validateMovrels(Inst, Operands)) { 4535 return false; 4536 } 4537 if (!validateFlatOffset(Inst, Operands)) { 4538 return false; 4539 } 4540 if (!validateSMEMOffset(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateMAIAccWrite(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateMFMA(Inst, Operands)) { 4547 return false; 4548 } 4549 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4550 return false; 4551 } 4552 4553 if (!validateAGPRLdSt(Inst)) { 4554 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4555 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4556 : "invalid register class: agpr loads and stores not supported on this GPU" 4557 ); 4558 return false; 4559 } 4560 if (!validateVGPRAlign(Inst)) { 4561 Error(IDLoc, 4562 "invalid register class: vgpr tuples must be 64 bit aligned"); 4563 return false; 4564 } 4565 if (!validateGWS(Inst, Operands)) { 4566 return false; 4567 } 4568 4569 if (!validateBLGP(Inst, Operands)) { 4570 return false; 4571 } 4572 4573 if (!validateDivScale(Inst)) { 4574 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4575 return false; 4576 } 4577 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4578 return false; 4579 } 4580 4581 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4582 return false; 4583 } 4584 4585 return true; 4586 } 4587 4588 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4589 const FeatureBitset &FBS, 4590 unsigned VariantID = 0); 4591 4592 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4593 const FeatureBitset &AvailableFeatures, 4594 unsigned VariantID); 4595 4596 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4597 const FeatureBitset &FBS) { 4598 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4599 } 4600 4601 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4602 const FeatureBitset &FBS, 4603 ArrayRef<unsigned> Variants) { 4604 for (auto Variant : Variants) { 4605 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4606 return true; 4607 } 4608 4609 return false; 4610 } 4611 4612 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4613 const SMLoc &IDLoc) { 4614 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4615 4616 // Check if requested instruction variant is supported. 4617 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4618 return false; 4619 4620 // This instruction is not supported. 4621 // Clear any other pending errors because they are no longer relevant. 4622 getParser().clearPendingErrors(); 4623 4624 // Requested instruction variant is not supported. 4625 // Check if any other variants are supported. 4626 StringRef VariantName = getMatchedVariantName(); 4627 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4628 return Error(IDLoc, 4629 Twine(VariantName, 4630 " variant of this instruction is not supported")); 4631 } 4632 4633 // Finally check if this instruction is supported on any other GPU. 4634 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4635 return Error(IDLoc, "instruction not supported on this GPU"); 4636 } 4637 4638 // Instruction not supported on any GPU. Probably a typo. 4639 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4640 return Error(IDLoc, "invalid instruction" + Suggestion); 4641 } 4642 4643 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4644 OperandVector &Operands, 4645 MCStreamer &Out, 4646 uint64_t &ErrorInfo, 4647 bool MatchingInlineAsm) { 4648 MCInst Inst; 4649 unsigned Result = Match_Success; 4650 for (auto Variant : getMatchedVariants()) { 4651 uint64_t EI; 4652 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4653 Variant); 4654 // We order match statuses from least to most specific. 
We keep the most 4655 // specific status as the result: 4656 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4657 if ((R == Match_Success) || 4658 (R == Match_PreferE32) || 4659 (R == Match_MissingFeature && Result != Match_PreferE32) || 4660 (R == Match_InvalidOperand && Result != Match_MissingFeature 4661 && Result != Match_PreferE32) || 4662 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4663 && Result != Match_MissingFeature 4664 && Result != Match_PreferE32)) { 4665 Result = R; 4666 ErrorInfo = EI; 4667 } 4668 if (R == Match_Success) 4669 break; 4670 } 4671 4672 if (Result == Match_Success) { 4673 if (!validateInstruction(Inst, IDLoc, Operands)) { 4674 return true; 4675 } 4676 Inst.setLoc(IDLoc); 4677 Out.emitInstruction(Inst, getSTI()); 4678 return false; 4679 } 4680 4681 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4682 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4683 return true; 4684 } 4685 4686 switch (Result) { 4687 default: break; 4688 case Match_MissingFeature: 4689 // It has been verified that the specified instruction 4690 // mnemonic is valid. A match was found but it requires 4691 // features which are not supported on this GPU. 4692 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4693 4694 case Match_InvalidOperand: { 4695 SMLoc ErrorLoc = IDLoc; 4696 if (ErrorInfo != ~0ULL) { 4697 if (ErrorInfo >= Operands.size()) { 4698 return Error(IDLoc, "too few operands for instruction"); 4699 } 4700 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4701 if (ErrorLoc == SMLoc()) 4702 ErrorLoc = IDLoc; 4703 } 4704 return Error(ErrorLoc, "invalid operand for instruction"); 4705 } 4706 4707 case Match_PreferE32: 4708 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4709 "should be encoded as e32"); 4710 case Match_MnemonicFail: 4711 llvm_unreachable("Invalid instructions should have been handled already"); 4712 } 4713 llvm_unreachable("Implement any new match types added!"); 4714 } 4715 4716 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4717 int64_t Tmp = -1; 4718 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4719 return true; 4720 } 4721 if (getParser().parseAbsoluteExpression(Tmp)) { 4722 return true; 4723 } 4724 Ret = static_cast<uint32_t>(Tmp); 4725 return false; 4726 } 4727 4728 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4729 uint32_t &Minor) { 4730 if (ParseAsAbsoluteExpression(Major)) 4731 return TokError("invalid major version"); 4732 4733 if (!trySkipToken(AsmToken::Comma)) 4734 return TokError("minor version number required, comma expected"); 4735 4736 if (ParseAsAbsoluteExpression(Minor)) 4737 return TokError("invalid minor version"); 4738 4739 return false; 4740 } 4741 4742 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4743 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4744 return TokError("directive only supported for amdgcn architecture"); 4745 4746 std::string TargetIDDirective; 4747 SMLoc TargetStart = getTok().getLoc(); 4748 if (getParser().parseEscapedString(TargetIDDirective)) 4749 return true; 4750 4751 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4752 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4753 return getParser().Error(TargetRange.Start, 4754 (Twine(".amdgcn_target directive's target id ") + 4755 Twine(TargetIDDirective) + 4756 Twine(" does not match the specified target id ") + 4757
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4758 4759 return false; 4760 } 4761 4762 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4763 return Error(Range.Start, "value out of range", Range); 4764 } 4765 4766 bool AMDGPUAsmParser::calculateGPRBlocks( 4767 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4768 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4769 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4770 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4771 // TODO(scott.linder): These calculations are duplicated from 4772 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4773 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4774 4775 unsigned NumVGPRs = NextFreeVGPR; 4776 unsigned NumSGPRs = NextFreeSGPR; 4777 4778 if (Version.Major >= 10) 4779 NumSGPRs = 0; 4780 else { 4781 unsigned MaxAddressableNumSGPRs = 4782 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4783 4784 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4785 NumSGPRs > MaxAddressableNumSGPRs) 4786 return OutOfRangeError(SGPRRange); 4787 4788 NumSGPRs += 4789 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4790 4791 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4792 NumSGPRs > MaxAddressableNumSGPRs) 4793 return OutOfRangeError(SGPRRange); 4794 4795 if (Features.test(FeatureSGPRInitBug)) 4796 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4797 } 4798 4799 VGPRBlocks = 4800 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4801 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4802 4803 return false; 4804 } 4805 4806 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4807 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4808 return TokError("directive only supported for amdgcn architecture"); 4809 4810 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4811 return TokError("directive only supported for amdhsa OS"); 4812 4813 StringRef KernelName; 4814 if (getParser().parseIdentifier(KernelName)) 4815 return true; 4816 4817 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4818 4819 StringSet<> Seen; 4820 4821 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4822 4823 SMRange VGPRRange; 4824 uint64_t NextFreeVGPR = 0; 4825 uint64_t AccumOffset = 0; 4826 uint64_t SharedVGPRCount = 0; 4827 SMRange SGPRRange; 4828 uint64_t NextFreeSGPR = 0; 4829 4830 // Count the number of user SGPRs implied from the enabled feature bits. 4831 unsigned ImpliedUserSGPRCount = 0; 4832 4833 // Track if the asm explicitly contains the directive for the user SGPR 4834 // count. 
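// (Illustrative: enabling .amdhsa_user_sgpr_queue_ptr and
// .amdhsa_user_sgpr_dispatch_id implies 4 user SGPRs; an explicit
// .amdhsa_user_sgpr_count smaller than the implied total is diagnosed below.)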
4835 Optional<unsigned> ExplicitUserSGPRCount; 4836 bool ReserveVCC = true; 4837 bool ReserveFlatScr = true; 4838 Optional<bool> EnableWavefrontSize32; 4839 4840 while (true) { 4841 while (trySkipToken(AsmToken::EndOfStatement)); 4842 4843 StringRef ID; 4844 SMRange IDRange = getTok().getLocRange(); 4845 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4846 return true; 4847 4848 if (ID == ".end_amdhsa_kernel") 4849 break; 4850 4851 if (Seen.find(ID) != Seen.end()) 4852 return TokError(".amdhsa_ directives cannot be repeated"); 4853 Seen.insert(ID); 4854 4855 SMLoc ValStart = getLoc(); 4856 int64_t IVal; 4857 if (getParser().parseAbsoluteExpression(IVal)) 4858 return true; 4859 SMLoc ValEnd = getLoc(); 4860 SMRange ValRange = SMRange(ValStart, ValEnd); 4861 4862 if (IVal < 0) 4863 return OutOfRangeError(ValRange); 4864 4865 uint64_t Val = IVal; 4866 4867 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4868 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4869 return OutOfRangeError(RANGE); \ 4870 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4871 4872 if (ID == ".amdhsa_group_segment_fixed_size") { 4873 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4874 return OutOfRangeError(ValRange); 4875 KD.group_segment_fixed_size = Val; 4876 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4877 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4878 return OutOfRangeError(ValRange); 4879 KD.private_segment_fixed_size = Val; 4880 } else if (ID == ".amdhsa_kernarg_size") { 4881 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4882 return OutOfRangeError(ValRange); 4883 KD.kernarg_size = Val; 4884 } else if (ID == ".amdhsa_user_sgpr_count") { 4885 ExplicitUserSGPRCount = Val; 4886 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4887 if (hasArchitectedFlatScratch()) 4888 return Error(IDRange.Start, 4889 "directive is not supported with architected flat scratch", 4890 IDRange); 4891 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4892 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4893 Val, ValRange); 4894 if (Val) 4895 ImpliedUserSGPRCount += 4; 4896 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4897 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4898 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4899 ValRange); 4900 if (Val) 4901 ImpliedUserSGPRCount += 2; 4902 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4903 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4904 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4905 ValRange); 4906 if (Val) 4907 ImpliedUserSGPRCount += 2; 4908 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4909 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4910 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4911 Val, ValRange); 4912 if (Val) 4913 ImpliedUserSGPRCount += 2; 4914 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4915 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4916 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4917 ValRange); 4918 if (Val) 4919 ImpliedUserSGPRCount += 2; 4920 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4921 if (hasArchitectedFlatScratch()) 4922 return Error(IDRange.Start, 4923 "directive is not supported with architected flat scratch", 4924 IDRange); 4925 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4926 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4927 ValRange); 4928 if (Val) 4929 ImpliedUserSGPRCount += 2; 4930 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4931 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4932 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4933 Val, ValRange); 4934 if (Val) 4935 ImpliedUserSGPRCount += 1; 4936 } else if (ID == ".amdhsa_wavefront_size32") { 4937 if (IVersion.Major < 10) 4938 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4939 EnableWavefrontSize32 = Val; 4940 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4941 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4942 Val, ValRange); 4943 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4944 if (hasArchitectedFlatScratch()) 4945 return Error(IDRange.Start, 4946 "directive is not supported with architected flat scratch", 4947 IDRange); 4948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4949 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4950 } else if (ID == ".amdhsa_enable_private_segment") { 4951 if (!hasArchitectedFlatScratch()) 4952 return Error( 4953 IDRange.Start, 4954 "directive is not supported without architected flat scratch", 4955 IDRange); 4956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4957 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4958 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4959 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4960 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4961 ValRange); 4962 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4963 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4964 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4965 ValRange); 4966 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4967 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4968 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4969 ValRange); 4970 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4971 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4972 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4973 ValRange); 4974 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4975 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4976 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4977 ValRange); 4978 } else if (ID == ".amdhsa_next_free_vgpr") { 4979 VGPRRange = ValRange; 4980 NextFreeVGPR = Val; 4981 } else if (ID == ".amdhsa_next_free_sgpr") { 4982 SGPRRange = ValRange; 4983 NextFreeSGPR = Val; 4984 } else if (ID == ".amdhsa_accum_offset") { 4985 if (!isGFX90A()) 4986 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4987 AccumOffset = Val; 4988 } else if (ID == ".amdhsa_reserve_vcc") { 4989 if (!isUInt<1>(Val)) 4990 return OutOfRangeError(ValRange); 4991 ReserveVCC = Val; 4992 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4993 if (IVersion.Major < 7) 4994 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4995 if (hasArchitectedFlatScratch()) 4996 return Error(IDRange.Start, 4997 "directive is not supported with architected flat scratch", 4998 IDRange); 4999 if (!isUInt<1>(Val)) 5000 return OutOfRangeError(ValRange); 5001 ReserveFlatScr = Val; 5002 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5003 if (IVersion.Major < 8) 5004 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5005 if (!isUInt<1>(Val)) 5006 return OutOfRangeError(ValRange); 5007 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5008 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5009 IDRange); 5010 } else if (ID == ".amdhsa_float_round_mode_32") { 5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5012 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5013 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5015 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5016 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5018 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5019 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5020 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5021 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5022 ValRange); 5023 } else if (ID == ".amdhsa_dx10_clamp") { 5024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5025 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5026 } else if (ID == ".amdhsa_ieee_mode") { 5027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5028 Val, ValRange); 5029 } else if (ID == ".amdhsa_fp16_overflow") { 5030 if (IVersion.Major < 9) 5031 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5032 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5033 ValRange); 5034 } else if (ID == ".amdhsa_tg_split") { 5035 if (!isGFX90A()) 5036 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5037 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5038 ValRange); 5039 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5040 if (IVersion.Major < 10) 5041 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5043 ValRange); 5044 } else if (ID == ".amdhsa_memory_ordered") { 5045 if (IVersion.Major < 10) 5046 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5048 ValRange); 5049 } else if (ID == ".amdhsa_forward_progress") { 5050 if (IVersion.Major < 10) 5051 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5052 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5053 ValRange); 5054 } else if (ID == ".amdhsa_shared_vgpr_count") { 5055 if (IVersion.Major < 10) 5056 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5057 SharedVGPRCount = Val; 5058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5059 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5060 ValRange); 5061 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5062 PARSE_BITS_ENTRY( 5063 KD.compute_pgm_rsrc2, 5064 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5065 ValRange); 5066 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5067 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5068 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5069 Val, ValRange); 5070 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5071 PARSE_BITS_ENTRY( 5072 KD.compute_pgm_rsrc2, 5073 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5074 ValRange); 5075 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5076 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5077 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5078 Val, ValRange); 5079 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5081 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5082 Val, ValRange); 5083 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5084 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5085 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5086 Val, ValRange); 5087 } else if (ID == ".amdhsa_exception_int_div_zero") { 5088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5089 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5090 Val, ValRange); 5091 } else { 5092 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5093 } 5094 5095 #undef PARSE_BITS_ENTRY 5096 } 5097 5098 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5099 return TokError(".amdhsa_next_free_vgpr directive is required"); 5100 5101 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5102 return TokError(".amdhsa_next_free_sgpr directive is required"); 5103 5104 unsigned VGPRBlocks; 5105 unsigned SGPRBlocks; 5106 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5107 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5108 EnableWavefrontSize32, NextFreeVGPR, 5109 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5110 SGPRBlocks)) 5111 return true; 5112 5113 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5114 VGPRBlocks)) 5115 return OutOfRangeError(VGPRRange); 5116 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5117 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5118 5119 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5120 SGPRBlocks)) 5121 return OutOfRangeError(SGPRRange); 5122 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5123 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5124 SGPRBlocks); 5125 5126 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5127 return TokError("amdgpu_user_sgpr_count smaller than implied by " 5128 "enabled user SGPRs"); 5129 5130 unsigned UserSGPRCount = 5131 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5132 5133 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5134 return TokError("too many user SGPRs enabled"); 5135 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5136 UserSGPRCount); 5137 5138 if (isGFX90A()) { 5139 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5140 return TokError(".amdhsa_accum_offset directive is required"); 5141 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5142 return TokError("accum_offset should be in range [4..256] in " 5143 "increments of 4"); 5144 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5145 return TokError("accum_offset exceeds total VGPR allocation"); 5146 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5147 (AccumOffset / 4 - 1)); 5148 } 5149 5150 if (IVersion.Major == 10) { 5151 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5152 if (SharedVGPRCount && EnableWavefrontSize32) { 5153 return TokError("shared_vgpr_count directive not valid on " 5154 "wavefront size 32"); 5155 } 5156 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5157 return TokError("shared_vgpr_count*2 + " 5158 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5159 "exceed 63\n"); 5160 } 5161 } 5162 5163 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5164 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5165 ReserveFlatScr); 5166 return false; 5167 } 5168 5169 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5170 uint32_t Major; 5171 uint32_t Minor; 5172 5173 if (ParseDirectiveMajorMinor(Major, Minor)) 5174 return true; 5175 5176 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5177 return false; 5178 } 5179 5180 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5181 uint32_t Major; 5182 uint32_t Minor; 5183 uint32_t Stepping; 5184 StringRef VendorName; 5185 StringRef ArchName; 5186 5187 // If this directive has no
arguments, then use the ISA version for the 5188 // targeted GPU. 5189 if (isToken(AsmToken::EndOfStatement)) { 5190 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5191 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5192 ISA.Stepping, 5193 "AMD", "AMDGPU"); 5194 return false; 5195 } 5196 5197 if (ParseDirectiveMajorMinor(Major, Minor)) 5198 return true; 5199 5200 if (!trySkipToken(AsmToken::Comma)) 5201 return TokError("stepping version number required, comma expected"); 5202 5203 if (ParseAsAbsoluteExpression(Stepping)) 5204 return TokError("invalid stepping version"); 5205 5206 if (!trySkipToken(AsmToken::Comma)) 5207 return TokError("vendor name required, comma expected"); 5208 5209 if (!parseString(VendorName, "invalid vendor name")) 5210 return true; 5211 5212 if (!trySkipToken(AsmToken::Comma)) 5213 return TokError("arch name required, comma expected"); 5214 5215 if (!parseString(ArchName, "invalid arch name")) 5216 return true; 5217 5218 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5219 VendorName, ArchName); 5220 return false; 5221 } 5222 5223 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5224 amd_kernel_code_t &Header) { 5225 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5226 // assembly for backwards compatibility. 5227 if (ID == "max_scratch_backing_memory_byte_size") { 5228 Parser.eatToEndOfStatement(); 5229 return false; 5230 } 5231 5232 SmallString<40> ErrStr; 5233 raw_svector_ostream Err(ErrStr); 5234 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5235 return TokError(Err.str()); 5236 } 5237 Lex(); 5238 5239 if (ID == "enable_wavefront_size32") { 5240 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5241 if (!isGFX10Plus()) 5242 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5243 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5244 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5245 } else { 5246 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5247 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5248 } 5249 } 5250 5251 if (ID == "wavefront_size") { 5252 if (Header.wavefront_size == 5) { 5253 if (!isGFX10Plus()) 5254 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5255 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5256 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5257 } else if (Header.wavefront_size == 6) { 5258 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5259 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5260 } 5261 } 5262 5263 if (ID == "enable_wgp_mode") { 5264 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5265 !isGFX10Plus()) 5266 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5267 } 5268 5269 if (ID == "enable_mem_ordered") { 5270 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5271 !isGFX10Plus()) 5272 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5273 } 5274 5275 if (ID == "enable_fwd_progress") { 5276 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5277 !isGFX10Plus()) 5278 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5279 } 5280 5281 return false; 5282 } 5283 5284 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5285 amd_kernel_code_t Header; 5286 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5287 5288 while (true) { 
5289 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5290 // will set the current token to EndOfStatement. 5291 while(trySkipToken(AsmToken::EndOfStatement)); 5292 5293 StringRef ID; 5294 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5295 return true; 5296 5297 if (ID == ".end_amd_kernel_code_t") 5298 break; 5299 5300 if (ParseAMDKernelCodeTValue(ID, Header)) 5301 return true; 5302 } 5303 5304 getTargetStreamer().EmitAMDKernelCodeT(Header); 5305 5306 return false; 5307 } 5308 5309 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5310 StringRef KernelName; 5311 if (!parseId(KernelName, "expected symbol name")) 5312 return true; 5313 5314 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5315 ELF::STT_AMDGPU_HSA_KERNEL); 5316 5317 KernelScope.initialize(getContext()); 5318 return false; 5319 } 5320 5321 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5322 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5323 return Error(getLoc(), 5324 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5325 "architectures"); 5326 } 5327 5328 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5329 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5330 return Error(getParser().getTok().getLoc(), "target id must match options"); 5331 5332 getTargetStreamer().EmitISAVersion(); 5333 Lex(); 5334 5335 return false; 5336 } 5337 5338 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5339 const char *AssemblerDirectiveBegin; 5340 const char *AssemblerDirectiveEnd; 5341 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5342 isHsaAbiVersion3AndAbove(&getSTI()) 5343 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5344 HSAMD::V3::AssemblerDirectiveEnd) 5345 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5346 HSAMD::AssemblerDirectiveEnd); 5347 5348 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5349 return Error(getLoc(), 5350 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5351 "not available on non-amdhsa OSes")).str()); 5352 } 5353 5354 std::string HSAMetadataString; 5355 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5356 HSAMetadataString)) 5357 return true; 5358 5359 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5360 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5361 return Error(getLoc(), "invalid HSA metadata"); 5362 } else { 5363 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5364 return Error(getLoc(), "invalid HSA metadata"); 5365 } 5366 5367 return false; 5368 } 5369 5370 /// Common code to parse out a block of text (typically YAML) between start and 5371 /// end directives. 
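/// For example, the HSA metadata parser above hands this function the
/// metadata begin/end marker strings; everything up to the end marker is
/// collected verbatim (with the target's statement separator re-inserted
/// between statements) and returned to the caller for YAML/MsgPack parsing.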
5372 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5373 const char *AssemblerDirectiveEnd, 5374 std::string &CollectString) { 5375 5376 raw_string_ostream CollectStream(CollectString); 5377 5378 getLexer().setSkipSpace(false); 5379 5380 bool FoundEnd = false; 5381 while (!isToken(AsmToken::Eof)) { 5382 while (isToken(AsmToken::Space)) { 5383 CollectStream << getTokenStr(); 5384 Lex(); 5385 } 5386 5387 if (trySkipId(AssemblerDirectiveEnd)) { 5388 FoundEnd = true; 5389 break; 5390 } 5391 5392 CollectStream << Parser.parseStringToEndOfStatement() 5393 << getContext().getAsmInfo()->getSeparatorString(); 5394 5395 Parser.eatToEndOfStatement(); 5396 } 5397 5398 getLexer().setSkipSpace(true); 5399 5400 if (isToken(AsmToken::Eof) && !FoundEnd) { 5401 return TokError(Twine("expected directive ") + 5402 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5403 } 5404 5405 CollectStream.flush(); 5406 return false; 5407 } 5408 5409 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5410 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5411 std::string String; 5412 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5413 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5414 return true; 5415 5416 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5417 if (!PALMetadata->setFromString(String)) 5418 return Error(getLoc(), "invalid PAL metadata"); 5419 return false; 5420 } 5421 5422 /// Parse the assembler directive for old linear-format PAL metadata. 5423 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5424 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5425 return Error(getLoc(), 5426 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5427 "not available on non-amdpal OSes")).str()); 5428 } 5429 5430 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5431 PALMetadata->setLegacy(); 5432 for (;;) { 5433 uint32_t Key, Value; 5434 if (ParseAsAbsoluteExpression(Key)) { 5435 return TokError(Twine("invalid value in ") + 5436 Twine(PALMD::AssemblerDirective)); 5437 } 5438 if (!trySkipToken(AsmToken::Comma)) { 5439 return TokError(Twine("expected an even number of values in ") + 5440 Twine(PALMD::AssemblerDirective)); 5441 } 5442 if (ParseAsAbsoluteExpression(Value)) { 5443 return TokError(Twine("invalid value in ") + 5444 Twine(PALMD::AssemblerDirective)); 5445 } 5446 PALMetadata->setRegister(Key, Value); 5447 if (!trySkipToken(AsmToken::Comma)) 5448 break; 5449 } 5450 return false; 5451 } 5452 5453 /// ParseDirectiveAMDGPULDS 5454 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5455 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5456 if (getParser().checkForValidSection()) 5457 return true; 5458 5459 StringRef Name; 5460 SMLoc NameLoc = getLoc(); 5461 if (getParser().parseIdentifier(Name)) 5462 return TokError("expected identifier in directive"); 5463 5464 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5465 if (parseToken(AsmToken::Comma, "expected ','")) 5466 return true; 5467 5468 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5469 5470 int64_t Size; 5471 SMLoc SizeLoc = getLoc(); 5472 if (getParser().parseAbsoluteExpression(Size)) 5473 return true; 5474 if (Size < 0) 5475 return Error(SizeLoc, "size must be non-negative"); 5476 if (Size > LocalMemorySize) 5477 return Error(SizeLoc, "size is too large"); 5478 5479 int64_t Alignment = 4; 5480 if (trySkipToken(AsmToken::Comma)) { 5481 SMLoc AlignLoc = getLoc(); 5482 if 
(getParser().parseAbsoluteExpression(Alignment)) 5483 return true; 5484 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5485 return Error(AlignLoc, "alignment must be a power of two"); 5486 5487 // Alignment larger than the size of LDS is possible in theory, as long 5488 // as the linker manages to place to symbol at address 0, but we do want 5489 // to make sure the alignment fits nicely into a 32-bit integer. 5490 if (Alignment >= 1u << 31) 5491 return Error(AlignLoc, "alignment is too large"); 5492 } 5493 5494 if (parseEOL()) 5495 return true; 5496 5497 Symbol->redefineIfPossible(); 5498 if (!Symbol->isUndefined()) 5499 return Error(NameLoc, "invalid symbol redefinition"); 5500 5501 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5502 return false; 5503 } 5504 5505 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5506 StringRef IDVal = DirectiveID.getString(); 5507 5508 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5509 if (IDVal == ".amdhsa_kernel") 5510 return ParseDirectiveAMDHSAKernel(); 5511 5512 // TODO: Restructure/combine with PAL metadata directive. 5513 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5514 return ParseDirectiveHSAMetadata(); 5515 } else { 5516 if (IDVal == ".hsa_code_object_version") 5517 return ParseDirectiveHSACodeObjectVersion(); 5518 5519 if (IDVal == ".hsa_code_object_isa") 5520 return ParseDirectiveHSACodeObjectISA(); 5521 5522 if (IDVal == ".amd_kernel_code_t") 5523 return ParseDirectiveAMDKernelCodeT(); 5524 5525 if (IDVal == ".amdgpu_hsa_kernel") 5526 return ParseDirectiveAMDGPUHsaKernel(); 5527 5528 if (IDVal == ".amd_amdgpu_isa") 5529 return ParseDirectiveISAVersion(); 5530 5531 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5532 return ParseDirectiveHSAMetadata(); 5533 } 5534 5535 if (IDVal == ".amdgcn_target") 5536 return ParseDirectiveAMDGCNTarget(); 5537 5538 if (IDVal == ".amdgpu_lds") 5539 return ParseDirectiveAMDGPULDS(); 5540 5541 if (IDVal == PALMD::AssemblerDirectiveBegin) 5542 return ParseDirectivePALMetadataBegin(); 5543 5544 if (IDVal == PALMD::AssemblerDirective) 5545 return ParseDirectivePALMetadata(); 5546 5547 return true; 5548 } 5549 5550 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5551 unsigned RegNo) { 5552 5553 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5554 return isGFX9Plus(); 5555 5556 // GFX10 has 2 more SGPRs 104 and 105. 5557 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5558 return hasSGPR104_SGPR105(); 5559 5560 switch (RegNo) { 5561 case AMDGPU::SRC_SHARED_BASE: 5562 case AMDGPU::SRC_SHARED_LIMIT: 5563 case AMDGPU::SRC_PRIVATE_BASE: 5564 case AMDGPU::SRC_PRIVATE_LIMIT: 5565 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5566 return isGFX9Plus(); 5567 case AMDGPU::TBA: 5568 case AMDGPU::TBA_LO: 5569 case AMDGPU::TBA_HI: 5570 case AMDGPU::TMA: 5571 case AMDGPU::TMA_LO: 5572 case AMDGPU::TMA_HI: 5573 return !isGFX9Plus(); 5574 case AMDGPU::XNACK_MASK: 5575 case AMDGPU::XNACK_MASK_LO: 5576 case AMDGPU::XNACK_MASK_HI: 5577 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5578 case AMDGPU::SGPR_NULL: 5579 return isGFX10Plus(); 5580 default: 5581 break; 5582 } 5583 5584 if (isCI()) 5585 return true; 5586 5587 if (isSI() || isGFX10Plus()) { 5588 // No flat_scr on SI. 5589 // On GFX10 flat scratch is not a valid register operand and can only be 5590 // accessed with s_setreg/s_getreg. 
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
5694 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5695 5696 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5697 5698 bool IsMIMG = Name.startswith("image_"); 5699 5700 while (!trySkipToken(AsmToken::EndOfStatement)) { 5701 OperandMode Mode = OperandMode_Default; 5702 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5703 Mode = OperandMode_NSA; 5704 CPolSeen = 0; 5705 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5706 5707 if (Res != MatchOperand_Success) { 5708 checkUnsupportedInstruction(Name, NameLoc); 5709 if (!Parser.hasPendingError()) { 5710 // FIXME: use real operand location rather than the current location. 5711 StringRef Msg = 5712 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5713 "not a valid operand."; 5714 Error(getLoc(), Msg); 5715 } 5716 while (!trySkipToken(AsmToken::EndOfStatement)) { 5717 lex(); 5718 } 5719 return true; 5720 } 5721 5722 // Eat the comma or space if there is one. 5723 trySkipToken(AsmToken::Comma); 5724 } 5725 5726 return false; 5727 } 5728 5729 //===----------------------------------------------------------------------===// 5730 // Utility functions 5731 //===----------------------------------------------------------------------===// 5732 5733 OperandMatchResultTy 5734 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5735 5736 if (!trySkipId(Prefix, AsmToken::Colon)) 5737 return MatchOperand_NoMatch; 5738 5739 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5740 } 5741 5742 OperandMatchResultTy 5743 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5744 AMDGPUOperand::ImmTy ImmTy, 5745 bool (*ConvertResult)(int64_t&)) { 5746 SMLoc S = getLoc(); 5747 int64_t Value = 0; 5748 5749 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5750 if (Res != MatchOperand_Success) 5751 return Res; 5752 5753 if (ConvertResult && !ConvertResult(Value)) { 5754 Error(S, "invalid " + StringRef(Prefix) + " value."); 5755 } 5756 5757 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5758 return MatchOperand_Success; 5759 } 5760 5761 OperandMatchResultTy 5762 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5763 OperandVector &Operands, 5764 AMDGPUOperand::ImmTy ImmTy, 5765 bool (*ConvertResult)(int64_t&)) { 5766 SMLoc S = getLoc(); 5767 if (!trySkipId(Prefix, AsmToken::Colon)) 5768 return MatchOperand_NoMatch; 5769 5770 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5771 return MatchOperand_ParseFail; 5772 5773 unsigned Val = 0; 5774 const unsigned MaxSize = 4; 5775 5776 // FIXME: How to verify the number of elements matches the number of src 5777 // operands? 
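  // The accepted form is a small bit array such as "op_sel:[0,1,1,0]": each
  // element must be 0 or 1, and element I sets bit I of the resulting
  // immediate. ("op_sel" is just an illustrative caller; this helper is
  // shared by the other bracketed bit-array operands.)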
5778 for (int I = 0; ; ++I) { 5779 int64_t Op; 5780 SMLoc Loc = getLoc(); 5781 if (!parseExpr(Op)) 5782 return MatchOperand_ParseFail; 5783 5784 if (Op != 0 && Op != 1) { 5785 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5786 return MatchOperand_ParseFail; 5787 } 5788 5789 Val |= (Op << I); 5790 5791 if (trySkipToken(AsmToken::RBrac)) 5792 break; 5793 5794 if (I + 1 == MaxSize) { 5795 Error(getLoc(), "expected a closing square bracket"); 5796 return MatchOperand_ParseFail; 5797 } 5798 5799 if (!skipToken(AsmToken::Comma, "expected a comma")) 5800 return MatchOperand_ParseFail; 5801 } 5802 5803 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5804 return MatchOperand_Success; 5805 } 5806 5807 OperandMatchResultTy 5808 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5809 AMDGPUOperand::ImmTy ImmTy) { 5810 int64_t Bit; 5811 SMLoc S = getLoc(); 5812 5813 if (trySkipId(Name)) { 5814 Bit = 1; 5815 } else if (trySkipId("no", Name)) { 5816 Bit = 0; 5817 } else { 5818 return MatchOperand_NoMatch; 5819 } 5820 5821 if (Name == "r128" && !hasMIMG_R128()) { 5822 Error(S, "r128 modifier is not supported on this GPU"); 5823 return MatchOperand_ParseFail; 5824 } 5825 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5826 Error(S, "a16 modifier is not supported on this GPU"); 5827 return MatchOperand_ParseFail; 5828 } 5829 5830 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5831 ImmTy = AMDGPUOperand::ImmTyR128A16; 5832 5833 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5834 return MatchOperand_Success; 5835 } 5836 5837 OperandMatchResultTy 5838 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5839 unsigned CPolOn = 0; 5840 unsigned CPolOff = 0; 5841 SMLoc S = getLoc(); 5842 5843 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5844 if (isGFX940() && !Mnemo.startswith("s_")) { 5845 if (trySkipId("sc0")) 5846 CPolOn = AMDGPU::CPol::SC0; 5847 else if (trySkipId("nosc0")) 5848 CPolOff = AMDGPU::CPol::SC0; 5849 else if (trySkipId("nt")) 5850 CPolOn = AMDGPU::CPol::NT; 5851 else if (trySkipId("nont")) 5852 CPolOff = AMDGPU::CPol::NT; 5853 else if (trySkipId("sc1")) 5854 CPolOn = AMDGPU::CPol::SC1; 5855 else if (trySkipId("nosc1")) 5856 CPolOff = AMDGPU::CPol::SC1; 5857 else 5858 return MatchOperand_NoMatch; 5859 } 5860 else if (trySkipId("glc")) 5861 CPolOn = AMDGPU::CPol::GLC; 5862 else if (trySkipId("noglc")) 5863 CPolOff = AMDGPU::CPol::GLC; 5864 else if (trySkipId("slc")) 5865 CPolOn = AMDGPU::CPol::SLC; 5866 else if (trySkipId("noslc")) 5867 CPolOff = AMDGPU::CPol::SLC; 5868 else if (trySkipId("dlc")) 5869 CPolOn = AMDGPU::CPol::DLC; 5870 else if (trySkipId("nodlc")) 5871 CPolOff = AMDGPU::CPol::DLC; 5872 else if (trySkipId("scc")) 5873 CPolOn = AMDGPU::CPol::SCC; 5874 else if (trySkipId("noscc")) 5875 CPolOff = AMDGPU::CPol::SCC; 5876 else 5877 return MatchOperand_NoMatch; 5878 5879 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5880 Error(S, "dlc modifier is not supported on this GPU"); 5881 return MatchOperand_ParseFail; 5882 } 5883 5884 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5885 Error(S, "scc modifier is not supported on this GPU"); 5886 return MatchOperand_ParseFail; 5887 } 5888 5889 if (CPolSeen & (CPolOn | CPolOff)) { 5890 Error(S, "duplicate cache policy modifier"); 5891 return MatchOperand_ParseFail; 5892 } 5893 5894 CPolSeen |= (CPolOn | CPolOff); 5895 5896 for (unsigned I = 1; I != Operands.size(); ++I) { 5897 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5898 if (Op.isCPol()) { 5899 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5900 return MatchOperand_Success; 5901 } 5902 } 5903 5904 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5905 AMDGPUOperand::ImmTyCPol)); 5906 5907 return MatchOperand_Success; 5908 } 5909 5910 static void addOptionalImmOperand( 5911 MCInst& Inst, const OperandVector& Operands, 5912 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5913 AMDGPUOperand::ImmTy ImmT, 5914 int64_t Default = 0) { 5915 auto i = OptionalIdx.find(ImmT); 5916 if (i != OptionalIdx.end()) { 5917 unsigned Idx = i->second; 5918 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5919 } else { 5920 Inst.addOperand(MCOperand::createImm(Default)); 5921 } 5922 } 5923 5924 OperandMatchResultTy 5925 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5926 StringRef &Value, 5927 SMLoc &StringLoc) { 5928 if (!trySkipId(Prefix, AsmToken::Colon)) 5929 return MatchOperand_NoMatch; 5930 5931 StringLoc = getLoc(); 5932 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5933 : MatchOperand_ParseFail; 5934 } 5935 5936 //===----------------------------------------------------------------------===// 5937 // MTBUF format 5938 //===----------------------------------------------------------------------===// 5939 5940 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5941 int64_t MaxVal, 5942 int64_t &Fmt) { 5943 int64_t Val; 5944 SMLoc Loc = getLoc(); 5945 5946 auto Res = parseIntWithPrefix(Pref, Val); 5947 if (Res == MatchOperand_ParseFail) 5948 return false; 5949 if (Res == MatchOperand_NoMatch) 5950 return true; 5951 5952 if (Val < 0 || Val > MaxVal) { 5953 Error(Loc, Twine("out of range ", StringRef(Pref))); 5954 return false; 5955 } 5956 5957 Fmt = Val; 5958 return true; 5959 } 5960 5961 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5962 // values to live in a joint format operand in the MCInst encoding. 5963 OperandMatchResultTy 5964 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5965 using namespace llvm::AMDGPU::MTBUFFormat; 5966 5967 int64_t Dfmt = DFMT_UNDEF; 5968 int64_t Nfmt = NFMT_UNDEF; 5969 5970 // dfmt and nfmt can appear in either order, and each is optional. 5971 for (int I = 0; I < 2; ++I) { 5972 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5973 return MatchOperand_ParseFail; 5974 5975 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5976 return MatchOperand_ParseFail; 5977 } 5978 // Skip optional comma between dfmt/nfmt 5979 // but guard against 2 commas following each other. 5980 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5981 !peekToken().is(AsmToken::Comma)) { 5982 trySkipToken(AsmToken::Comma); 5983 } 5984 } 5985 5986 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5987 return MatchOperand_NoMatch; 5988 5989 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5990 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5991 5992 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5993 return MatchOperand_Success; 5994 } 5995 5996 OperandMatchResultTy 5997 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5998 using namespace llvm::AMDGPU::MTBUFFormat; 5999 6000 int64_t Fmt = UFMT_UNDEF; 6001 6002 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6003 return MatchOperand_ParseFail; 6004 6005 if (Fmt == UFMT_UNDEF) 6006 return MatchOperand_NoMatch; 6007 6008 Format = Fmt; 6009 return MatchOperand_Success; 6010 } 6011 6012 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6013 int64_t &Nfmt, 6014 StringRef FormatStr, 6015 SMLoc Loc) { 6016 using namespace llvm::AMDGPU::MTBUFFormat; 6017 int64_t Format; 6018 6019 Format = getDfmt(FormatStr); 6020 if (Format != DFMT_UNDEF) { 6021 Dfmt = Format; 6022 return true; 6023 } 6024 6025 Format = getNfmt(FormatStr, getSTI()); 6026 if (Format != NFMT_UNDEF) { 6027 Nfmt = Format; 6028 return true; 6029 } 6030 6031 Error(Loc, "unsupported format"); 6032 return false; 6033 } 6034 6035 OperandMatchResultTy 6036 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6037 SMLoc FormatLoc, 6038 int64_t &Format) { 6039 using namespace llvm::AMDGPU::MTBUFFormat; 6040 6041 int64_t Dfmt = DFMT_UNDEF; 6042 int64_t Nfmt = NFMT_UNDEF; 6043 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6044 return MatchOperand_ParseFail; 6045 6046 if (trySkipToken(AsmToken::Comma)) { 6047 StringRef Str; 6048 SMLoc Loc = getLoc(); 6049 if (!parseId(Str, "expected a format string") || 6050 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6051 return MatchOperand_ParseFail; 6052 } 6053 if (Dfmt == DFMT_UNDEF) { 6054 Error(Loc, "duplicate numeric format"); 6055 return MatchOperand_ParseFail; 6056 } else if (Nfmt == NFMT_UNDEF) { 6057 Error(Loc, "duplicate data format"); 6058 return MatchOperand_ParseFail; 6059 } 6060 } 6061 6062 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6063 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6064 6065 if (isGFX10Plus()) { 6066 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6067 if (Ufmt == UFMT_UNDEF) { 6068 Error(FormatLoc, "unsupported format"); 6069 return MatchOperand_ParseFail; 6070 } 6071 Format = Ufmt; 6072 } else { 6073 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6074 } 6075 6076 return MatchOperand_Success; 6077 } 6078 6079 OperandMatchResultTy 6080 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6081 SMLoc Loc, 6082 int64_t &Format) { 6083 using namespace llvm::AMDGPU::MTBUFFormat; 6084 6085 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6086 if (Id == UFMT_UNDEF) 6087 return MatchOperand_NoMatch; 6088 6089 if (!isGFX10Plus()) { 6090 Error(Loc, "unified format is not supported on this GPU"); 6091 return MatchOperand_ParseFail; 6092 } 6093 6094 Format = Id; 6095 return MatchOperand_Success; 6096 } 6097 6098 OperandMatchResultTy 6099 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6100 using namespace llvm::AMDGPU::MTBUFFormat; 6101 SMLoc Loc = getLoc(); 6102 6103 if (!parseExpr(Format)) 6104 return MatchOperand_ParseFail; 6105 if (!isValidFormatEncoding(Format, getSTI())) { 6106 Error(Loc, "out of range format"); 6107 return MatchOperand_ParseFail; 6108 } 6109 6110 return MatchOperand_Success; 6111 } 6112 6113 OperandMatchResultTy 6114 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6115 using namespace llvm::AMDGPU::MTBUFFormat; 6116 6117 if (!trySkipId("format", AsmToken::Colon)) 6118 return MatchOperand_NoMatch; 6119 6120 if (trySkipToken(AsmToken::LBrac)) { 6121 StringRef FormatStr; 6122 SMLoc Loc = getLoc(); 6123 if (!parseId(FormatStr, "expected a format string")) 6124 return MatchOperand_ParseFail; 6125 6126 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6127 if (Res == MatchOperand_NoMatch) 6128 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6129 if (Res != MatchOperand_Success) 6130 return Res; 6131 6132 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6133 return MatchOperand_ParseFail; 6134 6135 return MatchOperand_Success; 6136 } 6137 6138 return parseNumericFormat(Format); 6139 } 6140 6141 OperandMatchResultTy 6142 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6143 using namespace llvm::AMDGPU::MTBUFFormat; 6144 6145 int64_t Format = getDefaultFormatEncoding(getSTI()); 6146 OperandMatchResultTy Res; 6147 SMLoc Loc = getLoc(); 6148 6149 // Parse legacy format syntax. 6150 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6151 if (Res == MatchOperand_ParseFail) 6152 return Res; 6153 6154 bool FormatFound = (Res == MatchOperand_Success); 6155 6156 Operands.push_back( 6157 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6158 6159 if (FormatFound) 6160 trySkipToken(AsmToken::Comma); 6161 6162 if (isToken(AsmToken::EndOfStatement)) { 6163 // We are expecting an soffset operand, 6164 // but let matcher handle the error. 6165 return MatchOperand_Success; 6166 } 6167 6168 // Parse soffset. 
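  // A trailing format is still possible after soffset, e.g. something like
  // "..., s4 format:[BUF_FMT_32_FLOAT]" (illustrative syntax); in that case
  // the placeholder ImmTyFORMAT operand pushed above is updated in place
  // below once the trailing symbolic or numeric format has been parsed.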
6169 Res = parseRegOrImm(Operands); 6170 if (Res != MatchOperand_Success) 6171 return Res; 6172 6173 trySkipToken(AsmToken::Comma); 6174 6175 if (!FormatFound) { 6176 Res = parseSymbolicOrNumericFormat(Format); 6177 if (Res == MatchOperand_ParseFail) 6178 return Res; 6179 if (Res == MatchOperand_Success) { 6180 auto Size = Operands.size(); 6181 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6182 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6183 Op.setImm(Format); 6184 } 6185 return MatchOperand_Success; 6186 } 6187 6188 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6189 Error(getLoc(), "duplicate format"); 6190 return MatchOperand_ParseFail; 6191 } 6192 return MatchOperand_Success; 6193 } 6194 6195 //===----------------------------------------------------------------------===// 6196 // ds 6197 //===----------------------------------------------------------------------===// 6198 6199 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6200 const OperandVector &Operands) { 6201 OptionalImmIndexMap OptionalIdx; 6202 6203 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6204 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6205 6206 // Add the register arguments 6207 if (Op.isReg()) { 6208 Op.addRegOperands(Inst, 1); 6209 continue; 6210 } 6211 6212 // Handle optional arguments 6213 OptionalIdx[Op.getImmTy()] = i; 6214 } 6215 6216 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6219 6220 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6221 } 6222 6223 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6224 bool IsGdsHardcoded) { 6225 OptionalImmIndexMap OptionalIdx; 6226 6227 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6228 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6229 6230 // Add the register arguments 6231 if (Op.isReg()) { 6232 Op.addRegOperands(Inst, 1); 6233 continue; 6234 } 6235 6236 if (Op.isToken() && Op.getToken() == "gds") { 6237 IsGdsHardcoded = true; 6238 continue; 6239 } 6240 6241 // Handle optional arguments 6242 OptionalIdx[Op.getImmTy()] = i; 6243 } 6244 6245 AMDGPUOperand::ImmTy OffsetType = 6246 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6247 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6248 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6249 AMDGPUOperand::ImmTyOffset; 6250 6251 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6252 6253 if (!IsGdsHardcoded) { 6254 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6255 } 6256 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6257 } 6258 6259 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6260 OptionalImmIndexMap OptionalIdx; 6261 6262 unsigned OperandIdx[4]; 6263 unsigned EnMask = 0; 6264 int SrcIdx = 0; 6265 6266 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6267 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6268 6269 // Add the register arguments 6270 if (Op.isReg()) { 6271 assert(SrcIdx < 4); 6272 OperandIdx[SrcIdx] = Inst.size(); 6273 Op.addRegOperands(Inst, 1); 6274 ++SrcIdx; 6275 continue; 6276 } 6277 6278 if (Op.isOff()) { 6279 assert(SrcIdx < 4); 6280 OperandIdx[SrcIdx] = Inst.size(); 6281 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6282 ++SrcIdx; 6283 continue; 6284 } 6285 6286 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6287 Op.addImmOperands(Inst, 1); 6288 continue; 6289 } 6290 6291 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6292 continue; 6293 6294 // Handle optional arguments 6295 OptionalIdx[Op.getImmTy()] = i; 6296 } 6297 6298 assert(SrcIdx == 4); 6299 6300 bool Compr = false; 6301 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6302 Compr = true; 6303 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6304 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6305 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6306 } 6307 6308 for (auto i = 0; i < SrcIdx; ++i) { 6309 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6310 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6311 } 6312 } 6313 6314 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6316 6317 Inst.addOperand(MCOperand::createImm(EnMask)); 6318 } 6319 6320 //===----------------------------------------------------------------------===// 6321 // s_waitcnt 6322 //===----------------------------------------------------------------------===// 6323 6324 static bool 6325 encodeCnt( 6326 const AMDGPU::IsaVersion ISA, 6327 int64_t &IntVal, 6328 int64_t CntVal, 6329 bool Saturate, 6330 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6331 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6332 { 6333 bool Failed = false; 6334 6335 IntVal = encode(ISA, IntVal, CntVal); 6336 if (CntVal != decode(ISA, IntVal)) { 6337 if (Saturate) { 6338 IntVal = encode(ISA, IntVal, -1); 6339 } else { 6340 Failed = true; 6341 } 6342 } 6343 return Failed; 6344 } 6345 6346 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6347 6348 SMLoc CntLoc = getLoc(); 6349 StringRef CntName = getTokenStr(); 6350 6351 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6352 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6353 return false; 6354 6355 int64_t CntVal; 6356 SMLoc ValLoc = getLoc(); 6357 if (!parseExpr(CntVal)) 6358 return false; 6359 6360 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6361 6362 bool Failed = true; 6363 bool Sat = CntName.endswith("_sat"); 6364 6365 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6366 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6367 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6368 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6369 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6370 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6371 } else { 6372 Error(CntLoc, "invalid counter name " + CntName); 6373 return false; 6374 } 6375 6376 if (Failed) { 6377 Error(ValLoc, "too large value for " + CntName); 6378 return false; 6379 } 6380 6381 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6382 return false; 6383 6384 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6385 if (isToken(AsmToken::EndOfStatement)) { 6386 Error(getLoc(), "expected a counter name"); 6387 return false; 6388 } 6389 } 6390 6391 return true; 6392 } 6393 6394 OperandMatchResultTy 6395 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6396 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6397 int64_t Waitcnt = getWaitcntBitMask(ISA); 6398 SMLoc S = getLoc(); 6399 6400 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6401 while (!isToken(AsmToken::EndOfStatement)) { 6402 if (!parseCnt(Waitcnt)) 6403 return MatchOperand_ParseFail; 6404 } 6405 } else { 6406 if (!parseExpr(Waitcnt)) 6407 return MatchOperand_ParseFail; 6408 } 6409 6410 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6411 return MatchOperand_Success; 6412 } 6413 6414 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6415 SMLoc FieldLoc = getLoc(); 6416 StringRef FieldName = getTokenStr(); 6417 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6418 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6419 return false; 6420 6421 SMLoc ValueLoc = getLoc(); 6422 StringRef ValueName = getTokenStr(); 6423 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6424 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6425 return false; 6426 6427 unsigned Shift; 6428 if (FieldName == "instid0") { 6429 Shift = 0; 6430 } else if (FieldName == "instskip") { 6431 Shift = 4; 6432 } else if (FieldName == "instid1") { 6433 Shift = 7; 6434 } else { 6435 Error(FieldLoc, "invalid field name " + FieldName); 6436 return false; 6437 } 6438 6439 int Value; 6440 if (Shift == 4) { 6441 // Parse values for instskip. 6442 Value = StringSwitch<int>(ValueName) 6443 .Case("SAME", 0) 6444 .Case("NEXT", 1) 6445 .Case("SKIP_1", 2) 6446 .Case("SKIP_2", 3) 6447 .Case("SKIP_3", 4) 6448 .Case("SKIP_4", 5) 6449 .Default(-1); 6450 } else { 6451 // Parse values for instid0 and instid1. 6452 Value = StringSwitch<int>(ValueName) 6453 .Case("NO_DEP", 0) 6454 .Case("VALU_DEP_1", 1) 6455 .Case("VALU_DEP_2", 2) 6456 .Case("VALU_DEP_3", 3) 6457 .Case("VALU_DEP_4", 4) 6458 .Case("TRANS32_DEP_1", 5) 6459 .Case("TRANS32_DEP_2", 6) 6460 .Case("TRANS32_DEP_3", 7) 6461 .Case("FMA_ACCUM_CYCLE_1", 8) 6462 .Case("SALU_CYCLE_1", 9) 6463 .Case("SALU_CYCLE_2", 10) 6464 .Case("SALU_CYCLE_3", 11) 6465 .Default(-1); 6466 } 6467 if (Value < 0) { 6468 Error(ValueLoc, "invalid value name " + ValueName); 6469 return false; 6470 } 6471 6472 Delay |= Value << Shift; 6473 return true; 6474 } 6475 6476 OperandMatchResultTy 6477 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6478 int64_t Delay = 0; 6479 SMLoc S = getLoc(); 6480 6481 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6482 do { 6483 if (!parseDelay(Delay)) 6484 return MatchOperand_ParseFail; 6485 } while (trySkipToken(AsmToken::Pipe)); 6486 } else { 6487 if (!parseExpr(Delay)) 6488 return MatchOperand_ParseFail; 6489 } 6490 6491 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6492 return MatchOperand_Success; 6493 } 6494 6495 bool 6496 AMDGPUOperand::isSWaitCnt() const { 6497 return isImm(); 6498 } 6499 6500 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6501 6502 //===----------------------------------------------------------------------===// 6503 // DepCtr 6504 //===----------------------------------------------------------------------===// 6505 6506 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6507 StringRef DepCtrName) { 6508 switch (ErrorId) { 6509 case OPR_ID_UNKNOWN: 6510 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6511 return; 6512 case OPR_ID_UNSUPPORTED: 6513 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6514 return; 6515 case OPR_ID_DUPLICATE: 6516 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6517 return; 6518 case OPR_VAL_INVALID: 6519 Error(Loc, Twine("invalid value for ", DepCtrName)); 6520 return; 6521 default: 6522 assert(false); 6523 } 6524 } 6525 6526 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6527 6528 using namespace llvm::AMDGPU::DepCtr; 6529 6530 SMLoc DepCtrLoc = getLoc(); 6531 StringRef DepCtrName = getTokenStr(); 6532 6533 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6534 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6535 return false; 6536 6537 int64_t ExprVal; 6538 if (!parseExpr(ExprVal)) 6539 return false; 6540 6541 unsigned PrevOprMask = UsedOprMask; 6542 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6543 6544 if (CntVal < 0) { 6545 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6546 return false; 6547 } 6548 6549 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6550 return false; 6551 6552 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6553 if (isToken(AsmToken::EndOfStatement)) { 6554 Error(getLoc(), "expected a counter name"); 6555 return false; 6556 } 6557 } 6558 6559 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6560 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6561 return true; 6562 } 6563 6564 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6565 using namespace llvm::AMDGPU::DepCtr; 6566 6567 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6568 SMLoc Loc = getLoc(); 6569 6570 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6571 unsigned UsedOprMask = 0; 6572 while (!isToken(AsmToken::EndOfStatement)) { 6573 if (!parseDepCtr(DepCtr, UsedOprMask)) 6574 return MatchOperand_ParseFail; 6575 } 6576 } else { 6577 if (!parseExpr(DepCtr)) 6578 return MatchOperand_ParseFail; 6579 } 6580 6581 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6582 return MatchOperand_Success; 6583 } 6584 6585 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6586 6587 //===----------------------------------------------------------------------===// 6588 // hwreg 6589 //===----------------------------------------------------------------------===// 6590 6591 bool 6592 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6593 OperandInfoTy &Offset, 6594 OperandInfoTy &Width) { 6595 using namespace llvm::AMDGPU::Hwreg; 6596 6597 // The register may be specified by name or using a numeric code 6598 HwReg.Loc = getLoc(); 6599 if (isToken(AsmToken::Identifier) && 6600 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6601 HwReg.IsSymbolic = true; 6602 lex(); // skip register name 6603 } else if (!parseExpr(HwReg.Id, "a register name")) { 6604 return false; 6605 } 6606 6607 if (trySkipToken(AsmToken::RParen)) 6608 return true; 6609 6610 // parse optional params 6611 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6612 return false; 6613 6614 Offset.Loc = getLoc(); 6615 if (!parseExpr(Offset.Id)) 6616 return false; 6617 6618 if (!skipToken(AsmToken::Comma, "expected a comma")) 6619 return false; 6620 6621 Width.Loc = getLoc(); 6622 return parseExpr(Width.Id) && 6623 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6624 } 6625 6626 bool 6627 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6628 const OperandInfoTy &Offset, 6629 const OperandInfoTy &Width) { 6630 6631 using namespace llvm::AMDGPU::Hwreg; 6632 6633 if (HwReg.IsSymbolic) { 6634 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6635 Error(HwReg.Loc, 6636 "specified hardware register is not supported on this GPU"); 6637 return false; 6638 } 6639 } else { 6640 if (!isValidHwreg(HwReg.Id)) { 6641 Error(HwReg.Loc, 6642 "invalid code of hardware register: only 6-bit values are legal"); 6643 return false; 6644 } 6645 } 6646 if (!isValidHwregOffset(Offset.Id)) { 6647 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6648 return false; 6649 } 6650 if (!isValidHwregWidth(Width.Id)) { 6651 Error(Width.Loc, 6652 "invalid bitfield width: only values from 1 to 32 are legal"); 6653 return false; 6654 } 6655 return true; 6656 } 6657 6658 OperandMatchResultTy 6659 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6660 using namespace llvm::AMDGPU::Hwreg; 6661 6662 int64_t ImmVal = 0; 6663 SMLoc Loc = getLoc(); 6664 6665 if (trySkipId("hwreg", AsmToken::LParen)) { 6666 
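    // Symbolic form: "hwreg(<name or id>[, <bit offset>, <bit width>])",
    // e.g. a plausible operand is "hwreg(HW_REG_MODE, 0, 4)". When the
    // optional offset/width pair is omitted it defaults to
    // (OFFSET_DEFAULT_, WIDTH_DEFAULT_). A bare expression that fits in
    // 16 bits is accepted by the branch below instead.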
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6667 OperandInfoTy Offset(OFFSET_DEFAULT_); 6668 OperandInfoTy Width(WIDTH_DEFAULT_); 6669 if (parseHwregBody(HwReg, Offset, Width) && 6670 validateHwreg(HwReg, Offset, Width)) { 6671 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6672 } else { 6673 return MatchOperand_ParseFail; 6674 } 6675 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6676 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6677 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6678 return MatchOperand_ParseFail; 6679 } 6680 } else { 6681 return MatchOperand_ParseFail; 6682 } 6683 6684 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6685 return MatchOperand_Success; 6686 } 6687 6688 bool AMDGPUOperand::isHwreg() const { 6689 return isImmTy(ImmTyHwreg); 6690 } 6691 6692 //===----------------------------------------------------------------------===// 6693 // sendmsg 6694 //===----------------------------------------------------------------------===// 6695 6696 bool 6697 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6698 OperandInfoTy &Op, 6699 OperandInfoTy &Stream) { 6700 using namespace llvm::AMDGPU::SendMsg; 6701 6702 Msg.Loc = getLoc(); 6703 if (isToken(AsmToken::Identifier) && 6704 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6705 Msg.IsSymbolic = true; 6706 lex(); // skip message name 6707 } else if (!parseExpr(Msg.Id, "a message name")) { 6708 return false; 6709 } 6710 6711 if (trySkipToken(AsmToken::Comma)) { 6712 Op.IsDefined = true; 6713 Op.Loc = getLoc(); 6714 if (isToken(AsmToken::Identifier) && 6715 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6716 lex(); // skip operation name 6717 } else if (!parseExpr(Op.Id, "an operation name")) { 6718 return false; 6719 } 6720 6721 if (trySkipToken(AsmToken::Comma)) { 6722 Stream.IsDefined = true; 6723 Stream.Loc = getLoc(); 6724 if (!parseExpr(Stream.Id)) 6725 return false; 6726 } 6727 } 6728 6729 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6730 } 6731 6732 bool 6733 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6734 const OperandInfoTy &Op, 6735 const OperandInfoTy &Stream) { 6736 using namespace llvm::AMDGPU::SendMsg; 6737 6738 // Validation strictness depends on whether message is specified 6739 // in a symbolic or in a numeric form. In the latter case 6740 // only encoding possibility is checked. 
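  // For example, "sendmsg(MSG_GS_DONE, GS_OP_NOP)" (symbolic, assuming those
  // names are defined for the subtarget) gets the full field-by-field checks,
  // while a numeric message id only needs to produce an encodable value.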
6741 bool Strict = Msg.IsSymbolic; 6742 6743 if (Strict) { 6744 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6745 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6746 return false; 6747 } 6748 } else { 6749 if (!isValidMsgId(Msg.Id, getSTI())) { 6750 Error(Msg.Loc, "invalid message id"); 6751 return false; 6752 } 6753 } 6754 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6755 if (Op.IsDefined) { 6756 Error(Op.Loc, "message does not support operations"); 6757 } else { 6758 Error(Msg.Loc, "missing message operation"); 6759 } 6760 return false; 6761 } 6762 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6763 Error(Op.Loc, "invalid operation id"); 6764 return false; 6765 } 6766 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6767 Stream.IsDefined) { 6768 Error(Stream.Loc, "message operation does not support streams"); 6769 return false; 6770 } 6771 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6772 Error(Stream.Loc, "invalid message stream id"); 6773 return false; 6774 } 6775 return true; 6776 } 6777 6778 OperandMatchResultTy 6779 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6780 using namespace llvm::AMDGPU::SendMsg; 6781 6782 int64_t ImmVal = 0; 6783 SMLoc Loc = getLoc(); 6784 6785 if (trySkipId("sendmsg", AsmToken::LParen)) { 6786 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6787 OperandInfoTy Op(OP_NONE_); 6788 OperandInfoTy Stream(STREAM_ID_NONE_); 6789 if (parseSendMsgBody(Msg, Op, Stream) && 6790 validateSendMsg(Msg, Op, Stream)) { 6791 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6792 } else { 6793 return MatchOperand_ParseFail; 6794 } 6795 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6796 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6797 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6798 return MatchOperand_ParseFail; 6799 } 6800 } else { 6801 return MatchOperand_ParseFail; 6802 } 6803 6804 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6805 return MatchOperand_Success; 6806 } 6807 6808 bool AMDGPUOperand::isSendMsg() const { 6809 return isImmTy(ImmTySendMsg); 6810 } 6811 6812 //===----------------------------------------------------------------------===// 6813 // v_interp 6814 //===----------------------------------------------------------------------===// 6815 6816 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6817 StringRef Str; 6818 SMLoc S = getLoc(); 6819 6820 if (!parseId(Str)) 6821 return MatchOperand_NoMatch; 6822 6823 int Slot = StringSwitch<int>(Str) 6824 .Case("p10", 0) 6825 .Case("p20", 1) 6826 .Case("p0", 2) 6827 .Default(-1); 6828 6829 if (Slot == -1) { 6830 Error(S, "invalid interpolation slot"); 6831 return MatchOperand_ParseFail; 6832 } 6833 6834 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6835 AMDGPUOperand::ImmTyInterpSlot)); 6836 return MatchOperand_Success; 6837 } 6838 6839 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6840 StringRef Str; 6841 SMLoc S = getLoc(); 6842 6843 if (!parseId(Str)) 6844 return MatchOperand_NoMatch; 6845 6846 if (!Str.startswith("attr")) { 6847 Error(S, "invalid interpolation attribute"); 6848 return MatchOperand_ParseFail; 6849 } 6850 6851 StringRef Chan = Str.take_back(2); 6852 int AttrChan = StringSwitch<int>(Chan) 6853 .Case(".x", 0) 6854 .Case(".y", 1) 6855 .Case(".z", 2) 6856 .Case(".w", 3) 6857 .Default(-1); 6858 if (AttrChan == -1) { 6859 Error(S, "invalid or missing interpolation attribute channel"); 
6860 return MatchOperand_ParseFail; 6861 } 6862 6863 Str = Str.drop_back(2).drop_front(4); 6864 6865 uint8_t Attr; 6866 if (Str.getAsInteger(10, Attr)) { 6867 Error(S, "invalid or missing interpolation attribute number"); 6868 return MatchOperand_ParseFail; 6869 } 6870 6871 if (Attr > 63) { 6872 Error(S, "out of bounds interpolation attribute number"); 6873 return MatchOperand_ParseFail; 6874 } 6875 6876 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6877 6878 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6879 AMDGPUOperand::ImmTyInterpAttr)); 6880 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6881 AMDGPUOperand::ImmTyAttrChan)); 6882 return MatchOperand_Success; 6883 } 6884 6885 //===----------------------------------------------------------------------===// 6886 // exp 6887 //===----------------------------------------------------------------------===// 6888 6889 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6890 using namespace llvm::AMDGPU::Exp; 6891 6892 StringRef Str; 6893 SMLoc S = getLoc(); 6894 6895 if (!parseId(Str)) 6896 return MatchOperand_NoMatch; 6897 6898 unsigned Id = getTgtId(Str); 6899 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6900 Error(S, (Id == ET_INVALID) ? 6901 "invalid exp target" : 6902 "exp target is not supported on this GPU"); 6903 return MatchOperand_ParseFail; 6904 } 6905 6906 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6907 AMDGPUOperand::ImmTyExpTgt)); 6908 return MatchOperand_Success; 6909 } 6910 6911 //===----------------------------------------------------------------------===// 6912 // parser helpers 6913 //===----------------------------------------------------------------------===// 6914 6915 bool 6916 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6917 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6918 } 6919 6920 bool 6921 AMDGPUAsmParser::isId(const StringRef Id) const { 6922 return isId(getToken(), Id); 6923 } 6924 6925 bool 6926 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6927 return getTokenKind() == Kind; 6928 } 6929 6930 bool 6931 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6932 if (isId(Id)) { 6933 lex(); 6934 return true; 6935 } 6936 return false; 6937 } 6938 6939 bool 6940 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6941 if (isToken(AsmToken::Identifier)) { 6942 StringRef Tok = getTokenStr(); 6943 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6944 lex(); 6945 return true; 6946 } 6947 } 6948 return false; 6949 } 6950 6951 bool 6952 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6953 if (isId(Id) && peekToken().is(Kind)) { 6954 lex(); 6955 lex(); 6956 return true; 6957 } 6958 return false; 6959 } 6960 6961 bool 6962 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6963 if (isToken(Kind)) { 6964 lex(); 6965 return true; 6966 } 6967 return false; 6968 } 6969 6970 bool 6971 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6972 const StringRef ErrMsg) { 6973 if (!trySkipToken(Kind)) { 6974 Error(getLoc(), ErrMsg); 6975 return false; 6976 } 6977 return true; 6978 } 6979 6980 bool 6981 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6982 SMLoc S = getLoc(); 6983 6984 const MCExpr *Expr; 6985 if (Parser.parseExpression(Expr)) 6986 return false; 6987 6988 if (Expr->evaluateAsAbsolute(Imm)) 6989 return true; 6990 6991 if (Expected.empty()) { 6992 Error(S, "expected 
absolute expression"); 6993 } else { 6994 Error(S, Twine("expected ", Expected) + 6995 Twine(" or an absolute expression")); 6996 } 6997 return false; 6998 } 6999 7000 bool 7001 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7002 SMLoc S = getLoc(); 7003 7004 const MCExpr *Expr; 7005 if (Parser.parseExpression(Expr)) 7006 return false; 7007 7008 int64_t IntVal; 7009 if (Expr->evaluateAsAbsolute(IntVal)) { 7010 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7011 } else { 7012 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7013 } 7014 return true; 7015 } 7016 7017 bool 7018 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7019 if (isToken(AsmToken::String)) { 7020 Val = getToken().getStringContents(); 7021 lex(); 7022 return true; 7023 } else { 7024 Error(getLoc(), ErrMsg); 7025 return false; 7026 } 7027 } 7028 7029 bool 7030 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7031 if (isToken(AsmToken::Identifier)) { 7032 Val = getTokenStr(); 7033 lex(); 7034 return true; 7035 } else { 7036 if (!ErrMsg.empty()) 7037 Error(getLoc(), ErrMsg); 7038 return false; 7039 } 7040 } 7041 7042 AsmToken 7043 AMDGPUAsmParser::getToken() const { 7044 return Parser.getTok(); 7045 } 7046 7047 AsmToken 7048 AMDGPUAsmParser::peekToken() { 7049 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7050 } 7051 7052 void 7053 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7054 auto TokCount = getLexer().peekTokens(Tokens); 7055 7056 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7057 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7058 } 7059 7060 AsmToken::TokenKind 7061 AMDGPUAsmParser::getTokenKind() const { 7062 return getLexer().getKind(); 7063 } 7064 7065 SMLoc 7066 AMDGPUAsmParser::getLoc() const { 7067 return getToken().getLoc(); 7068 } 7069 7070 StringRef 7071 AMDGPUAsmParser::getTokenStr() const { 7072 return getToken().getString(); 7073 } 7074 7075 void 7076 AMDGPUAsmParser::lex() { 7077 Parser.Lex(); 7078 } 7079 7080 SMLoc 7081 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7082 const OperandVector &Operands) const { 7083 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7084 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7085 if (Test(Op)) 7086 return Op.getStartLoc(); 7087 } 7088 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7089 } 7090 7091 SMLoc 7092 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7093 const OperandVector &Operands) const { 7094 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7095 return getOperandLoc(Test, Operands); 7096 } 7097 7098 SMLoc 7099 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7100 const OperandVector &Operands) const { 7101 auto Test = [=](const AMDGPUOperand& Op) { 7102 return Op.isRegKind() && Op.getReg() == Reg; 7103 }; 7104 return getOperandLoc(Test, Operands); 7105 } 7106 7107 SMLoc 7108 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7109 auto Test = [](const AMDGPUOperand& Op) { 7110 return Op.IsImmKindLiteral() || Op.isExpr(); 7111 }; 7112 return getOperandLoc(Test, Operands); 7113 } 7114 7115 SMLoc 7116 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7117 auto Test = [](const AMDGPUOperand& Op) { 7118 return Op.isImmKindConst(); 7119 }; 7120 return getOperandLoc(Test, Operands); 7121 } 7122 7123 //===----------------------------------------------------------------------===// 7124 // swizzle 7125 
//===----------------------------------------------------------------------===// 7126 7127 LLVM_READNONE 7128 static unsigned 7129 encodeBitmaskPerm(const unsigned AndMask, 7130 const unsigned OrMask, 7131 const unsigned XorMask) { 7132 using namespace llvm::AMDGPU::Swizzle; 7133 7134 return BITMASK_PERM_ENC | 7135 (AndMask << BITMASK_AND_SHIFT) | 7136 (OrMask << BITMASK_OR_SHIFT) | 7137 (XorMask << BITMASK_XOR_SHIFT); 7138 } 7139 7140 bool 7141 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7142 const unsigned MinVal, 7143 const unsigned MaxVal, 7144 const StringRef ErrMsg, 7145 SMLoc &Loc) { 7146 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7147 return false; 7148 } 7149 Loc = getLoc(); 7150 if (!parseExpr(Op)) { 7151 return false; 7152 } 7153 if (Op < MinVal || Op > MaxVal) { 7154 Error(Loc, ErrMsg); 7155 return false; 7156 } 7157 7158 return true; 7159 } 7160 7161 bool 7162 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7163 const unsigned MinVal, 7164 const unsigned MaxVal, 7165 const StringRef ErrMsg) { 7166 SMLoc Loc; 7167 for (unsigned i = 0; i < OpNum; ++i) { 7168 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7169 return false; 7170 } 7171 7172 return true; 7173 } 7174 7175 bool 7176 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7177 using namespace llvm::AMDGPU::Swizzle; 7178 7179 int64_t Lane[LANE_NUM]; 7180 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7181 "expected a 2-bit lane id")) { 7182 Imm = QUAD_PERM_ENC; 7183 for (unsigned I = 0; I < LANE_NUM; ++I) { 7184 Imm |= Lane[I] << (LANE_SHIFT * I); 7185 } 7186 return true; 7187 } 7188 return false; 7189 } 7190 7191 bool 7192 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7193 using namespace llvm::AMDGPU::Swizzle; 7194 7195 SMLoc Loc; 7196 int64_t GroupSize; 7197 int64_t LaneIdx; 7198 7199 if (!parseSwizzleOperand(GroupSize, 7200 2, 32, 7201 "group size must be in the interval [2,32]", 7202 Loc)) { 7203 return false; 7204 } 7205 if (!isPowerOf2_64(GroupSize)) { 7206 Error(Loc, "group size must be a power of two"); 7207 return false; 7208 } 7209 if (parseSwizzleOperand(LaneIdx, 7210 0, GroupSize - 1, 7211 "lane id must be in the interval [0,group size - 1]", 7212 Loc)) { 7213 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7214 return true; 7215 } 7216 return false; 7217 } 7218 7219 bool 7220 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7221 using namespace llvm::AMDGPU::Swizzle; 7222 7223 SMLoc Loc; 7224 int64_t GroupSize; 7225 7226 if (!parseSwizzleOperand(GroupSize, 7227 2, 32, 7228 "group size must be in the interval [2,32]", 7229 Loc)) { 7230 return false; 7231 } 7232 if (!isPowerOf2_64(GroupSize)) { 7233 Error(Loc, "group size must be a power of two"); 7234 return false; 7235 } 7236 7237 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7238 return true; 7239 } 7240 7241 bool 7242 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7243 using namespace llvm::AMDGPU::Swizzle; 7244 7245 SMLoc Loc; 7246 int64_t GroupSize; 7247 7248 if (!parseSwizzleOperand(GroupSize, 7249 1, 16, 7250 "group size must be in the interval [1,16]", 7251 Loc)) { 7252 return false; 7253 } 7254 if (!isPowerOf2_64(GroupSize)) { 7255 Error(Loc, "group size must be a power of two"); 7256 return false; 7257 } 7258 7259 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7260 return true; 7261 } 7262 7263 bool 7264 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7265 using namespace llvm::AMDGPU::Swizzle; 7266 7267 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7268 return false; 7269 } 7270 7271 StringRef Ctl; 7272 SMLoc StrLoc = getLoc(); 7273 if (!parseString(Ctl)) { 7274 return false; 7275 } 7276 if (Ctl.size() != BITMASK_WIDTH) { 7277 Error(StrLoc, "expected a 5-character mask"); 7278 return false; 7279 } 7280 7281 unsigned AndMask = 0; 7282 unsigned OrMask = 0; 7283 unsigned XorMask = 0; 7284 7285 for (size_t i = 0; i < Ctl.size(); ++i) { 7286 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7287 switch(Ctl[i]) { 7288 default: 7289 Error(StrLoc, "invalid mask"); 7290 return false; 7291 case '0': 7292 break; 7293 case '1': 7294 OrMask |= Mask; 7295 break; 7296 case 'p': 7297 AndMask |= Mask; 7298 break; 7299 case 'i': 7300 AndMask |= Mask; 7301 XorMask |= Mask; 7302 break; 7303 } 7304 } 7305 7306 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7307 return true; 7308 } 7309 7310 bool 7311 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7312 7313 SMLoc OffsetLoc = getLoc(); 7314 7315 if (!parseExpr(Imm, "a swizzle macro")) { 7316 return false; 7317 } 7318 if (!isUInt<16>(Imm)) { 7319 Error(OffsetLoc, "expected a 16-bit offset"); 7320 return false; 7321 } 7322 return true; 7323 } 7324 7325 bool 7326 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7327 using namespace llvm::AMDGPU::Swizzle; 7328 7329 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7330 7331 SMLoc ModeLoc = getLoc(); 7332 bool Ok = false; 7333 7334 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7335 Ok = parseSwizzleQuadPerm(Imm); 7336 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7337 Ok = parseSwizzleBitmaskPerm(Imm); 7338 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7339 Ok = parseSwizzleBroadcast(Imm); 7340 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7341 Ok = parseSwizzleSwap(Imm); 7342 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7343 Ok = parseSwizzleReverse(Imm); 7344 } else { 7345 Error(ModeLoc, "expected a swizzle mode"); 7346 } 7347 7348 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7349 } 7350 7351 return false; 7352 } 7353 7354 OperandMatchResultTy 7355 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7356 SMLoc S = getLoc(); 7357 int64_t Imm = 0; 7358 7359 if (trySkipId("offset")) { 7360 7361 bool Ok = false; 7362 if (skipToken(AsmToken::Colon, "expected a colon")) { 7363 if (trySkipId("swizzle")) { 7364 Ok = parseSwizzleMacro(Imm); 7365 } else { 7366 Ok = parseSwizzleOffset(Imm); 7367 } 7368 } 7369 7370 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7371 7372 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7373 } else { 7374 // Swizzle "offset" operand is optional. 7375 // If it is omitted, try parsing other optional operands. 
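// When present, the operand takes either a plain 16-bit value or a swizzle
// macro, e.g. (illustrative only):
//   ds_swizzle_b32 v8, v2 offset:0x8055
//   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 16)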
7376 return parseOptionalOpr(Operands); 7377 } 7378 } 7379 7380 bool 7381 AMDGPUOperand::isSwizzle() const { 7382 return isImmTy(ImmTySwizzle); 7383 } 7384 7385 //===----------------------------------------------------------------------===// 7386 // VGPR Index Mode 7387 //===----------------------------------------------------------------------===// 7388 7389 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7390 7391 using namespace llvm::AMDGPU::VGPRIndexMode; 7392 7393 if (trySkipToken(AsmToken::RParen)) { 7394 return OFF; 7395 } 7396 7397 int64_t Imm = 0; 7398 7399 while (true) { 7400 unsigned Mode = 0; 7401 SMLoc S = getLoc(); 7402 7403 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7404 if (trySkipId(IdSymbolic[ModeId])) { 7405 Mode = 1 << ModeId; 7406 break; 7407 } 7408 } 7409 7410 if (Mode == 0) { 7411 Error(S, (Imm == 0)? 7412 "expected a VGPR index mode or a closing parenthesis" : 7413 "expected a VGPR index mode"); 7414 return UNDEF; 7415 } 7416 7417 if (Imm & Mode) { 7418 Error(S, "duplicate VGPR index mode"); 7419 return UNDEF; 7420 } 7421 Imm |= Mode; 7422 7423 if (trySkipToken(AsmToken::RParen)) 7424 break; 7425 if (!skipToken(AsmToken::Comma, 7426 "expected a comma or a closing parenthesis")) 7427 return UNDEF; 7428 } 7429 7430 return Imm; 7431 } 7432 7433 OperandMatchResultTy 7434 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7435 7436 using namespace llvm::AMDGPU::VGPRIndexMode; 7437 7438 int64_t Imm = 0; 7439 SMLoc S = getLoc(); 7440 7441 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7442 Imm = parseGPRIdxMacro(); 7443 if (Imm == UNDEF) 7444 return MatchOperand_ParseFail; 7445 } else { 7446 if (getParser().parseAbsoluteExpression(Imm)) 7447 return MatchOperand_ParseFail; 7448 if (Imm < 0 || !isUInt<4>(Imm)) { 7449 Error(S, "invalid immediate: only 4-bit values are legal"); 7450 return MatchOperand_ParseFail; 7451 } 7452 } 7453 7454 Operands.push_back( 7455 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7456 return MatchOperand_Success; 7457 } 7458 7459 bool AMDGPUOperand::isGPRIdxMode() const { 7460 return isImmTy(ImmTyGprIdxMode); 7461 } 7462 7463 //===----------------------------------------------------------------------===// 7464 // sopp branch targets 7465 //===----------------------------------------------------------------------===// 7466 7467 OperandMatchResultTy 7468 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7469 7470 // Make sure we are not parsing something 7471 // that looks like a label or an expression but is not. 7472 // This will improve error messages. 7473 if (isRegister() || isModifier()) 7474 return MatchOperand_NoMatch; 7475 7476 if (!parseExpr(Operands)) 7477 return MatchOperand_ParseFail; 7478 7479 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7480 assert(Opr.isImm() || Opr.isExpr()); 7481 SMLoc Loc = Opr.getStartLoc(); 7482 7483 // Currently we do not support arbitrary expressions as branch targets. 7484 // Only labels and absolute expressions are accepted. 
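// For example (illustrative), 's_branch loop_start' and 's_branch 8' are
// accepted, while a relocatable expression such as 'loop_start + 4' is
// rejected by the check below.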
7485 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7486 Error(Loc, "expected an absolute expression or a label"); 7487 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7488 Error(Loc, "expected a 16-bit signed jump offset"); 7489 } 7490 7491 return MatchOperand_Success; 7492 } 7493 7494 //===----------------------------------------------------------------------===// 7495 // Boolean holding registers 7496 //===----------------------------------------------------------------------===// 7497 7498 OperandMatchResultTy 7499 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7500 return parseReg(Operands); 7501 } 7502 7503 //===----------------------------------------------------------------------===// 7504 // mubuf 7505 //===----------------------------------------------------------------------===// 7506 7507 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7508 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7509 } 7510 7511 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7512 const OperandVector &Operands, 7513 bool IsAtomic, 7514 bool IsLds) { 7515 OptionalImmIndexMap OptionalIdx; 7516 unsigned FirstOperandIdx = 1; 7517 bool IsAtomicReturn = false; 7518 7519 if (IsAtomic) { 7520 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7522 if (!Op.isCPol()) 7523 continue; 7524 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7525 break; 7526 } 7527 7528 if (!IsAtomicReturn) { 7529 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7530 if (NewOpc != -1) 7531 Inst.setOpcode(NewOpc); 7532 } 7533 7534 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7535 SIInstrFlags::IsAtomicRet; 7536 } 7537 7538 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7539 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7540 7541 // Add the register arguments 7542 if (Op.isReg()) { 7543 Op.addRegOperands(Inst, 1); 7544 // Insert a tied src for atomic return dst. 7545 // This cannot be postponed as subsequent calls to 7546 // addImmOperands rely on correct number of MC operands. 7547 if (IsAtomicReturn && i == FirstOperandIdx) 7548 Op.addRegOperands(Inst, 1); 7549 continue; 7550 } 7551 7552 // Handle the case where soffset is an immediate 7553 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7554 Op.addImmOperands(Inst, 1); 7555 continue; 7556 } 7557 7558 // Handle tokens like 'offen' which are sometimes hard-coded into the 7559 // asm string. There are no MCInst operands for these. 
7560 if (Op.isToken()) { 7561 continue; 7562 } 7563 assert(Op.isImm()); 7564 7565 // Handle optional arguments 7566 OptionalIdx[Op.getImmTy()] = i; 7567 } 7568 7569 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7570 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7571 7572 if (!IsLds) { // tfe is not legal with lds opcodes 7573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7574 } 7575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7576 } 7577 7578 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7579 OptionalImmIndexMap OptionalIdx; 7580 7581 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7582 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7583 7584 // Add the register arguments 7585 if (Op.isReg()) { 7586 Op.addRegOperands(Inst, 1); 7587 continue; 7588 } 7589 7590 // Handle the case where soffset is an immediate 7591 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7592 Op.addImmOperands(Inst, 1); 7593 continue; 7594 } 7595 7596 // Handle tokens like 'offen' which are sometimes hard-coded into the 7597 // asm string. There are no MCInst operands for these. 7598 if (Op.isToken()) { 7599 continue; 7600 } 7601 assert(Op.isImm()); 7602 7603 // Handle optional arguments 7604 OptionalIdx[Op.getImmTy()] = i; 7605 } 7606 7607 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7608 AMDGPUOperand::ImmTyOffset); 7609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7612 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7613 } 7614 7615 //===----------------------------------------------------------------------===// 7616 // mimg 7617 //===----------------------------------------------------------------------===// 7618 7619 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7620 bool IsAtomic) { 7621 unsigned I = 1; 7622 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7623 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7624 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7625 } 7626 7627 if (IsAtomic) { 7628 // Add src, same as dst 7629 assert(Desc.getNumDefs() == 1); 7630 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7631 } 7632 7633 OptionalImmIndexMap OptionalIdx; 7634 7635 for (unsigned E = Operands.size(); I != E; ++I) { 7636 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7637 7638 // Add the register arguments 7639 if (Op.isReg()) { 7640 Op.addRegOperands(Inst, 1); 7641 } else if (Op.isImmModifier()) { 7642 OptionalIdx[Op.getImmTy()] = I; 7643 } else if (!Op.isToken()) { 7644 llvm_unreachable("unexpected operand type"); 7645 } 7646 } 7647 7648 bool IsGFX10Plus = isGFX10Plus(); 7649 7650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7651 if (IsGFX10Plus) 7652 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7654 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7655 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7656 if (IsGFX10Plus) 7657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7658 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7661 if (!IsGFX10Plus) 7662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7664 } 7665 7666 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7667 cvtMIMG(Inst, Operands, true); 7668 } 7669 7670 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7671 OptionalImmIndexMap OptionalIdx; 7672 bool IsAtomicReturn = false; 7673 7674 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7675 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7676 if (!Op.isCPol()) 7677 continue; 7678 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7679 break; 7680 } 7681 7682 if (!IsAtomicReturn) { 7683 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7684 if (NewOpc != -1) 7685 Inst.setOpcode(NewOpc); 7686 } 7687 7688 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7689 SIInstrFlags::IsAtomicRet; 7690 7691 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7692 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7693 7694 // Add the register arguments 7695 if (Op.isReg()) { 7696 Op.addRegOperands(Inst, 1); 7697 if (IsAtomicReturn && i == 1) 7698 Op.addRegOperands(Inst, 1); 7699 continue; 7700 } 7701 7702 // Handle the case where soffset is an immediate 7703 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7704 Op.addImmOperands(Inst, 1); 7705 continue; 7706 } 7707 7708 // Handle tokens like 'offen' which are sometimes hard-coded into the 7709 // asm string. There are no MCInst operands for these. 7710 if (Op.isToken()) { 7711 continue; 7712 } 7713 assert(Op.isImm()); 7714 7715 // Handle optional arguments 7716 OptionalIdx[Op.getImmTy()] = i; 7717 } 7718 7719 if ((int)Inst.getNumOperands() <= 7720 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7723 } 7724 7725 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7726 const OperandVector &Operands) { 7727 for (unsigned I = 1; I < Operands.size(); ++I) { 7728 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7729 if (Operand.isReg()) 7730 Operand.addRegOperands(Inst, 1); 7731 } 7732 7733 Inst.addOperand(MCOperand::createImm(1)); // a16 7734 } 7735 7736 //===----------------------------------------------------------------------===// 7737 // smrd 7738 //===----------------------------------------------------------------------===// 7739 7740 bool AMDGPUOperand::isSMRDOffset8() const { 7741 return isImm() && isUInt<8>(getImm()); 7742 } 7743 7744 bool AMDGPUOperand::isSMEMOffset() const { 7745 return isImmTy(ImmTyNone) || 7746 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7747 } 7748 7749 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7750 // 32-bit literals are only supported on CI and we only want to use them 7751 // when the offset is > 8-bits. 
7752 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7753 } 7754 7755 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7756 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7757 } 7758 7759 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7760 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7761 } 7762 7763 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7764 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7765 } 7766 7767 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7768 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7769 } 7770 7771 //===----------------------------------------------------------------------===// 7772 // vop3 7773 //===----------------------------------------------------------------------===// 7774 7775 static bool ConvertOmodMul(int64_t &Mul) { 7776 if (Mul != 1 && Mul != 2 && Mul != 4) 7777 return false; 7778 7779 Mul >>= 1; 7780 return true; 7781 } 7782 7783 static bool ConvertOmodDiv(int64_t &Div) { 7784 if (Div == 1) { 7785 Div = 0; 7786 return true; 7787 } 7788 7789 if (Div == 2) { 7790 Div = 3; 7791 return true; 7792 } 7793 7794 return false; 7795 } 7796 7797 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7798 // This is intentional and ensures compatibility with sp3. 7799 // See bug 35397 for details. 7800 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7801 if (BoundCtrl == 0 || BoundCtrl == 1) { 7802 BoundCtrl = 1; 7803 return true; 7804 } 7805 return false; 7806 } 7807 7808 // Note: the order in this table matches the order of operands in AsmString. 7809 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7810 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7811 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7812 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7813 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7814 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7815 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7816 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7817 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7818 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7819 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7820 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7821 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7822 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7823 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7824 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7825 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7826 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7827 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7828 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7829 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7830 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7831 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7832 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7833 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7834 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7835 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7836 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7837 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7838 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7839 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7840 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7841 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7842 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7843 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7844 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7845 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7846 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7847 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7848 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7849 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7850 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7851 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7852 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7853 }; 7854 7855 void AMDGPUAsmParser::onBeginOfFile() { 7856 if (!getParser().getStreamer().getTargetStreamer() || 7857 getSTI().getTargetTriple().getArch() == Triple::r600) 7858 return; 7859 7860 if (!getTargetStreamer().getTargetID()) 7861 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7862 7863 if (isHsaAbiVersion3AndAbove(&getSTI())) 7864 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7865 } 7866 7867 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7868 7869 OperandMatchResultTy res = parseOptionalOpr(Operands); 7870 7871 // This is a hack to enable hardcoded mandatory operands which follow 7872 // optional operands. 7873 // 7874 // Current design assumes that all operands after the first optional operand 7875 // are also optional. However implementation of some instructions violates 7876 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7877 // 7878 // To alleviate this problem, we have to (implicitly) parse extra operands 7879 // to make sure autogenerated parser of custom operands never hit hardcoded 7880 // mandatory operands. 
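// For example, a flat/global atomic with a hardcoded trailing 'glc' (see the
// note above) relies on the lookahead below to consume that operand together
// with the preceding optional ones.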
7881 7882 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7883 if (res != MatchOperand_Success || 7884 isToken(AsmToken::EndOfStatement)) 7885 break; 7886 7887 trySkipToken(AsmToken::Comma); 7888 res = parseOptionalOpr(Operands); 7889 } 7890 7891 return res; 7892 } 7893 7894 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7895 OperandMatchResultTy res; 7896 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7897 // try to parse any optional operand here 7898 if (Op.IsBit) { 7899 res = parseNamedBit(Op.Name, Operands, Op.Type); 7900 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7901 res = parseOModOperand(Operands); 7902 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7903 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7904 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7905 res = parseSDWASel(Operands, Op.Name, Op.Type); 7906 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7907 res = parseSDWADstUnused(Operands); 7908 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7909 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7910 Op.Type == AMDGPUOperand::ImmTyNegLo || 7911 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7912 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7913 Op.ConvertResult); 7914 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7915 res = parseDim(Operands); 7916 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7917 res = parseCPol(Operands); 7918 } else { 7919 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7920 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7921 res = parseOperandArrayWithPrefix("neg", Operands, 7922 AMDGPUOperand::ImmTyBLGP, 7923 nullptr); 7924 } 7925 } 7926 if (res != MatchOperand_NoMatch) { 7927 return res; 7928 } 7929 } 7930 return MatchOperand_NoMatch; 7931 } 7932 7933 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7934 StringRef Name = getTokenStr(); 7935 if (Name == "mul") { 7936 return parseIntWithPrefix("mul", Operands, 7937 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7938 } 7939 7940 if (Name == "div") { 7941 return parseIntWithPrefix("div", Operands, 7942 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7943 } 7944 7945 return MatchOperand_NoMatch; 7946 } 7947 7948 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7949 cvtVOP3P(Inst, Operands); 7950 7951 int Opc = Inst.getOpcode(); 7952 7953 int SrcNum; 7954 const int Ops[] = { AMDGPU::OpName::src0, 7955 AMDGPU::OpName::src1, 7956 AMDGPU::OpName::src2 }; 7957 for (SrcNum = 0; 7958 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7959 ++SrcNum); 7960 assert(SrcNum > 0); 7961 7962 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7963 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7964 7965 if ((OpSel & (1 << SrcNum)) != 0) { 7966 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7967 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7968 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7969 } 7970 } 7971 7972 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7973 // 1. This operand is input modifiers 7974 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7975 // 2. This is not last operand 7976 && Desc.NumOperands > (OpNum + 1) 7977 // 3. Next operand is register class 7978 && Desc.OpInfo[OpNum + 1].RegClass != -1 7979 // 4. 
Next register is not tied to any other operand 7980 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7981 } 7982 7983 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7984 { 7985 OptionalImmIndexMap OptionalIdx; 7986 unsigned Opc = Inst.getOpcode(); 7987 7988 unsigned I = 1; 7989 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7990 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7991 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7992 } 7993 7994 for (unsigned E = Operands.size(); I != E; ++I) { 7995 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7996 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7997 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7998 } else if (Op.isInterpSlot() || 7999 Op.isInterpAttr() || 8000 Op.isAttrChan()) { 8001 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8002 } else if (Op.isImmModifier()) { 8003 OptionalIdx[Op.getImmTy()] = I; 8004 } else { 8005 llvm_unreachable("unhandled operand type"); 8006 } 8007 } 8008 8009 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8010 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8011 } 8012 8013 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8014 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8015 } 8016 8017 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8018 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8019 } 8020 } 8021 8022 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8023 { 8024 OptionalImmIndexMap OptionalIdx; 8025 unsigned Opc = Inst.getOpcode(); 8026 8027 unsigned I = 1; 8028 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8029 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8030 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8031 } 8032 8033 for (unsigned E = Operands.size(); I != E; ++I) { 8034 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8035 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8036 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8037 } else if (Op.isImmModifier()) { 8038 OptionalIdx[Op.getImmTy()] = I; 8039 } else { 8040 llvm_unreachable("unhandled operand type"); 8041 } 8042 } 8043 8044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8045 8046 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8047 if (OpSelIdx != -1) 8048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8049 8050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8051 8052 if (OpSelIdx == -1) 8053 return; 8054 8055 const int Ops[] = { AMDGPU::OpName::src0, 8056 AMDGPU::OpName::src1, 8057 AMDGPU::OpName::src2 }; 8058 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8059 AMDGPU::OpName::src1_modifiers, 8060 AMDGPU::OpName::src2_modifiers }; 8061 8062 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8063 8064 for (int J = 0; J < 3; ++J) { 8065 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8066 if (OpIdx == -1) 8067 break; 8068 8069 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8070 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8071 8072 if ((OpSel & (1 << J)) != 0) 8073 ModVal |= SISrcMods::OP_SEL_0; 8074 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8075 (OpSel & (1 << 3)) != 0) 8076 ModVal |= SISrcMods::DST_OP_SEL; 8077 8078 
Inst.getOperand(ModIdx).setImm(ModVal); 8079 } 8080 } 8081 8082 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8083 OptionalImmIndexMap &OptionalIdx) { 8084 unsigned Opc = Inst.getOpcode(); 8085 8086 unsigned I = 1; 8087 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8088 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8089 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8090 } 8091 8092 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8093 // This instruction has src modifiers 8094 for (unsigned E = Operands.size(); I != E; ++I) { 8095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8096 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8097 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8098 } else if (Op.isImmModifier()) { 8099 OptionalIdx[Op.getImmTy()] = I; 8100 } else if (Op.isRegOrImm()) { 8101 Op.addRegOrImmOperands(Inst, 1); 8102 } else { 8103 llvm_unreachable("unhandled operand type"); 8104 } 8105 } 8106 } else { 8107 // No src modifiers 8108 for (unsigned E = Operands.size(); I != E; ++I) { 8109 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8110 if (Op.isMod()) { 8111 OptionalIdx[Op.getImmTy()] = I; 8112 } else { 8113 Op.addRegOrImmOperands(Inst, 1); 8114 } 8115 } 8116 } 8117 8118 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8119 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8120 } 8121 8122 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8124 } 8125 8126 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8127 // it has src2 register operand that is tied to dst operand 8128 // we don't allow modifiers for this operand in assembler so src2_modifiers 8129 // should be 0. 8130 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8131 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8132 Opc == AMDGPU::V_MAC_F32_e64_vi || 8133 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8134 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8135 Opc == AMDGPU::V_MAC_F16_e64_vi || 8136 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8137 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8138 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8139 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8140 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8141 auto it = Inst.begin(); 8142 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8143 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8144 ++it; 8145 // Copy the operand to ensure it's not invalidated when Inst grows. 8146 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8147 } 8148 } 8149 8150 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8151 OptionalImmIndexMap OptionalIdx; 8152 cvtVOP3(Inst, Operands, OptionalIdx); 8153 } 8154 8155 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8156 OptionalImmIndexMap &OptIdx) { 8157 const int Opc = Inst.getOpcode(); 8158 const MCInstrDesc &Desc = MII.get(Opc); 8159 8160 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8161 8162 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8163 assert(!IsPacked); 8164 Inst.addOperand(Inst.getOperand(0)); 8165 } 8166 8167 // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 8168 // instruction, and then figure out where to actually put the modifiers 8169 8170 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8171 if (OpSelIdx != -1) { 8172 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8173 } 8174 8175 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8176 if (OpSelHiIdx != -1) { 8177 int DefaultVal = IsPacked ? -1 : 0; 8178 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8179 DefaultVal); 8180 } 8181 8182 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8183 if (NegLoIdx != -1) { 8184 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8185 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8186 } 8187 8188 const int Ops[] = { AMDGPU::OpName::src0, 8189 AMDGPU::OpName::src1, 8190 AMDGPU::OpName::src2 }; 8191 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8192 AMDGPU::OpName::src1_modifiers, 8193 AMDGPU::OpName::src2_modifiers }; 8194 8195 unsigned OpSel = 0; 8196 unsigned OpSelHi = 0; 8197 unsigned NegLo = 0; 8198 unsigned NegHi = 0; 8199 8200 if (OpSelIdx != -1) 8201 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8202 8203 if (OpSelHiIdx != -1) 8204 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8205 8206 if (NegLoIdx != -1) { 8207 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8208 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8209 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8210 } 8211 8212 for (int J = 0; J < 3; ++J) { 8213 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8214 if (OpIdx == -1) 8215 break; 8216 8217 uint32_t ModVal = 0; 8218 8219 if ((OpSel & (1 << J)) != 0) 8220 ModVal |= SISrcMods::OP_SEL_0; 8221 8222 if ((OpSelHi & (1 << J)) != 0) 8223 ModVal |= SISrcMods::OP_SEL_1; 8224 8225 if ((NegLo & (1 << J)) != 0) 8226 ModVal |= SISrcMods::NEG; 8227 8228 if ((NegHi & (1 << J)) != 0) 8229 ModVal |= SISrcMods::NEG_HI; 8230 8231 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8232 8233 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8234 } 8235 } 8236 8237 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8238 OptionalImmIndexMap OptIdx; 8239 cvtVOP3(Inst, Operands, OptIdx); 8240 cvtVOP3P(Inst, Operands, OptIdx); 8241 } 8242 8243 //===----------------------------------------------------------------------===// 8244 // dpp 8245 //===----------------------------------------------------------------------===// 8246 8247 bool AMDGPUOperand::isDPP8() const { 8248 return isImmTy(ImmTyDPP8); 8249 } 8250 8251 bool AMDGPUOperand::isDPPCtrl() const { 8252 using namespace AMDGPU::DPP; 8253 8254 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8255 if (result) { 8256 int64_t Imm = getImm(); 8257 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8258 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8259 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8260 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8261 (Imm == DppCtrl::WAVE_SHL1) || 8262 (Imm == DppCtrl::WAVE_ROL1) || 8263 (Imm == DppCtrl::WAVE_SHR1) || 8264 (Imm == DppCtrl::WAVE_ROR1) || 8265 (Imm == DppCtrl::ROW_MIRROR) || 8266 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8267 (Imm == DppCtrl::BCAST15) || 8268 (Imm == DppCtrl::BCAST31) || 8269 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= 
DppCtrl::ROW_SHARE_LAST) || 8270 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8271 } 8272 return false; 8273 } 8274 8275 //===----------------------------------------------------------------------===// 8276 // mAI 8277 //===----------------------------------------------------------------------===// 8278 8279 bool AMDGPUOperand::isBLGP() const { 8280 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8281 } 8282 8283 bool AMDGPUOperand::isCBSZ() const { 8284 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8285 } 8286 8287 bool AMDGPUOperand::isABID() const { 8288 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8289 } 8290 8291 bool AMDGPUOperand::isS16Imm() const { 8292 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8293 } 8294 8295 bool AMDGPUOperand::isU16Imm() const { 8296 return isImm() && isUInt<16>(getImm()); 8297 } 8298 8299 //===----------------------------------------------------------------------===// 8300 // dim 8301 //===----------------------------------------------------------------------===// 8302 8303 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8304 // We want to allow "dim:1D" etc., 8305 // but the initial 1 is tokenized as an integer. 8306 std::string Token; 8307 if (isToken(AsmToken::Integer)) { 8308 SMLoc Loc = getToken().getEndLoc(); 8309 Token = std::string(getTokenStr()); 8310 lex(); 8311 if (getLoc() != Loc) 8312 return false; 8313 } 8314 8315 StringRef Suffix; 8316 if (!parseId(Suffix)) 8317 return false; 8318 Token += Suffix; 8319 8320 StringRef DimId = Token; 8321 if (DimId.startswith("SQ_RSRC_IMG_")) 8322 DimId = DimId.drop_front(12); 8323 8324 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8325 if (!DimInfo) 8326 return false; 8327 8328 Encoding = DimInfo->Encoding; 8329 return true; 8330 } 8331 8332 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8333 if (!isGFX10Plus()) 8334 return MatchOperand_NoMatch; 8335 8336 SMLoc S = getLoc(); 8337 8338 if (!trySkipId("dim", AsmToken::Colon)) 8339 return MatchOperand_NoMatch; 8340 8341 unsigned Encoding; 8342 SMLoc Loc = getLoc(); 8343 if (!parseDimId(Encoding)) { 8344 Error(Loc, "invalid dim value"); 8345 return MatchOperand_ParseFail; 8346 } 8347 8348 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8349 AMDGPUOperand::ImmTyDim)); 8350 return MatchOperand_Success; 8351 } 8352 8353 //===----------------------------------------------------------------------===// 8354 // dpp 8355 //===----------------------------------------------------------------------===// 8356 8357 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8358 SMLoc S = getLoc(); 8359 8360 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8361 return MatchOperand_NoMatch; 8362 8363 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8364 8365 int64_t Sels[8]; 8366 8367 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8368 return MatchOperand_ParseFail; 8369 8370 for (size_t i = 0; i < 8; ++i) { 8371 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8372 return MatchOperand_ParseFail; 8373 8374 SMLoc Loc = getLoc(); 8375 if (getParser().parseAbsoluteExpression(Sels[i])) 8376 return MatchOperand_ParseFail; 8377 if (0 > Sels[i] || 7 < Sels[i]) { 8378 Error(Loc, "expected a 3-bit value"); 8379 return MatchOperand_ParseFail; 8380 } 8381 } 8382 8383 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8384 return 
MatchOperand_ParseFail; 8385 8386 unsigned DPP8 = 0; 8387 for (size_t i = 0; i < 8; ++i) 8388 DPP8 |= (Sels[i] << (i * 3)); 8389 8390 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8391 return MatchOperand_Success; 8392 } 8393 8394 bool 8395 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8396 const OperandVector &Operands) { 8397 if (Ctrl == "row_newbcast") 8398 return isGFX90A(); 8399 8400 if (Ctrl == "row_share" || 8401 Ctrl == "row_xmask") 8402 return isGFX10Plus(); 8403 8404 if (Ctrl == "wave_shl" || 8405 Ctrl == "wave_shr" || 8406 Ctrl == "wave_rol" || 8407 Ctrl == "wave_ror" || 8408 Ctrl == "row_bcast") 8409 return isVI() || isGFX9(); 8410 8411 return Ctrl == "row_mirror" || 8412 Ctrl == "row_half_mirror" || 8413 Ctrl == "quad_perm" || 8414 Ctrl == "row_shl" || 8415 Ctrl == "row_shr" || 8416 Ctrl == "row_ror"; 8417 } 8418 8419 int64_t 8420 AMDGPUAsmParser::parseDPPCtrlPerm() { 8421 // quad_perm:[%d,%d,%d,%d] 8422 8423 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8424 return -1; 8425 8426 int64_t Val = 0; 8427 for (int i = 0; i < 4; ++i) { 8428 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8429 return -1; 8430 8431 int64_t Temp; 8432 SMLoc Loc = getLoc(); 8433 if (getParser().parseAbsoluteExpression(Temp)) 8434 return -1; 8435 if (Temp < 0 || Temp > 3) { 8436 Error(Loc, "expected a 2-bit value"); 8437 return -1; 8438 } 8439 8440 Val += (Temp << i * 2); 8441 } 8442 8443 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8444 return -1; 8445 8446 return Val; 8447 } 8448 8449 int64_t 8450 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8451 using namespace AMDGPU::DPP; 8452 8453 // sel:%d 8454 8455 int64_t Val; 8456 SMLoc Loc = getLoc(); 8457 8458 if (getParser().parseAbsoluteExpression(Val)) 8459 return -1; 8460 8461 struct DppCtrlCheck { 8462 int64_t Ctrl; 8463 int Lo; 8464 int Hi; 8465 }; 8466 8467 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8468 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8469 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8470 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8471 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8472 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8473 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8474 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8475 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8476 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8477 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8478 .Default({-1, 0, 0}); 8479 8480 bool Valid; 8481 if (Check.Ctrl == -1) { 8482 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8483 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8484 } else { 8485 Valid = Check.Lo <= Val && Val <= Check.Hi; 8486 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8487 } 8488 8489 if (!Valid) { 8490 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8491 return -1; 8492 } 8493 8494 return Val; 8495 } 8496 8497 OperandMatchResultTy 8498 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8499 using namespace AMDGPU::DPP; 8500 8501 if (!isToken(AsmToken::Identifier) || 8502 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8503 return MatchOperand_NoMatch; 8504 8505 SMLoc S = getLoc(); 8506 int64_t Val = -1; 8507 StringRef Ctrl; 8508 8509 parseId(Ctrl); 8510 8511 if (Ctrl == "row_mirror") { 8512 Val = DppCtrl::ROW_MIRROR; 8513 } else if (Ctrl == "row_half_mirror") { 8514 Val = DppCtrl::ROW_HALF_MIRROR; 8515 } else { 8516 if (skipToken(AsmToken::Colon, "expected a colon")) { 8517 if (Ctrl == "quad_perm") { 8518 Val = parseDPPCtrlPerm(); 8519 } else { 8520 Val = parseDPPCtrlSel(Ctrl); 8521 } 8522 } 8523 } 8524 8525 if (Val == -1) 8526 return MatchOperand_ParseFail; 8527 8528 Operands.push_back( 8529 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8530 return MatchOperand_Success; 8531 } 8532 8533 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8534 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8535 } 8536 8537 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8538 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8539 } 8540 8541 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8542 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8543 } 8544 8545 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8546 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8547 } 8548 8549 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8550 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8551 } 8552 8553 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8554 OptionalImmIndexMap OptionalIdx; 8555 8556 unsigned Opc = Inst.getOpcode(); 8557 bool HasModifiers = 8558 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8559 unsigned I = 1; 8560 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8561 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8562 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8563 } 8564 8565 int Fi = 0; 8566 for (unsigned E = Operands.size(); I != E; ++I) { 8567 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8568 MCOI::TIED_TO); 8569 if (TiedTo != -1) { 8570 assert((unsigned)TiedTo < Inst.getNumOperands()); 8571 // handle tied old or src2 for MAC instructions 8572 Inst.addOperand(Inst.getOperand(TiedTo)); 8573 } 8574 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8575 // Add the register arguments 8576 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8577 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8578 // Skip it. 
8579 continue; 8580 } 8581 8582 if (IsDPP8) { 8583 if (Op.isDPP8()) { 8584 Op.addImmOperands(Inst, 1); 8585 } else if (HasModifiers && 8586 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8587 Op.addRegWithFPInputModsOperands(Inst, 2); 8588 } else if (Op.isFI()) { 8589 Fi = Op.getImm(); 8590 } else if (Op.isReg()) { 8591 Op.addRegOperands(Inst, 1); 8592 } else { 8593 llvm_unreachable("Invalid operand type"); 8594 } 8595 } else { 8596 if (HasModifiers && 8597 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8598 Op.addRegWithFPInputModsOperands(Inst, 2); 8599 } else if (Op.isReg()) { 8600 Op.addRegOperands(Inst, 1); 8601 } else if (Op.isDPPCtrl()) { 8602 Op.addImmOperands(Inst, 1); 8603 } else if (Op.isImm()) { 8604 // Handle optional arguments 8605 OptionalIdx[Op.getImmTy()] = I; 8606 } else { 8607 llvm_unreachable("Invalid operand type"); 8608 } 8609 } 8610 } 8611 8612 if (IsDPP8) { 8613 using namespace llvm::AMDGPU::DPP; 8614 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8615 } else { 8616 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8618 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8619 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8621 } 8622 } 8623 } 8624 8625 //===----------------------------------------------------------------------===// 8626 // sdwa 8627 //===----------------------------------------------------------------------===// 8628 8629 OperandMatchResultTy 8630 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8631 AMDGPUOperand::ImmTy Type) { 8632 using namespace llvm::AMDGPU::SDWA; 8633 8634 SMLoc S = getLoc(); 8635 StringRef Value; 8636 OperandMatchResultTy res; 8637 8638 SMLoc StringLoc; 8639 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8640 if (res != MatchOperand_Success) { 8641 return res; 8642 } 8643 8644 int64_t Int; 8645 Int = StringSwitch<int64_t>(Value) 8646 .Case("BYTE_0", SdwaSel::BYTE_0) 8647 .Case("BYTE_1", SdwaSel::BYTE_1) 8648 .Case("BYTE_2", SdwaSel::BYTE_2) 8649 .Case("BYTE_3", SdwaSel::BYTE_3) 8650 .Case("WORD_0", SdwaSel::WORD_0) 8651 .Case("WORD_1", SdwaSel::WORD_1) 8652 .Case("DWORD", SdwaSel::DWORD) 8653 .Default(0xffffffff); 8654 8655 if (Int == 0xffffffff) { 8656 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8657 return MatchOperand_ParseFail; 8658 } 8659 8660 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8661 return MatchOperand_Success; 8662 } 8663 8664 OperandMatchResultTy 8665 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8666 using namespace llvm::AMDGPU::SDWA; 8667 8668 SMLoc S = getLoc(); 8669 StringRef Value; 8670 OperandMatchResultTy res; 8671 8672 SMLoc StringLoc; 8673 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8674 if (res != MatchOperand_Success) { 8675 return res; 8676 } 8677 8678 int64_t Int; 8679 Int = StringSwitch<int64_t>(Value) 8680 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8681 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8682 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8683 .Default(0xffffffff); 8684 8685 if (Int == 0xffffffff) { 8686 Error(StringLoc, "invalid dst_unused value"); 8687 return MatchOperand_ParseFail; 8688 } 8689 8690 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8691 return MatchOperand_Success; 8692 } 8693 8694 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8695 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8696 } 8697 8698 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8699 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8700 } 8701 8702 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8703 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8704 } 8705 8706 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8707 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8708 } 8709 8710 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8711 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8712 } 8713 8714 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8715 uint64_t BasicInstType, 8716 bool SkipDstVcc, 8717 bool SkipSrcVcc) { 8718 using namespace llvm::AMDGPU::SDWA; 8719 8720 OptionalImmIndexMap OptionalIdx; 8721 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8722 bool SkippedVcc = false; 8723 8724 unsigned I = 1; 8725 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8726 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8727 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8728 } 8729 8730 for (unsigned E = Operands.size(); I != E; ++I) { 8731 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8732 if (SkipVcc && !SkippedVcc && Op.isReg() && 8733 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8734 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8735 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8736 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8737 // Skip VCC only if we didn't skip it on previous iteration. 8738 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
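// With only the def added so far, the dst 'vcc' is therefore seen when the
// MCInst holds exactly 1 operand, and the src 'vcc' once src0/src1 and their
// modifiers have been added (5 operands).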
8739 if (BasicInstType == SIInstrFlags::VOP2 && 8740 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8741 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8742 SkippedVcc = true; 8743 continue; 8744 } else if (BasicInstType == SIInstrFlags::VOPC && 8745 Inst.getNumOperands() == 0) { 8746 SkippedVcc = true; 8747 continue; 8748 } 8749 } 8750 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8751 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8752 } else if (Op.isImm()) { 8753 // Handle optional arguments 8754 OptionalIdx[Op.getImmTy()] = I; 8755 } else { 8756 llvm_unreachable("Invalid operand type"); 8757 } 8758 SkippedVcc = false; 8759 } 8760 8761 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8762 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8763 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8764 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8765 switch (BasicInstType) { 8766 case SIInstrFlags::VOP1: 8767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8768 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8770 } 8771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8772 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8774 break; 8775 8776 case SIInstrFlags::VOP2: 8777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8778 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8780 } 8781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8785 break; 8786 8787 case SIInstrFlags::VOPC: 8788 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8792 break; 8793 8794 default: 8795 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8796 } 8797 } 8798 8799 // special case v_mac_{f16, f32}: 8800 // it has src2 register operand that is tied to dst operand 8801 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8802 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8803 auto it = Inst.begin(); 8804 std::advance( 8805 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8806 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8807 } 8808 } 8809 8810 //===----------------------------------------------------------------------===// 8811 // mAI 8812 //===----------------------------------------------------------------------===// 8813 8814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8815 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8816 } 8817 8818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8819 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8820 } 8821 8822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8824 } 8825 8826 /// Force static initialization. 8827 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8828 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8829 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8830 } 8831 8832 #define GET_REGISTER_MATCHER 8833 #define GET_MATCHER_IMPLEMENTATION 8834 #define GET_MNEMONIC_SPELL_CHECKER 8835 #define GET_MNEMONIC_CHECKER 8836 #include "AMDGPUGenAsmMatcher.inc" 8837 8838 // This function should be defined after auto-generated include so that we have 8839 // MatchClassKind enum defined 8840 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8841 unsigned Kind) { 8842 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8843 // But MatchInstructionImpl() expects to meet token and fails to validate 8844 // operand. This method checks if we are given immediate operand but expect to 8845 // get corresponding token. 8846 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8847 switch (Kind) { 8848 case MCK_addr64: 8849 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8850 case MCK_gds: 8851 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8852 case MCK_lds: 8853 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8854 case MCK_idxen: 8855 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8856 case MCK_offen: 8857 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8858 case MCK_SSrcB32: 8859 // When operands have expression values, they will return true for isToken, 8860 // because it is not possible to distinguish between a token and an 8861 // expression at parse time. MatchInstructionImpl() will always try to 8862 // match an operand as a token, when isToken returns true, and when the 8863 // name of the expression is not a valid token, the match will fail, 8864 // so we need to handle it here. 8865 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8866 case MCK_SSrcF32: 8867 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8868 case MCK_SoppBrTarget: 8869 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8870 case MCK_VReg32OrOff: 8871 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8872 case MCK_InterpSlot: 8873 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8874 case MCK_Attr: 8875 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8876 case MCK_AttrChan: 8877 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8878 case MCK_ImmSMEMOffset: 8879 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8880 case MCK_SReg_64: 8881 case MCK_SReg_64_XEXEC: 8882 // Null is defined as a 32-bit register but 8883 // it should also be enabled with 64-bit operands. 8884 // The following code enables it for SReg_64 operands 8885 // used as source and destination. Remaining source 8886 // operands are handled in isInlinableImm. 8887 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8888 default: 8889 return Match_InvalidOperand; 8890 } 8891 } 8892 8893 //===----------------------------------------------------------------------===// 8894 // endpgm 8895 //===----------------------------------------------------------------------===// 8896 8897 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8898 SMLoc S = getLoc(); 8899 int64_t Imm = 0; 8900 8901 if (!parseExpr(Imm)) { 8902 // The operand is optional, if not present default to 0 8903 Imm = 0; 8904 } 8905 8906 if (!isUInt<16>(Imm)) { 8907 Error(S, "expected a 16-bit value"); 8908 return MatchOperand_ParseFail; 8909 } 8910 8911 Operands.push_back( 8912 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8913 return MatchOperand_Success; 8914 } 8915 8916 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8917 8918 //===----------------------------------------------------------------------===// 8919 // LDSDIR 8920 //===----------------------------------------------------------------------===// 8921 8922 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { 8923 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); 8924 } 8925 8926 bool AMDGPUOperand::isWaitVDST() const { 8927 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 8928 } 8929 8930 //===----------------------------------------------------------------------===// 8931 // VINTERP 8932 //===----------------------------------------------------------------------===// 8933 8934 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { 8935 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); 8936 } 8937 8938 bool AMDGPUOperand::isWaitEXP() const { 8939 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 8940 } 8941