1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/TargetParser.h" 40 41 using namespace llvm; 42 using namespace llvm::AMDGPU; 43 using namespace llvm::amdhsa; 44 45 namespace { 46 47 class AMDGPUAsmParser; 48 49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 50 51 //===----------------------------------------------------------------------===// 52 // Operand 53 //===----------------------------------------------------------------------===// 54 55 class AMDGPUOperand : public MCParsedAsmOperand { 56 enum KindTy { 57 Token, 58 Immediate, 59 Register, 60 Expression 61 } Kind; 62 63 SMLoc StartLoc, EndLoc; 64 const AMDGPUAsmParser *AsmParser; 65 66 public: 67 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 68 : Kind(Kind_), AsmParser(AsmParser_) {} 69 70 using Ptr = std::unique_ptr<AMDGPUOperand>; 71 72 struct Modifiers { 73 bool Abs = false; 74 bool Neg = false; 75 bool Sext = false; 76 77 bool hasFPModifiers() const { return Abs || Neg; } 78 bool hasIntModifiers() const { return Sext; } 79 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 80 81 int64_t getFPModifiersOperand() const { 82 int64_t Operand = 0; 83 Operand |= Abs ? SISrcMods::ABS : 0u; 84 Operand |= Neg ? SISrcMods::NEG : 0u; 85 return Operand; 86 } 87 88 int64_t getIntModifiersOperand() const { 89 int64_t Operand = 0; 90 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
91 return Operand;
92 }
93
94 int64_t getModifiersOperand() const {
95 assert(!(hasFPModifiers() && hasIntModifiers())
96 && "fp and int modifiers should not be used simultaneously");
97 if (hasFPModifiers()) {
98 return getFPModifiersOperand();
99 } else if (hasIntModifiers()) {
100 return getIntModifiersOperand();
101 } else {
102 return 0;
103 }
104 }
105
106 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107 };
108
109 enum ImmTy {
110 ImmTyNone,
111 ImmTyGDS,
112 ImmTyLDS,
113 ImmTyOffen,
114 ImmTyIdxen,
115 ImmTyAddr64,
116 ImmTyOffset,
117 ImmTyInstOffset,
118 ImmTyOffset0,
119 ImmTyOffset1,
120 ImmTyCPol,
121 ImmTySWZ,
122 ImmTyTFE,
123 ImmTyD16,
124 ImmTyClampSI,
125 ImmTyOModSI,
126 ImmTyDPP8,
127 ImmTyDppCtrl,
128 ImmTyDppRowMask,
129 ImmTyDppBankMask,
130 ImmTyDppBoundCtrl,
131 ImmTyDppFi,
132 ImmTySdwaDstSel,
133 ImmTySdwaSrc0Sel,
134 ImmTySdwaSrc1Sel,
135 ImmTySdwaDstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTySwizzle,
158 ImmTyGprIdxMode,
159 ImmTyHigh,
160 ImmTyBLGP,
161 ImmTyCBSZ,
162 ImmTyABID,
163 ImmTyEndpgm,
164 ImmTyWaitVDST,
165 ImmTyWaitEXP,
166 };
167
168 enum ImmKindTy {
169 ImmKindTyNone,
170 ImmKindTyLiteral,
171 ImmKindTyConst,
172 };
173
174 private:
175 struct TokOp {
176 const char *Data;
177 unsigned Length;
178 };
179
180 struct ImmOp {
181 int64_t Val;
182 ImmTy Type;
183 bool IsFPImm;
184 mutable ImmKindTy Kind;
185 Modifiers Mods;
186 };
187
188 struct RegOp {
189 unsigned RegNo;
190 Modifiers Mods;
191 };
192
193 union {
194 TokOp Tok;
195 ImmOp Imm;
196 RegOp Reg;
197 const MCExpr *Expr;
198 };
199
200 public:
201 bool isToken() const override {
202 if (Kind == Token)
203 return true;
204
205 // When parsing operands, we can't always tell if something was meant to be
206 // a token, like 'gds', or an expression that references a global variable.
207 // In this case, we assume the string is an expression, and if we need to
208 // interpret it as a token, then we treat the symbol name as the token.
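// For example, an operand spelled 'gds' may come back from the expression
// parser as an MCSymbolRefExpr; by answering isToken() for such expressions
// (and letting getToken() return the symbol name), the generated matcher can
// still match it as the 'gds' token. (Illustrative note only.)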
209 return isSymbolRefExpr(); 210 } 211 212 bool isSymbolRefExpr() const { 213 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 214 } 215 216 bool isImm() const override { 217 return Kind == Immediate; 218 } 219 220 void setImmKindNone() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyNone; 223 } 224 225 void setImmKindLiteral() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyLiteral; 228 } 229 230 void setImmKindConst() const { 231 assert(isImm()); 232 Imm.Kind = ImmKindTyConst; 233 } 234 235 bool IsImmKindLiteral() const { 236 return isImm() && Imm.Kind == ImmKindTyLiteral; 237 } 238 239 bool isImmKindConst() const { 240 return isImm() && Imm.Kind == ImmKindTyConst; 241 } 242 243 bool isInlinableImm(MVT type) const; 244 bool isLiteralImm(MVT type) const; 245 246 bool isRegKind() const { 247 return Kind == Register; 248 } 249 250 bool isReg() const override { 251 return isRegKind() && !hasModifiers(); 252 } 253 254 bool isRegOrInline(unsigned RCID, MVT type) const { 255 return isRegClass(RCID) || isInlinableImm(type); 256 } 257 258 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 259 return isRegOrInline(RCID, type) || isLiteralImm(type); 260 } 261 262 bool isRegOrImmWithInt16InputMods() const { 263 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 264 } 265 266 bool isRegOrImmWithInt32InputMods() const { 267 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 268 } 269 270 bool isRegOrImmWithInt64InputMods() const { 271 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 272 } 273 274 bool isRegOrImmWithFP16InputMods() const { 275 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 276 } 277 278 bool isRegOrImmWithFP32InputMods() const { 279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 280 } 281 282 bool isRegOrImmWithFP64InputMods() const { 283 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 284 } 285 286 bool isVReg() const { 287 return isRegClass(AMDGPU::VGPR_32RegClassID) || 288 isRegClass(AMDGPU::VReg_64RegClassID) || 289 isRegClass(AMDGPU::VReg_96RegClassID) || 290 isRegClass(AMDGPU::VReg_128RegClassID) || 291 isRegClass(AMDGPU::VReg_160RegClassID) || 292 isRegClass(AMDGPU::VReg_192RegClassID) || 293 isRegClass(AMDGPU::VReg_256RegClassID) || 294 isRegClass(AMDGPU::VReg_512RegClassID) || 295 isRegClass(AMDGPU::VReg_1024RegClassID); 296 } 297 298 bool isVReg32() const { 299 return isRegClass(AMDGPU::VGPR_32RegClassID); 300 } 301 302 bool isVReg32OrOff() const { 303 return isOff() || isVReg32(); 304 } 305 306 bool isNull() const { 307 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 308 } 309 310 bool isVRegWithInputMods() const; 311 312 bool isSDWAOperand(MVT type) const; 313 bool isSDWAFP16Operand() const; 314 bool isSDWAFP32Operand() const; 315 bool isSDWAInt16Operand() const; 316 bool isSDWAInt32Operand() const; 317 318 bool isImmTy(ImmTy ImmT) const { 319 return isImm() && Imm.Type == ImmT; 320 } 321 322 bool isImmModifier() const { 323 return isImm() && Imm.Type != ImmTyNone; 324 } 325 326 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 327 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 328 bool isDMask() const { return isImmTy(ImmTyDMask); } 329 bool isDim() const { return isImmTy(ImmTyDim); } 330 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 331 bool isDA() const { return isImmTy(ImmTyDA); } 332 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 333 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 334 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 335 bool isOff() const { return isImmTy(ImmTyOff); } 336 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 337 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 338 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 339 bool isOffen() const { return isImmTy(ImmTyOffen); } 340 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 341 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 342 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 343 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 344 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 345 346 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 347 bool isGDS() const { return isImmTy(ImmTyGDS); } 348 bool isLDS() const { return isImmTy(ImmTyLDS); } 349 bool isCPol() const { return isImmTy(ImmTyCPol); } 350 bool isSWZ() const { return isImmTy(ImmTySWZ); } 351 bool isTFE() const { return isImmTy(ImmTyTFE); } 352 bool isD16() const { return isImmTy(ImmTyD16); } 353 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 354 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 355 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 356 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 357 bool isFI() const { return isImmTy(ImmTyDppFi); } 358 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 359 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 360 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 361 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 362 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 363 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 364 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 365 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 366 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 367 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 368 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 369 bool isHigh() const { return isImmTy(ImmTyHigh); } 370 371 bool isMod() const { 372 return isClampSI() || isOModSI(); 373 } 374 375 bool isRegOrImm() const { 376 return isReg() || isImm(); 377 } 378 379 bool isRegClass(unsigned RCID) const; 380 381 bool isInlineValue() const; 382 383 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 384 return isRegOrInline(RCID, type) && !hasModifiers(); 385 } 386 387 bool isSCSrcB16() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 389 } 390 391 bool isSCSrcV2B16() const { 392 return isSCSrcB16(); 393 } 394 395 bool isSCSrcB32() const { 396 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 397 } 398 399 bool isSCSrcB64() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 401 } 402 403 bool isBoolReg() const; 404 405 bool isSCSrcF16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 407 } 408 409 bool isSCSrcV2F16() const { 410 return isSCSrcF16(); 411 } 412 413 bool isSCSrcF32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 415 } 416 417 bool isSCSrcF64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 419 } 420 421 bool isSSrcB32() const { 422 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 423 } 424 425 bool isSSrcB16() const { 426 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 427 } 428 429 bool isSSrcV2B16() const { 430 llvm_unreachable("cannot happen"); 431 return isSSrcB16(); 432 } 433 434 bool isSSrcB64() const { 435 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 436 // See isVSrc64(). 437 return isSCSrcB64() || isLiteralImm(MVT::i64); 438 } 439 440 bool isSSrcF32() const { 441 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 442 } 443 444 bool isSSrcF64() const { 445 return isSCSrcB64() || isLiteralImm(MVT::f64); 446 } 447 448 bool isSSrcF16() const { 449 return isSCSrcB16() || isLiteralImm(MVT::f16); 450 } 451 452 bool isSSrcV2F16() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF16(); 455 } 456 457 bool isSSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSSrcF32(); 460 } 461 462 bool isSCSrcV2FP32() const { 463 llvm_unreachable("cannot happen"); 464 return isSCSrcF32(); 465 } 466 467 bool isSSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSSrcB32(); 470 } 471 472 bool isSCSrcV2INT32() const { 473 llvm_unreachable("cannot happen"); 474 return isSCSrcB32(); 475 } 476 477 bool isSSrcOrLdsB32() const { 478 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 479 isLiteralImm(MVT::i32) || isExpr(); 480 } 481 482 bool isVCSrcB32() const { 483 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 484 } 485 486 bool isVCSrcB64() const { 487 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 488 } 489 490 bool isVCSrcB16() const { 491 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 492 } 493 494 bool isVCSrcV2B16() const { 495 return isVCSrcB16(); 496 } 497 498 bool isVCSrcF32() const { 499 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 500 } 501 502 bool isVCSrcF64() const { 503 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 504 } 505 506 bool isVCSrcF16() const { 507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 508 } 509 510 bool isVCSrcV2F16() const { 511 return isVCSrcF16(); 512 } 513 514 bool isVSrcB32() const { 515 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 516 } 517 518 bool isVSrcB64() const { 519 return isVCSrcF64() || isLiteralImm(MVT::i64); 520 } 521 522 bool isVSrcB16() const { 523 return isVCSrcB16() || isLiteralImm(MVT::i16); 524 } 525 526 bool isVSrcV2B16() const { 527 return isVSrcB16() || isLiteralImm(MVT::v2i16); 528 } 529 530 bool isVCSrcV2FP32() const { 531 return isVCSrcF64(); 532 } 533 534 bool isVSrcV2FP32() const { 535 return isVSrcF64() || isLiteralImm(MVT::v2f32); 536 } 537 538 bool isVCSrcV2INT32() const { 539 return isVCSrcB64(); 540 } 541 542 bool isVSrcV2INT32() const { 543 return isVSrcB64() || isLiteralImm(MVT::v2i32); 544 } 545 546 bool isVSrcF32() const { 547 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 548 } 549 550 bool isVSrcF64() const { 551 return isVCSrcF64() || isLiteralImm(MVT::f64); 552 } 553 554 bool isVSrcF16() const { 555 return isVCSrcF16() || isLiteralImm(MVT::f16); 556 } 557 558 bool isVSrcV2F16() const { 559 return isVSrcF16() || isLiteralImm(MVT::v2f16); 560 } 561 562 bool isVISrcB32() const { 563 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 564 } 565 566 bool isVISrcB16() const { 567 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 568 } 569 570 bool isVISrcV2B16() const { 571 return isVISrcB16(); 572 } 573 574 bool isVISrcF32() const { 575 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 576 } 577 578 
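// Reader's note (a summary of the pattern used below, not new behavior):
// isVISrc_<Width><Type>() accepts either a register from the <Width>-bit VGPR
// tuple class or an inline constant of the given type, and the isAISrc_*
// variants later in this block do the same for AGPR tuple classes. For
// example, isVISrc_128F32() is simply
// isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32).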
bool isVISrcF16() const { 579 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 580 } 581 582 bool isVISrcV2F16() const { 583 return isVISrcF16() || isVISrcB32(); 584 } 585 586 bool isVISrc_64B64() const { 587 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 588 } 589 590 bool isVISrc_64F64() const { 591 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 592 } 593 594 bool isVISrc_64V2FP32() const { 595 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 596 } 597 598 bool isVISrc_64V2INT32() const { 599 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 600 } 601 602 bool isVISrc_256B64() const { 603 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 604 } 605 606 bool isVISrc_256F64() const { 607 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 608 } 609 610 bool isVISrc_128B16() const { 611 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 612 } 613 614 bool isVISrc_128V2B16() const { 615 return isVISrc_128B16(); 616 } 617 618 bool isVISrc_128B32() const { 619 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 620 } 621 622 bool isVISrc_128F32() const { 623 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 624 } 625 626 bool isVISrc_256V2FP32() const { 627 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 628 } 629 630 bool isVISrc_256V2INT32() const { 631 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 632 } 633 634 bool isVISrc_512B32() const { 635 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 636 } 637 638 bool isVISrc_512B16() const { 639 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 640 } 641 642 bool isVISrc_512V2B16() const { 643 return isVISrc_512B16(); 644 } 645 646 bool isVISrc_512F32() const { 647 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 648 } 649 650 bool isVISrc_512F16() const { 651 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 652 } 653 654 bool isVISrc_512V2F16() const { 655 return isVISrc_512F16() || isVISrc_512B32(); 656 } 657 658 bool isVISrc_1024B32() const { 659 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 660 } 661 662 bool isVISrc_1024B16() const { 663 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 664 } 665 666 bool isVISrc_1024V2B16() const { 667 return isVISrc_1024B16(); 668 } 669 670 bool isVISrc_1024F32() const { 671 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 672 } 673 674 bool isVISrc_1024F16() const { 675 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 676 } 677 678 bool isVISrc_1024V2F16() const { 679 return isVISrc_1024F16() || isVISrc_1024B32(); 680 } 681 682 bool isAISrcB32() const { 683 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 684 } 685 686 bool isAISrcB16() const { 687 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 688 } 689 690 bool isAISrcV2B16() const { 691 return isAISrcB16(); 692 } 693 694 bool isAISrcF32() const { 695 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 696 } 697 698 bool isAISrcF16() const { 699 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 700 } 701 702 bool isAISrcV2F16() const { 703 return isAISrcF16() || isAISrcB32(); 704 } 705 706 bool isAISrc_64B64() const { 707 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 708 } 709 710 bool isAISrc_64F64() const { 711 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 712 } 713 714 bool isAISrc_128B32() const { 715 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 716 } 717 718 bool isAISrc_128B16() const { 719 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 720 } 721 722 bool isAISrc_128V2B16() const { 723 return isAISrc_128B16(); 724 } 725 726 bool isAISrc_128F32() const { 727 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 728 } 729 730 bool isAISrc_128F16() const { 731 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 732 } 733 734 bool isAISrc_128V2F16() const { 735 return isAISrc_128F16() || isAISrc_128B32(); 736 } 737 738 bool isVISrc_128F16() const { 739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 740 } 741 742 bool isVISrc_128V2F16() const { 743 return isVISrc_128F16() || isVISrc_128B32(); 744 } 745 746 bool isAISrc_256B64() const { 747 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 748 } 749 750 bool isAISrc_256F64() const { 751 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 752 } 753 754 bool isAISrc_512B32() const { 755 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 756 } 757 758 bool isAISrc_512B16() const { 759 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 760 } 761 762 bool isAISrc_512V2B16() const { 763 return isAISrc_512B16(); 764 } 765 766 bool isAISrc_512F32() const { 767 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 768 } 769 770 bool isAISrc_512F16() const { 771 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 772 } 773 774 bool isAISrc_512V2F16() const { 775 return isAISrc_512F16() || isAISrc_512B32(); 776 } 777 778 bool isAISrc_1024B32() const { 779 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 780 } 781 782 bool isAISrc_1024B16() const { 783 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 784 } 785 786 bool isAISrc_1024V2B16() const { 787 return isAISrc_1024B16(); 788 } 789 790 bool isAISrc_1024F32() const { 791 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 792 } 793 794 bool isAISrc_1024F16() const { 795 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 796 } 797 798 bool isAISrc_1024V2F16() const { 799 return isAISrc_1024F16() || isAISrc_1024B32(); 800 } 801 802 bool isKImmFP32() const { 803 return isLiteralImm(MVT::f32); 804 } 805 806 bool isKImmFP16() const { 807 return isLiteralImm(MVT::f16); 808 } 809 810 bool isMem() const override { 811 return false; 812 } 813 814 bool isExpr() const { 815 return Kind == Expression; 816 } 817 818 bool isSoppBrTarget() const { 819 return isExpr() || isImm(); 820 } 821 822 bool isSWaitCnt() const; 823 bool isDepCtr() const; 824 bool isSDelayAlu() const; 825 bool isHwreg() const; 826 bool isSendMsg() const; 827 bool isSwizzle() const; 828 bool isSMRDOffset8() const; 829 bool isSMEMOffset() const; 830 bool isSMRDLiteralOffset() const; 831 bool isDPP8() const; 832 bool isDPPCtrl() const; 833 bool isBLGP() const; 834 bool isCBSZ() const; 835 bool isABID() const; 836 bool isGPRIdxMode() const; 837 bool isS16Imm() const; 838 bool isU16Imm() const; 839 bool isEndpgm() const; 840 bool isWaitVDST() const; 841 bool isWaitEXP() const; 842 843 StringRef getExpressionAsToken() const { 844 assert(isExpr()); 845 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 846 return S->getSymbol().getName(); 847 } 848 849 StringRef getToken() const { 850 assert(isToken()); 851 
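// Note: isToken() is also satisfied by symbol-reference expressions (see
// isToken() above), so an Expression operand may legitimately reach this
// point; its symbol name is used as the token text below.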
852 if (Kind == Expression) 853 return getExpressionAsToken(); 854 855 return StringRef(Tok.Data, Tok.Length); 856 } 857 858 int64_t getImm() const { 859 assert(isImm()); 860 return Imm.Val; 861 } 862 863 void setImm(int64_t Val) { 864 assert(isImm()); 865 Imm.Val = Val; 866 } 867 868 ImmTy getImmTy() const { 869 assert(isImm()); 870 return Imm.Type; 871 } 872 873 unsigned getReg() const override { 874 assert(isRegKind()); 875 return Reg.RegNo; 876 } 877 878 SMLoc getStartLoc() const override { 879 return StartLoc; 880 } 881 882 SMLoc getEndLoc() const override { 883 return EndLoc; 884 } 885 886 SMRange getLocRange() const { 887 return SMRange(StartLoc, EndLoc); 888 } 889 890 Modifiers getModifiers() const { 891 assert(isRegKind() || isImmTy(ImmTyNone)); 892 return isRegKind() ? Reg.Mods : Imm.Mods; 893 } 894 895 void setModifiers(Modifiers Mods) { 896 assert(isRegKind() || isImmTy(ImmTyNone)); 897 if (isRegKind()) 898 Reg.Mods = Mods; 899 else 900 Imm.Mods = Mods; 901 } 902 903 bool hasModifiers() const { 904 return getModifiers().hasModifiers(); 905 } 906 907 bool hasFPModifiers() const { 908 return getModifiers().hasFPModifiers(); 909 } 910 911 bool hasIntModifiers() const { 912 return getModifiers().hasIntModifiers(); 913 } 914 915 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 916 917 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 918 919 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 920 921 template <unsigned Bitwidth> 922 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 923 924 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 925 addKImmFPOperands<16>(Inst, N); 926 } 927 928 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 929 addKImmFPOperands<32>(Inst, N); 930 } 931 932 void addRegOperands(MCInst &Inst, unsigned N) const; 933 934 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 935 addRegOperands(Inst, N); 936 } 937 938 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 939 if (isRegKind()) 940 addRegOperands(Inst, N); 941 else if (isExpr()) 942 Inst.addOperand(MCOperand::createExpr(Expr)); 943 else 944 addImmOperands(Inst, N); 945 } 946 947 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 948 Modifiers Mods = getModifiers(); 949 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 950 if (isRegKind()) { 951 addRegOperands(Inst, N); 952 } else { 953 addImmOperands(Inst, N, false); 954 } 955 } 956 957 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 958 assert(!hasIntModifiers()); 959 addRegOrImmWithInputModsOperands(Inst, N); 960 } 961 962 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 963 assert(!hasFPModifiers()); 964 addRegOrImmWithInputModsOperands(Inst, N); 965 } 966 967 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 968 Modifiers Mods = getModifiers(); 969 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 970 assert(isRegKind()); 971 addRegOperands(Inst, N); 972 } 973 974 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 975 assert(!hasIntModifiers()); 976 addRegWithInputModsOperands(Inst, N); 977 } 978 979 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 980 assert(!hasFPModifiers()); 981 addRegWithInputModsOperands(Inst, N); 982 } 983 984 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 985 if (isImm()) 986 addImmOperands(Inst, N); 987 else { 988 assert(isExpr()); 
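// Branch targets written as labels are parsed as expressions; they are
// emitted as MCExpr operands here so that the later fixup/relocation step
// can resolve the final offset.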
989 Inst.addOperand(MCOperand::createExpr(Expr)); 990 } 991 } 992 993 static void printImmTy(raw_ostream& OS, ImmTy Type) { 994 switch (Type) { 995 case ImmTyNone: OS << "None"; break; 996 case ImmTyGDS: OS << "GDS"; break; 997 case ImmTyLDS: OS << "LDS"; break; 998 case ImmTyOffen: OS << "Offen"; break; 999 case ImmTyIdxen: OS << "Idxen"; break; 1000 case ImmTyAddr64: OS << "Addr64"; break; 1001 case ImmTyOffset: OS << "Offset"; break; 1002 case ImmTyInstOffset: OS << "InstOffset"; break; 1003 case ImmTyOffset0: OS << "Offset0"; break; 1004 case ImmTyOffset1: OS << "Offset1"; break; 1005 case ImmTyCPol: OS << "CPol"; break; 1006 case ImmTySWZ: OS << "SWZ"; break; 1007 case ImmTyTFE: OS << "TFE"; break; 1008 case ImmTyD16: OS << "D16"; break; 1009 case ImmTyFORMAT: OS << "FORMAT"; break; 1010 case ImmTyClampSI: OS << "ClampSI"; break; 1011 case ImmTyOModSI: OS << "OModSI"; break; 1012 case ImmTyDPP8: OS << "DPP8"; break; 1013 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1014 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1015 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1016 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1017 case ImmTyDppFi: OS << "FI"; break; 1018 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1019 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1020 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1021 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1022 case ImmTyDMask: OS << "DMask"; break; 1023 case ImmTyDim: OS << "Dim"; break; 1024 case ImmTyUNorm: OS << "UNorm"; break; 1025 case ImmTyDA: OS << "DA"; break; 1026 case ImmTyR128A16: OS << "R128A16"; break; 1027 case ImmTyA16: OS << "A16"; break; 1028 case ImmTyLWE: OS << "LWE"; break; 1029 case ImmTyOff: OS << "Off"; break; 1030 case ImmTyExpTgt: OS << "ExpTgt"; break; 1031 case ImmTyExpCompr: OS << "ExpCompr"; break; 1032 case ImmTyExpVM: OS << "ExpVM"; break; 1033 case ImmTyHwreg: OS << "Hwreg"; break; 1034 case ImmTySendMsg: OS << "SendMsg"; break; 1035 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1036 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1037 case ImmTyAttrChan: OS << "AttrChan"; break; 1038 case ImmTyOpSel: OS << "OpSel"; break; 1039 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1040 case ImmTyNegLo: OS << "NegLo"; break; 1041 case ImmTyNegHi: OS << "NegHi"; break; 1042 case ImmTySwizzle: OS << "Swizzle"; break; 1043 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1044 case ImmTyHigh: OS << "High"; break; 1045 case ImmTyBLGP: OS << "BLGP"; break; 1046 case ImmTyCBSZ: OS << "CBSZ"; break; 1047 case ImmTyABID: OS << "ABID"; break; 1048 case ImmTyEndpgm: OS << "Endpgm"; break; 1049 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1050 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1051 } 1052 } 1053 1054 void print(raw_ostream &OS) const override { 1055 switch (Kind) { 1056 case Register: 1057 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1058 break; 1059 case Immediate: 1060 OS << '<' << getImm(); 1061 if (getImmTy() != ImmTyNone) { 1062 OS << " type: "; printImmTy(OS, getImmTy()); 1063 } 1064 OS << " mods: " << Imm.Mods << '>'; 1065 break; 1066 case Token: 1067 OS << '\'' << getToken() << '\''; 1068 break; 1069 case Expression: 1070 OS << "<expr " << *Expr << '>'; 1071 break; 1072 } 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1076 int64_t Val, SMLoc Loc, 1077 ImmTy Type = ImmTyNone, 1078 bool IsFPImm = false) { 1079 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1080 Op->Imm.Val = Val; 1081 
Op->Imm.IsFPImm = IsFPImm; 1082 Op->Imm.Kind = ImmKindTyNone; 1083 Op->Imm.Type = Type; 1084 Op->Imm.Mods = Modifiers(); 1085 Op->StartLoc = Loc; 1086 Op->EndLoc = Loc; 1087 return Op; 1088 } 1089 1090 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1091 StringRef Str, SMLoc Loc, 1092 bool HasExplicitEncodingSize = true) { 1093 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1094 Res->Tok.Data = Str.data(); 1095 Res->Tok.Length = Str.size(); 1096 Res->StartLoc = Loc; 1097 Res->EndLoc = Loc; 1098 return Res; 1099 } 1100 1101 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1102 unsigned RegNo, SMLoc S, 1103 SMLoc E) { 1104 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1105 Op->Reg.RegNo = RegNo; 1106 Op->Reg.Mods = Modifiers(); 1107 Op->StartLoc = S; 1108 Op->EndLoc = E; 1109 return Op; 1110 } 1111 1112 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1113 const class MCExpr *Expr, SMLoc S) { 1114 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1115 Op->Expr = Expr; 1116 Op->StartLoc = S; 1117 Op->EndLoc = S; 1118 return Op; 1119 } 1120 }; 1121 1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1123 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1124 return OS; 1125 } 1126 1127 //===----------------------------------------------------------------------===// 1128 // AsmParser 1129 //===----------------------------------------------------------------------===// 1130 1131 // Holds info related to the current kernel, e.g. count of SGPRs used. 1132 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1133 // .amdgpu_hsa_kernel or at EOF. 1134 class KernelScopeInfo { 1135 int SgprIndexUnusedMin = -1; 1136 int VgprIndexUnusedMin = -1; 1137 int AgprIndexUnusedMin = -1; 1138 MCContext *Ctx = nullptr; 1139 MCSubtargetInfo const *MSTI = nullptr; 1140 1141 void usesSgprAt(int i) { 1142 if (i >= SgprIndexUnusedMin) { 1143 SgprIndexUnusedMin = ++i; 1144 if (Ctx) { 1145 MCSymbol* const Sym = 1146 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1147 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1148 } 1149 } 1150 } 1151 1152 void usesVgprAt(int i) { 1153 if (i >= VgprIndexUnusedMin) { 1154 VgprIndexUnusedMin = ++i; 1155 if (Ctx) { 1156 MCSymbol* const Sym = 1157 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1158 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1159 VgprIndexUnusedMin); 1160 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1161 } 1162 } 1163 } 1164 1165 void usesAgprAt(int i) { 1166 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1167 if (!hasMAIInsts(*MSTI)) 1168 return; 1169 1170 if (i >= AgprIndexUnusedMin) { 1171 AgprIndexUnusedMin = ++i; 1172 if (Ctx) { 1173 MCSymbol* const Sym = 1174 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1175 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1176 1177 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1178 MCSymbol* const vSym = 1179 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1180 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1181 VgprIndexUnusedMin); 1182 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1183 } 1184 } 1185 } 1186 1187 public: 1188 KernelScopeInfo() = default; 1189 1190 void initialize(MCContext &Context) { 1191 Ctx = &Context; 1192 MSTI = 
Ctx->getSubtargetInfo(); 1193 1194 usesSgprAt(SgprIndexUnusedMin = -1); 1195 usesVgprAt(VgprIndexUnusedMin = -1); 1196 if (hasMAIInsts(*MSTI)) { 1197 usesAgprAt(AgprIndexUnusedMin = -1); 1198 } 1199 } 1200 1201 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1202 unsigned RegWidth) { 1203 switch (RegKind) { 1204 case IS_SGPR: 1205 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1206 break; 1207 case IS_AGPR: 1208 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1209 break; 1210 case IS_VGPR: 1211 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1212 break; 1213 default: 1214 break; 1215 } 1216 } 1217 }; 1218 1219 class AMDGPUAsmParser : public MCTargetAsmParser { 1220 MCAsmParser &Parser; 1221 1222 // Number of extra operands parsed after the first optional operand. 1223 // This may be necessary to skip hardcoded mandatory operands. 1224 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1225 1226 unsigned ForcedEncodingSize = 0; 1227 bool ForcedDPP = false; 1228 bool ForcedSDWA = false; 1229 KernelScopeInfo KernelScope; 1230 unsigned CPolSeen; 1231 1232 /// @name Auto-generated Match Functions 1233 /// { 1234 1235 #define GET_ASSEMBLER_HEADER 1236 #include "AMDGPUGenAsmMatcher.inc" 1237 1238 /// } 1239 1240 private: 1241 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1242 bool OutOfRangeError(SMRange Range); 1243 /// Calculate VGPR/SGPR blocks required for given target, reserved 1244 /// registers, and user-specified NextFreeXGPR values. 1245 /// 1246 /// \param Features [in] Target features, used for bug corrections. 1247 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1248 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1249 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1250 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1251 /// descriptor field, if valid. 1252 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1253 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1254 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1255 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1256 /// \param VGPRBlocks [out] Result VGPR block count. 1257 /// \param SGPRBlocks [out] Result SGPR block count. 1258 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1259 bool FlatScrUsed, bool XNACKUsed, 1260 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1261 SMRange VGPRRange, unsigned NextFreeSGPR, 1262 SMRange SGPRRange, unsigned &VGPRBlocks, 1263 unsigned &SGPRBlocks); 1264 bool ParseDirectiveAMDGCNTarget(); 1265 bool ParseDirectiveAMDHSAKernel(); 1266 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1267 bool ParseDirectiveHSACodeObjectVersion(); 1268 bool ParseDirectiveHSACodeObjectISA(); 1269 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1270 bool ParseDirectiveAMDKernelCodeT(); 1271 // TODO: Possibly make subtargetHasRegister const. 1272 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1273 bool ParseDirectiveAMDGPUHsaKernel(); 1274 1275 bool ParseDirectiveISAVersion(); 1276 bool ParseDirectiveHSAMetadata(); 1277 bool ParseDirectivePALMetadataBegin(); 1278 bool ParseDirectivePALMetadata(); 1279 bool ParseDirectiveAMDGPULDS(); 1280 1281 /// Common code to parse out a block of text (typically YAML) between start and 1282 /// end directives. 
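/// For example (using the directive names handled elsewhere in this parser),
/// the HSA metadata block between .amdgpu_metadata and .end_amdgpu_metadata
/// is collected this way.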
1283 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1284 const char *AssemblerDirectiveEnd,
1285 std::string &CollectString);
1286
1287 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1288 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1289 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290 unsigned &RegNum, unsigned &RegWidth,
1291 bool RestoreOnFailure = false);
1292 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1293 unsigned &RegNum, unsigned &RegWidth,
1294 SmallVectorImpl<AsmToken> &Tokens);
1295 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1296 unsigned &RegWidth,
1297 SmallVectorImpl<AsmToken> &Tokens);
1298 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1299 unsigned &RegWidth,
1300 SmallVectorImpl<AsmToken> &Tokens);
1301 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1302 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1303 bool ParseRegRange(unsigned& Num, unsigned& Width);
1304 unsigned getRegularReg(RegisterKind RegKind,
1305 unsigned RegNum,
1306 unsigned RegWidth,
1307 SMLoc Loc);
1308
1309 bool isRegister();
1310 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1311 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1312 void initializeGprCountSymbol(RegisterKind RegKind);
1313 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1314 unsigned RegWidth);
1315 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1316 bool IsAtomic, bool IsLds = false);
1317 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1318 bool IsGdsHardcoded);
1319
1320 public:
1321 enum AMDGPUMatchResultTy {
1322 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1323 };
1324 enum OperandMode {
1325 OperandMode_Default,
1326 OperandMode_NSA,
1327 };
1328
1329 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1330
1331 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1332 const MCInstrInfo &MII,
1333 const MCTargetOptions &Options)
1334 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1335 MCAsmParserExtension::Initialize(Parser);
1336
1337 if (getFeatureBits().none()) {
1338 // Set default features.
1339 copySTI().ToggleFeature("southern-islands");
1340 }
1341
1342 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343
1344 {
1345 // TODO: make these pre-defined variables read-only.
1346 // Currently there is no suitable machinery in the core llvm-mc for this.
1347 // MCSymbol::isRedefinable is intended for another purpose, and
1348 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
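// For example (illustrative values): assembling for gfx900 under a v3+ HSA
// ABI defines .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor = 0
// and .amdgcn.gfx_generation_stepping = 0, which .if directives in the
// source can then test.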
1349 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1350 MCContext &Ctx = getContext(); 1351 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1352 MCSymbol *Sym = 1353 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1354 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1355 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1356 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1357 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1358 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1359 } else { 1360 MCSymbol *Sym = 1361 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1362 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1363 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1364 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1365 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1366 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1367 } 1368 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1369 initializeGprCountSymbol(IS_VGPR); 1370 initializeGprCountSymbol(IS_SGPR); 1371 } else 1372 KernelScope.initialize(getContext()); 1373 } 1374 } 1375 1376 bool hasMIMG_R128() const { 1377 return AMDGPU::hasMIMG_R128(getSTI()); 1378 } 1379 1380 bool hasPackedD16() const { 1381 return AMDGPU::hasPackedD16(getSTI()); 1382 } 1383 1384 bool hasGFX10A16() const { 1385 return AMDGPU::hasGFX10A16(getSTI()); 1386 } 1387 1388 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1389 1390 bool isSI() const { 1391 return AMDGPU::isSI(getSTI()); 1392 } 1393 1394 bool isCI() const { 1395 return AMDGPU::isCI(getSTI()); 1396 } 1397 1398 bool isVI() const { 1399 return AMDGPU::isVI(getSTI()); 1400 } 1401 1402 bool isGFX9() const { 1403 return AMDGPU::isGFX9(getSTI()); 1404 } 1405 1406 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
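// (As of this writing, gfx940 keeps the gfx90a instruction features enabled,
// which is why isGFX90A() below also reports true for it; see the subtarget
// feature definitions.)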
1407 bool isGFX90A() const { 1408 return AMDGPU::isGFX90A(getSTI()); 1409 } 1410 1411 bool isGFX940() const { 1412 return AMDGPU::isGFX940(getSTI()); 1413 } 1414 1415 bool isGFX9Plus() const { 1416 return AMDGPU::isGFX9Plus(getSTI()); 1417 } 1418 1419 bool isGFX10() const { 1420 return AMDGPU::isGFX10(getSTI()); 1421 } 1422 1423 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1424 1425 bool isGFX11() const { 1426 return AMDGPU::isGFX11(getSTI()); 1427 } 1428 1429 bool isGFX11Plus() const { 1430 return AMDGPU::isGFX11Plus(getSTI()); 1431 } 1432 1433 bool isGFX10_BEncoding() const { 1434 return AMDGPU::isGFX10_BEncoding(getSTI()); 1435 } 1436 1437 bool hasInv2PiInlineImm() const { 1438 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1439 } 1440 1441 bool hasFlatOffsets() const { 1442 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1443 } 1444 1445 bool hasArchitectedFlatScratch() const { 1446 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1447 } 1448 1449 bool hasSGPR102_SGPR103() const { 1450 return !isVI() && !isGFX9(); 1451 } 1452 1453 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1454 1455 bool hasIntClamp() const { 1456 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1457 } 1458 1459 AMDGPUTargetStreamer &getTargetStreamer() { 1460 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1461 return static_cast<AMDGPUTargetStreamer &>(TS); 1462 } 1463 1464 const MCRegisterInfo *getMRI() const { 1465 // We need this const_cast because for some reason getContext() is not const 1466 // in MCAsmParser. 1467 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1468 } 1469 1470 const MCInstrInfo *getMII() const { 1471 return &MII; 1472 } 1473 1474 const FeatureBitset &getFeatureBits() const { 1475 return getSTI().getFeatureBits(); 1476 } 1477 1478 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1479 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1480 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1481 1482 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1483 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1484 bool isForcedDPP() const { return ForcedDPP; } 1485 bool isForcedSDWA() const { return ForcedSDWA; } 1486 ArrayRef<unsigned> getMatchedVariants() const; 1487 StringRef getMatchedVariantName() const; 1488 1489 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1490 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1491 bool RestoreOnFailure); 1492 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1493 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1494 SMLoc &EndLoc) override; 1495 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1496 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1497 unsigned Kind) override; 1498 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1499 OperandVector &Operands, MCStreamer &Out, 1500 uint64_t &ErrorInfo, 1501 bool MatchingInlineAsm) override; 1502 bool ParseDirective(AsmToken DirectiveID) override; 1503 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1504 OperandMode Mode = OperandMode_Default); 1505 StringRef parseMnemonicSuffix(StringRef Name); 1506 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1507 SMLoc NameLoc, OperandVector &Operands) override; 1508 //bool 
ProcessInstruction(MCInst &Inst); 1509 1510 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1511 1512 OperandMatchResultTy 1513 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1514 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1515 bool (*ConvertResult)(int64_t &) = nullptr); 1516 1517 OperandMatchResultTy 1518 parseOperandArrayWithPrefix(const char *Prefix, 1519 OperandVector &Operands, 1520 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1521 bool (*ConvertResult)(int64_t&) = nullptr); 1522 1523 OperandMatchResultTy 1524 parseNamedBit(StringRef Name, OperandVector &Operands, 1525 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1526 OperandMatchResultTy parseCPol(OperandVector &Operands); 1527 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1528 StringRef &Value, 1529 SMLoc &StringLoc); 1530 1531 bool isModifier(); 1532 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1533 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1534 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1535 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1536 bool parseSP3NegModifier(); 1537 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1538 OperandMatchResultTy parseReg(OperandVector &Operands); 1539 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1540 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1541 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1542 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1543 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1544 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1545 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1546 OperandMatchResultTy parseUfmt(int64_t &Format); 1547 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1548 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1549 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1550 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1551 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1552 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1553 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1554 1555 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1556 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1557 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1558 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1559 1560 bool parseCnt(int64_t &IntVal); 1561 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1562 1563 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1564 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1565 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1566 1567 bool parseDelay(int64_t &Delay); 1568 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1569 1570 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1571 1572 private: 1573 struct OperandInfoTy { 1574 SMLoc Loc; 1575 int64_t Id; 
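// IsSymbolic records whether the value was written using a symbolic name
// (e.g. hwreg(HW_REG_MODE) rather than a raw code), and IsDefined whether
// the field was present at all.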
1576 bool IsSymbolic = false; 1577 bool IsDefined = false; 1578 1579 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1580 }; 1581 1582 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1583 bool validateSendMsg(const OperandInfoTy &Msg, 1584 const OperandInfoTy &Op, 1585 const OperandInfoTy &Stream); 1586 1587 bool parseHwregBody(OperandInfoTy &HwReg, 1588 OperandInfoTy &Offset, 1589 OperandInfoTy &Width); 1590 bool validateHwreg(const OperandInfoTy &HwReg, 1591 const OperandInfoTy &Offset, 1592 const OperandInfoTy &Width); 1593 1594 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1595 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1596 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1597 1598 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1599 const OperandVector &Operands) const; 1600 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1601 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1602 SMLoc getLitLoc(const OperandVector &Operands) const; 1603 SMLoc getConstLoc(const OperandVector &Operands) const; 1604 1605 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1606 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1607 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1608 bool validateSOPLiteral(const MCInst &Inst) const; 1609 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1610 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1611 bool validateIntClampSupported(const MCInst &Inst); 1612 bool validateMIMGAtomicDMask(const MCInst &Inst); 1613 bool validateMIMGGatherDMask(const MCInst &Inst); 1614 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1615 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1616 bool validateMIMGAddrSize(const MCInst &Inst); 1617 bool validateMIMGD16(const MCInst &Inst); 1618 bool validateMIMGDim(const MCInst &Inst); 1619 bool validateMIMGMSAA(const MCInst &Inst); 1620 bool validateOpSel(const MCInst &Inst); 1621 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1622 bool validateVccOperand(unsigned Reg) const; 1623 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1624 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateAGPRLdSt(const MCInst &Inst) const; 1627 bool validateVGPRAlign(const MCInst &Inst) const; 1628 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1630 bool validateDivScale(const MCInst &Inst); 1631 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1632 const SMLoc &IDLoc); 1633 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1634 const SMLoc &IDLoc); 1635 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1636 unsigned getConstantBusLimit(unsigned Opcode) const; 1637 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1638 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1639 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1640 1641 bool isSupportedMnemo(StringRef Mnemo, 1642 const FeatureBitset &FBS); 1643 bool isSupportedMnemo(StringRef Mnemo, 1644 
const FeatureBitset &FBS, 1645 ArrayRef<unsigned> Variants); 1646 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1647 1648 bool isId(const StringRef Id) const; 1649 bool isId(const AsmToken &Token, const StringRef Id) const; 1650 bool isToken(const AsmToken::TokenKind Kind) const; 1651 bool trySkipId(const StringRef Id); 1652 bool trySkipId(const StringRef Pref, const StringRef Id); 1653 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1654 bool trySkipToken(const AsmToken::TokenKind Kind); 1655 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1656 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1657 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1658 1659 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1660 AsmToken::TokenKind getTokenKind() const; 1661 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1662 bool parseExpr(OperandVector &Operands); 1663 StringRef getTokenStr() const; 1664 AsmToken peekToken(); 1665 AsmToken getToken() const; 1666 SMLoc getLoc() const; 1667 void lex(); 1668 1669 public: 1670 void onBeginOfFile() override; 1671 1672 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1673 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1674 1675 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1676 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1677 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1678 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1679 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1680 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1681 1682 bool parseSwizzleOperand(int64_t &Op, 1683 const unsigned MinVal, 1684 const unsigned MaxVal, 1685 const StringRef ErrMsg, 1686 SMLoc &Loc); 1687 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1688 const unsigned MinVal, 1689 const unsigned MaxVal, 1690 const StringRef ErrMsg); 1691 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1692 bool parseSwizzleOffset(int64_t &Imm); 1693 bool parseSwizzleMacro(int64_t &Imm); 1694 bool parseSwizzleQuadPerm(int64_t &Imm); 1695 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1696 bool parseSwizzleBroadcast(int64_t &Imm); 1697 bool parseSwizzleSwap(int64_t &Imm); 1698 bool parseSwizzleReverse(int64_t &Imm); 1699 1700 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1701 int64_t parseGPRIdxMacro(); 1702 1703 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1704 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1705 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1706 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1707 1708 AMDGPUOperand::Ptr defaultCPol() const; 1709 1710 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1711 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1712 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1713 AMDGPUOperand::Ptr defaultFlatOffset() const; 1714 1715 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1716 1717 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1718 OptionalImmIndexMap &OptionalIdx); 1719 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1720 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1721 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1722 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1723 OptionalImmIndexMap &OptionalIdx); 1724 1725 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1726 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1727 1728 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1729 bool IsAtomic = false); 1730 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1731 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1732 1733 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1734 1735 bool parseDimId(unsigned &Encoding); 1736 OperandMatchResultTy parseDim(OperandVector &Operands); 1737 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1738 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1739 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1740 int64_t parseDPPCtrlSel(StringRef Ctrl); 1741 int64_t parseDPPCtrlPerm(); 1742 AMDGPUOperand::Ptr defaultRowMask() const; 1743 AMDGPUOperand::Ptr defaultBankMask() const; 1744 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1745 AMDGPUOperand::Ptr defaultFI() const; 1746 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1747 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1748 1749 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1750 AMDGPUOperand::ImmTy Type); 1751 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1752 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1753 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1754 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1755 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1756 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1757 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1758 uint64_t BasicInstType, 1759 bool SkipDstVcc = false, 1760 bool SkipSrcVcc = false); 1761 1762 AMDGPUOperand::Ptr defaultBLGP() const; 1763 AMDGPUOperand::Ptr defaultCBSZ() const; 1764 AMDGPUOperand::Ptr defaultABID() const; 1765 1766 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1767 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1768 1769 AMDGPUOperand::Ptr defaultWaitVDST() const; 1770 AMDGPUOperand::Ptr defaultWaitEXP() const; 1771 }; 1772 1773 struct OptionalOperand { 1774 const char *Name; 1775 AMDGPUOperand::ImmTy Type; 1776 bool IsBit; 1777 bool (*ConvertResult)(int64_t&); 1778 }; 1779 1780 } // end anonymous namespace 1781 1782 // May be called with integer type with equivalent bitwidth. 
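// For example, getFltSemantics(MVT::i32) and getFltSemantics(MVT::f32) both
// resolve to APFloat::IEEEsingle(); only the operand size matters here.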
1783 static const fltSemantics *getFltSemantics(unsigned Size) { 1784 switch (Size) { 1785 case 4: 1786 return &APFloat::IEEEsingle(); 1787 case 8: 1788 return &APFloat::IEEEdouble(); 1789 case 2: 1790 return &APFloat::IEEEhalf(); 1791 default: 1792 llvm_unreachable("unsupported fp type"); 1793 } 1794 } 1795 1796 static const fltSemantics *getFltSemantics(MVT VT) { 1797 return getFltSemantics(VT.getSizeInBits() / 8); 1798 } 1799 1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1801 switch (OperandType) { 1802 case AMDGPU::OPERAND_REG_IMM_INT32: 1803 case AMDGPU::OPERAND_REG_IMM_FP32: 1804 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1805 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1806 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1808 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1810 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1811 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1812 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1813 case AMDGPU::OPERAND_KIMM32: 1814 return &APFloat::IEEEsingle(); 1815 case AMDGPU::OPERAND_REG_IMM_INT64: 1816 case AMDGPU::OPERAND_REG_IMM_FP64: 1817 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1818 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1820 return &APFloat::IEEEdouble(); 1821 case AMDGPU::OPERAND_REG_IMM_INT16: 1822 case AMDGPU::OPERAND_REG_IMM_FP16: 1823 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1824 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1828 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1829 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1830 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1831 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1832 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1833 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1834 case AMDGPU::OPERAND_KIMM16: 1835 return &APFloat::IEEEhalf(); 1836 default: 1837 llvm_unreachable("unsupported fp type"); 1838 } 1839 } 1840 1841 //===----------------------------------------------------------------------===// 1842 // Operand 1843 //===----------------------------------------------------------------------===// 1844 1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1846 bool Lost; 1847 1848 // Convert literal to single precision 1849 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1850 APFloat::rmNearestTiesToEven, 1851 &Lost); 1852 // We allow precision lost but not overflow or underflow 1853 if (Status != APFloat::opOK && 1854 Lost && 1855 ((Status & APFloat::opOverflow) != 0 || 1856 (Status & APFloat::opUnderflow) != 0)) { 1857 return false; 1858 } 1859 1860 return true; 1861 } 1862 1863 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1864 return isUIntN(Size, Val) || isIntN(Size, Val); 1865 } 1866 1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1868 if (VT.getScalarType() == MVT::i16) { 1869 // FP immediate values are broken. 1870 return isInlinableIntLiteral(Val); 1871 } 1872 1873 // f16/v2f16 operands work correctly for all values. 1874 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1875 } 1876 1877 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1878 1879 // This is a hack to enable named inline values like 1880 // shared_base with both 32-bit and 64-bit operands. 1881 // Note that these values are defined as 1882 // 32-bit operands only. 
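// For example, an operand written as 'src_shared_base' is accepted below
// even where a 64-bit source is expected, although the named value itself
// is defined as a 32-bit operand.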
1883 if (isInlineValue()) { 1884 return true; 1885 } 1886 1887 if (!isImmTy(ImmTyNone)) { 1888 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1889 return false; 1890 } 1891 // TODO: We should avoid using host float here. It would be better to 1892 // check the float bit values which is what a few other places do. 1893 // We've had bot failures before due to weird NaN support on mips hosts. 1894 1895 APInt Literal(64, Imm.Val); 1896 1897 if (Imm.IsFPImm) { // We got fp literal token 1898 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1899 return AMDGPU::isInlinableLiteral64(Imm.Val, 1900 AsmParser->hasInv2PiInlineImm()); 1901 } 1902 1903 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1904 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1905 return false; 1906 1907 if (type.getScalarSizeInBits() == 16) { 1908 return isInlineableLiteralOp16( 1909 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1910 type, AsmParser->hasInv2PiInlineImm()); 1911 } 1912 1913 // Check if single precision literal is inlinable 1914 return AMDGPU::isInlinableLiteral32( 1915 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1916 AsmParser->hasInv2PiInlineImm()); 1917 } 1918 1919 // We got int literal token. 1920 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1921 return AMDGPU::isInlinableLiteral64(Imm.Val, 1922 AsmParser->hasInv2PiInlineImm()); 1923 } 1924 1925 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1926 return false; 1927 } 1928 1929 if (type.getScalarSizeInBits() == 16) { 1930 return isInlineableLiteralOp16( 1931 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1932 type, AsmParser->hasInv2PiInlineImm()); 1933 } 1934 1935 return AMDGPU::isInlinableLiteral32( 1936 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1937 AsmParser->hasInv2PiInlineImm()); 1938 } 1939 1940 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1941 // Check that this immediate can be added as literal 1942 if (!isImmTy(ImmTyNone)) { 1943 return false; 1944 } 1945 1946 if (!Imm.IsFPImm) { 1947 // We got int literal token. 1948 1949 if (type == MVT::f64 && hasFPModifiers()) { 1950 // Cannot apply fp modifiers to int literals preserving the same semantics 1951 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1952 // disable these cases. 1953 return false; 1954 } 1955 1956 unsigned Size = type.getSizeInBits(); 1957 if (Size == 64) 1958 Size = 32; 1959 1960 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1961 // types. 1962 return isSafeTruncation(Imm.Val, Size); 1963 } 1964 1965 // We got fp literal token 1966 if (type == MVT::f64) { // Expected 64-bit fp operand 1967 // We would set low 64-bits of literal to zeroes but we accept this literals 1968 return true; 1969 } 1970 1971 if (type == MVT::i64) { // Expected 64-bit int operand 1972 // We don't allow fp literals in 64-bit integer instructions. It is 1973 // unclear how we should encode them. 1974 return false; 1975 } 1976 1977 // We allow fp literals with f16x2 operands assuming that the specified 1978 // literal goes into the lower half and the upper half is zero. We also 1979 // require that the literal may be losslessly converted to f16. 1980 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1981 (type == MVT::v2i16)? MVT::i16 : 1982 (type == MVT::v2f32)? 
MVT::f32 : type; 1983 1984 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1985 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1986 } 1987 1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1989 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1990 } 1991 1992 bool AMDGPUOperand::isVRegWithInputMods() const { 1993 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1994 // GFX90A allows DPP on 64-bit operands. 1995 (isRegClass(AMDGPU::VReg_64RegClassID) && 1996 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1997 } 1998 1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2000 if (AsmParser->isVI()) 2001 return isVReg32(); 2002 else if (AsmParser->isGFX9Plus()) 2003 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2004 else 2005 return false; 2006 } 2007 2008 bool AMDGPUOperand::isSDWAFP16Operand() const { 2009 return isSDWAOperand(MVT::f16); 2010 } 2011 2012 bool AMDGPUOperand::isSDWAFP32Operand() const { 2013 return isSDWAOperand(MVT::f32); 2014 } 2015 2016 bool AMDGPUOperand::isSDWAInt16Operand() const { 2017 return isSDWAOperand(MVT::i16); 2018 } 2019 2020 bool AMDGPUOperand::isSDWAInt32Operand() const { 2021 return isSDWAOperand(MVT::i32); 2022 } 2023 2024 bool AMDGPUOperand::isBoolReg() const { 2025 auto FB = AsmParser->getFeatureBits(); 2026 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2027 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2028 } 2029 2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2031 { 2032 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2033 assert(Size == 2 || Size == 4 || Size == 8); 2034 2035 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2036 2037 if (Imm.Mods.Abs) { 2038 Val &= ~FpSignMask; 2039 } 2040 if (Imm.Mods.Neg) { 2041 Val ^= FpSignMask; 2042 } 2043 2044 return Val; 2045 } 2046 2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2048 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2049 Inst.getNumOperands())) { 2050 addLiteralImmOperand(Inst, Imm.Val, 2051 ApplyModifiers & 2052 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2053 } else { 2054 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2055 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2056 setImmKindNone(); 2057 } 2058 } 2059 2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2061 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2062 auto OpNum = Inst.getNumOperands(); 2063 // Check that this operand accepts literals 2064 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2065 2066 if (ApplyModifiers) { 2067 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2068 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2069 Val = applyInputFPModifiers(Val, Size); 2070 } 2071 2072 APInt Literal(64, Val); 2073 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2074 2075 if (Imm.IsFPImm) { // We got fp literal token 2076 switch (OpTy) { 2077 case AMDGPU::OPERAND_REG_IMM_INT64: 2078 case AMDGPU::OPERAND_REG_IMM_FP64: 2079 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2080 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2082 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2083 AsmParser->hasInv2PiInlineImm())) { 2084 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2085 setImmKindConst(); 2086 return; 2087 } 2088 2089 // Non-inlineable 2090 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2091 // For fp operands we check if low 32 bits are zeros 2092 if (Literal.getLoBits(32) != 0) { 2093 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2094 "Can't encode literal as exact 64-bit floating-point operand. " 2095 "Low 32-bits will be set to zero"); 2096 } 2097 2098 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2099 setImmKindLiteral(); 2100 return; 2101 } 2102 2103 // We don't allow fp literals in 64-bit integer instructions. It is 2104 // unclear how we should encode them. This case should be checked earlier 2105 // in predicate methods (isLiteralImm()) 2106 llvm_unreachable("fp literal in 64-bit integer instruction."); 2107 2108 case AMDGPU::OPERAND_REG_IMM_INT32: 2109 case AMDGPU::OPERAND_REG_IMM_FP32: 2110 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2111 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2112 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2115 case AMDGPU::OPERAND_REG_IMM_INT16: 2116 case AMDGPU::OPERAND_REG_IMM_FP16: 2117 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2118 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2119 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2124 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2126 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2127 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2128 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2129 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2130 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2131 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2132 case AMDGPU::OPERAND_KIMM32: 2133 case AMDGPU::OPERAND_KIMM16: { 2134 bool lost; 2135 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2136 // Convert literal to single precision 2137 FPLiteral.convert(*getOpFltSemantics(OpTy), 2138 APFloat::rmNearestTiesToEven, &lost); 2139 // We allow precision lost but not overflow or underflow. This should be 2140 // checked earlier in isLiteralImm() 2141 2142 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2143 Inst.addOperand(MCOperand::createImm(ImmVal)); 2144 setImmKindLiteral(); 2145 return; 2146 } 2147 default: 2148 llvm_unreachable("invalid operand size"); 2149 } 2150 2151 return; 2152 } 2153 2154 // We got int literal token. 2155 // Only sign extend inline immediates. 
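// For example, with a 32-bit source operand an integer literal such as -4
// is an inline constant and is emitted unchanged, whereas 0x12345678 is not
// inlinable, so only its low 32 bits are emitted as a literal.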
2156 switch (OpTy) { 2157 case AMDGPU::OPERAND_REG_IMM_INT32: 2158 case AMDGPU::OPERAND_REG_IMM_FP32: 2159 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2163 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2164 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2166 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2167 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2168 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2169 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2170 if (isSafeTruncation(Val, 32) && 2171 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2172 AsmParser->hasInv2PiInlineImm())) { 2173 Inst.addOperand(MCOperand::createImm(Val)); 2174 setImmKindConst(); 2175 return; 2176 } 2177 2178 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2179 setImmKindLiteral(); 2180 return; 2181 2182 case AMDGPU::OPERAND_REG_IMM_INT64: 2183 case AMDGPU::OPERAND_REG_IMM_FP64: 2184 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2185 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2186 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2187 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2188 Inst.addOperand(MCOperand::createImm(Val)); 2189 setImmKindConst(); 2190 return; 2191 } 2192 2193 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2194 setImmKindLiteral(); 2195 return; 2196 2197 case AMDGPU::OPERAND_REG_IMM_INT16: 2198 case AMDGPU::OPERAND_REG_IMM_FP16: 2199 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2200 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2201 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2202 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2203 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2204 if (isSafeTruncation(Val, 16) && 2205 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2206 AsmParser->hasInv2PiInlineImm())) { 2207 Inst.addOperand(MCOperand::createImm(Val)); 2208 setImmKindConst(); 2209 return; 2210 } 2211 2212 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2213 setImmKindLiteral(); 2214 return; 2215 2216 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2217 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2218 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2219 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2220 assert(isSafeTruncation(Val, 16)); 2221 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2222 AsmParser->hasInv2PiInlineImm())); 2223 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 return; 2226 } 2227 case AMDGPU::OPERAND_KIMM32: 2228 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2229 setImmKindNone(); 2230 return; 2231 case AMDGPU::OPERAND_KIMM16: 2232 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2233 setImmKindNone(); 2234 return; 2235 default: 2236 llvm_unreachable("invalid operand size"); 2237 } 2238 } 2239 2240 template <unsigned Bitwidth> 2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2242 APInt Literal(64, Imm.Val); 2243 setImmKindNone(); 2244 2245 if (!Imm.IsFPImm) { 2246 // We got int literal token. 
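// An integer token is simply truncated to Bitwidth below; only the
// floating-point case that follows needs a format conversion. (KImm
// operands carry the extra constant of instructions such as v_madmk_f32,
// to give one illustrative example.)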
2247 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2248 return; 2249 } 2250 2251 bool Lost; 2252 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2253 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2254 APFloat::rmNearestTiesToEven, &Lost); 2255 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2256 } 2257 2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2259 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2260 } 2261 2262 static bool isInlineValue(unsigned Reg) { 2263 switch (Reg) { 2264 case AMDGPU::SRC_SHARED_BASE: 2265 case AMDGPU::SRC_SHARED_LIMIT: 2266 case AMDGPU::SRC_PRIVATE_BASE: 2267 case AMDGPU::SRC_PRIVATE_LIMIT: 2268 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2269 return true; 2270 case AMDGPU::SRC_VCCZ: 2271 case AMDGPU::SRC_EXECZ: 2272 case AMDGPU::SRC_SCC: 2273 return true; 2274 case AMDGPU::SGPR_NULL: 2275 return true; 2276 default: 2277 return false; 2278 } 2279 } 2280 2281 bool AMDGPUOperand::isInlineValue() const { 2282 return isRegKind() && ::isInlineValue(getReg()); 2283 } 2284 2285 //===----------------------------------------------------------------------===// 2286 // AsmParser 2287 //===----------------------------------------------------------------------===// 2288 2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2290 if (Is == IS_VGPR) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::VGPR_32RegClassID; 2295 case 64: 2296 return AMDGPU::VReg_64RegClassID; 2297 case 96: 2298 return AMDGPU::VReg_96RegClassID; 2299 case 128: 2300 return AMDGPU::VReg_128RegClassID; 2301 case 160: 2302 return AMDGPU::VReg_160RegClassID; 2303 case 192: 2304 return AMDGPU::VReg_192RegClassID; 2305 case 224: 2306 return AMDGPU::VReg_224RegClassID; 2307 case 256: 2308 return AMDGPU::VReg_256RegClassID; 2309 case 512: 2310 return AMDGPU::VReg_512RegClassID; 2311 case 1024: 2312 return AMDGPU::VReg_1024RegClassID; 2313 } 2314 } else if (Is == IS_TTMP) { 2315 switch (RegWidth) { 2316 default: return -1; 2317 case 32: 2318 return AMDGPU::TTMP_32RegClassID; 2319 case 64: 2320 return AMDGPU::TTMP_64RegClassID; 2321 case 128: 2322 return AMDGPU::TTMP_128RegClassID; 2323 case 256: 2324 return AMDGPU::TTMP_256RegClassID; 2325 case 512: 2326 return AMDGPU::TTMP_512RegClassID; 2327 } 2328 } else if (Is == IS_SGPR) { 2329 switch (RegWidth) { 2330 default: return -1; 2331 case 32: 2332 return AMDGPU::SGPR_32RegClassID; 2333 case 64: 2334 return AMDGPU::SGPR_64RegClassID; 2335 case 96: 2336 return AMDGPU::SGPR_96RegClassID; 2337 case 128: 2338 return AMDGPU::SGPR_128RegClassID; 2339 case 160: 2340 return AMDGPU::SGPR_160RegClassID; 2341 case 192: 2342 return AMDGPU::SGPR_192RegClassID; 2343 case 224: 2344 return AMDGPU::SGPR_224RegClassID; 2345 case 256: 2346 return AMDGPU::SGPR_256RegClassID; 2347 case 512: 2348 return AMDGPU::SGPR_512RegClassID; 2349 } 2350 } else if (Is == IS_AGPR) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::AGPR_32RegClassID; 2355 case 64: 2356 return AMDGPU::AReg_64RegClassID; 2357 case 96: 2358 return AMDGPU::AReg_96RegClassID; 2359 case 128: 2360 return AMDGPU::AReg_128RegClassID; 2361 case 160: 2362 return AMDGPU::AReg_160RegClassID; 2363 case 192: 2364 return AMDGPU::AReg_192RegClassID; 2365 case 224: 2366 return AMDGPU::AReg_224RegClassID; 2367 case 256: 2368 return AMDGPU::AReg_256RegClassID; 2369 case 512: 2370 return AMDGPU::AReg_512RegClassID; 
2371 case 1024: 2372 return AMDGPU::AReg_1024RegClassID; 2373 } 2374 } 2375 return -1; 2376 } 2377 2378 static unsigned getSpecialRegForName(StringRef RegName) { 2379 return StringSwitch<unsigned>(RegName) 2380 .Case("exec", AMDGPU::EXEC) 2381 .Case("vcc", AMDGPU::VCC) 2382 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2383 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2384 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2385 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2386 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2387 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2388 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2389 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2390 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2391 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2392 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2393 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2394 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2395 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2396 .Case("m0", AMDGPU::M0) 2397 .Case("vccz", AMDGPU::SRC_VCCZ) 2398 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2399 .Case("execz", AMDGPU::SRC_EXECZ) 2400 .Case("src_execz", AMDGPU::SRC_EXECZ) 2401 .Case("scc", AMDGPU::SRC_SCC) 2402 .Case("src_scc", AMDGPU::SRC_SCC) 2403 .Case("tba", AMDGPU::TBA) 2404 .Case("tma", AMDGPU::TMA) 2405 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2406 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2407 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2408 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2409 .Case("vcc_lo", AMDGPU::VCC_LO) 2410 .Case("vcc_hi", AMDGPU::VCC_HI) 2411 .Case("exec_lo", AMDGPU::EXEC_LO) 2412 .Case("exec_hi", AMDGPU::EXEC_HI) 2413 .Case("tma_lo", AMDGPU::TMA_LO) 2414 .Case("tma_hi", AMDGPU::TMA_HI) 2415 .Case("tba_lo", AMDGPU::TBA_LO) 2416 .Case("tba_hi", AMDGPU::TBA_HI) 2417 .Case("pc", AMDGPU::PC_REG) 2418 .Case("null", AMDGPU::SGPR_NULL) 2419 .Default(AMDGPU::NoRegister); 2420 } 2421 2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2423 SMLoc &EndLoc, bool RestoreOnFailure) { 2424 auto R = parseRegister(); 2425 if (!R) return true; 2426 assert(R->isReg()); 2427 RegNo = R->getReg(); 2428 StartLoc = R->getStartLoc(); 2429 EndLoc = R->getEndLoc(); 2430 return false; 2431 } 2432 2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2434 SMLoc &EndLoc) { 2435 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2436 } 2437 2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2439 SMLoc &StartLoc, 2440 SMLoc &EndLoc) { 2441 bool Result = 2442 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2443 bool PendingErrors = getParser().hasPendingError(); 2444 getParser().clearPendingErrors(); 2445 if (PendingErrors) 2446 return MatchOperand_ParseFail; 2447 if (Result) 2448 return MatchOperand_NoMatch; 2449 return MatchOperand_Success; 2450 } 2451 2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2453 RegisterKind RegKind, unsigned Reg1, 2454 SMLoc Loc) { 2455 switch (RegKind) { 2456 case IS_SPECIAL: 2457 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2458 Reg = AMDGPU::EXEC; 2459 RegWidth = 64; 2460 return true; 2461 } 2462 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2463 Reg = AMDGPU::FLAT_SCR; 2464 RegWidth = 64; 2465 return true; 2466 } 2467 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2468 Reg = AMDGPU::XNACK_MASK; 2469 RegWidth = 64; 
2470 return true; 2471 } 2472 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2473 Reg = AMDGPU::VCC; 2474 RegWidth = 64; 2475 return true; 2476 } 2477 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2478 Reg = AMDGPU::TBA; 2479 RegWidth = 64; 2480 return true; 2481 } 2482 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2483 Reg = AMDGPU::TMA; 2484 RegWidth = 64; 2485 return true; 2486 } 2487 Error(Loc, "register does not fit in the list"); 2488 return false; 2489 case IS_VGPR: 2490 case IS_SGPR: 2491 case IS_AGPR: 2492 case IS_TTMP: 2493 if (Reg1 != Reg + RegWidth / 32) { 2494 Error(Loc, "registers in a list must have consecutive indices"); 2495 return false; 2496 } 2497 RegWidth += 32; 2498 return true; 2499 default: 2500 llvm_unreachable("unexpected register kind"); 2501 } 2502 } 2503 2504 struct RegInfo { 2505 StringLiteral Name; 2506 RegisterKind Kind; 2507 }; 2508 2509 static constexpr RegInfo RegularRegisters[] = { 2510 {{"v"}, IS_VGPR}, 2511 {{"s"}, IS_SGPR}, 2512 {{"ttmp"}, IS_TTMP}, 2513 {{"acc"}, IS_AGPR}, 2514 {{"a"}, IS_AGPR}, 2515 }; 2516 2517 static bool isRegularReg(RegisterKind Kind) { 2518 return Kind == IS_VGPR || 2519 Kind == IS_SGPR || 2520 Kind == IS_TTMP || 2521 Kind == IS_AGPR; 2522 } 2523 2524 static const RegInfo* getRegularRegInfo(StringRef Str) { 2525 for (const RegInfo &Reg : RegularRegisters) 2526 if (Str.startswith(Reg.Name)) 2527 return &Reg; 2528 return nullptr; 2529 } 2530 2531 static bool getRegNum(StringRef Str, unsigned& Num) { 2532 return !Str.getAsInteger(10, Num); 2533 } 2534 2535 bool 2536 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2537 const AsmToken &NextToken) const { 2538 2539 // A list of consecutive registers: [s0,s1,s2,s3] 2540 if (Token.is(AsmToken::LBrac)) 2541 return true; 2542 2543 if (!Token.is(AsmToken::Identifier)) 2544 return false; 2545 2546 // A single register like s0 or a range of registers like s[0:1] 2547 2548 StringRef Str = Token.getString(); 2549 const RegInfo *Reg = getRegularRegInfo(Str); 2550 if (Reg) { 2551 StringRef RegName = Reg->Name; 2552 StringRef RegSuffix = Str.substr(RegName.size()); 2553 if (!RegSuffix.empty()) { 2554 unsigned Num; 2555 // A single register with an index: rXX 2556 if (getRegNum(RegSuffix, Num)) 2557 return true; 2558 } else { 2559 // A range of registers: r[XX:YY]. 2560 if (NextToken.is(AsmToken::LBrac)) 2561 return true; 2562 } 2563 } 2564 2565 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2566 } 2567 2568 bool 2569 AMDGPUAsmParser::isRegister() 2570 { 2571 return isRegister(getToken(), peekToken()); 2572 } 2573 2574 unsigned 2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2576 unsigned RegNum, 2577 unsigned RegWidth, 2578 SMLoc Loc) { 2579 2580 assert(isRegularReg(RegKind)); 2581 2582 unsigned AlignSize = 1; 2583 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2584 // SGPR and TTMP registers must be aligned. 2585 // Max required alignment is 4 dwords. 
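// For example, a 128-bit SGPR tuple such as s[4:7] is accepted because its
// first index is a multiple of 4, while s[2:5] fails the alignment check
// below.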
2586 AlignSize = std::min(RegWidth / 32, 4u); 2587 } 2588 2589 if (RegNum % AlignSize != 0) { 2590 Error(Loc, "invalid register alignment"); 2591 return AMDGPU::NoRegister; 2592 } 2593 2594 unsigned RegIdx = RegNum / AlignSize; 2595 int RCID = getRegClass(RegKind, RegWidth); 2596 if (RCID == -1) { 2597 Error(Loc, "invalid or unsupported register size"); 2598 return AMDGPU::NoRegister; 2599 } 2600 2601 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2602 const MCRegisterClass RC = TRI->getRegClass(RCID); 2603 if (RegIdx >= RC.getNumRegs()) { 2604 Error(Loc, "register index is out of range"); 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 return RC.getRegister(RegIdx); 2609 } 2610 2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2612 int64_t RegLo, RegHi; 2613 if (!skipToken(AsmToken::LBrac, "missing register index")) 2614 return false; 2615 2616 SMLoc FirstIdxLoc = getLoc(); 2617 SMLoc SecondIdxLoc; 2618 2619 if (!parseExpr(RegLo)) 2620 return false; 2621 2622 if (trySkipToken(AsmToken::Colon)) { 2623 SecondIdxLoc = getLoc(); 2624 if (!parseExpr(RegHi)) 2625 return false; 2626 } else { 2627 RegHi = RegLo; 2628 } 2629 2630 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2631 return false; 2632 2633 if (!isUInt<32>(RegLo)) { 2634 Error(FirstIdxLoc, "invalid register index"); 2635 return false; 2636 } 2637 2638 if (!isUInt<32>(RegHi)) { 2639 Error(SecondIdxLoc, "invalid register index"); 2640 return false; 2641 } 2642 2643 if (RegLo > RegHi) { 2644 Error(FirstIdxLoc, "first register index should not exceed second index"); 2645 return false; 2646 } 2647 2648 Num = static_cast<unsigned>(RegLo); 2649 RegWidth = 32 * ((RegHi - RegLo) + 1); 2650 return true; 2651 } 2652 2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2654 unsigned &RegNum, unsigned &RegWidth, 2655 SmallVectorImpl<AsmToken> &Tokens) { 2656 assert(isToken(AsmToken::Identifier)); 2657 unsigned Reg = getSpecialRegForName(getTokenStr()); 2658 if (Reg) { 2659 RegNum = 0; 2660 RegWidth = 32; 2661 RegKind = IS_SPECIAL; 2662 Tokens.push_back(getToken()); 2663 lex(); // skip register name 2664 } 2665 return Reg; 2666 } 2667 2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2669 unsigned &RegNum, unsigned &RegWidth, 2670 SmallVectorImpl<AsmToken> &Tokens) { 2671 assert(isToken(AsmToken::Identifier)); 2672 StringRef RegName = getTokenStr(); 2673 auto Loc = getLoc(); 2674 2675 const RegInfo *RI = getRegularRegInfo(RegName); 2676 if (!RI) { 2677 Error(Loc, "invalid register name"); 2678 return AMDGPU::NoRegister; 2679 } 2680 2681 Tokens.push_back(getToken()); 2682 lex(); // skip register name 2683 2684 RegKind = RI->Kind; 2685 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2686 if (!RegSuffix.empty()) { 2687 // Single 32-bit register: vXX. 2688 if (!getRegNum(RegSuffix, RegNum)) { 2689 Error(Loc, "invalid register index"); 2690 return AMDGPU::NoRegister; 2691 } 2692 RegWidth = 32; 2693 } else { 2694 // Range of registers: v[XX:YY]. ":YY" is optional. 
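// For example, v[0:3] yields RegNum = 0 and RegWidth = 128; v[5] denotes a
// single 32-bit register, equivalent to the bare form v5.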
2695 if (!ParseRegRange(RegNum, RegWidth)) 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2700 } 2701 2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2703 unsigned &RegWidth, 2704 SmallVectorImpl<AsmToken> &Tokens) { 2705 unsigned Reg = AMDGPU::NoRegister; 2706 auto ListLoc = getLoc(); 2707 2708 if (!skipToken(AsmToken::LBrac, 2709 "expected a register or a list of registers")) { 2710 return AMDGPU::NoRegister; 2711 } 2712 2713 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2714 2715 auto Loc = getLoc(); 2716 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2717 return AMDGPU::NoRegister; 2718 if (RegWidth != 32) { 2719 Error(Loc, "expected a single 32-bit register"); 2720 return AMDGPU::NoRegister; 2721 } 2722 2723 for (; trySkipToken(AsmToken::Comma); ) { 2724 RegisterKind NextRegKind; 2725 unsigned NextReg, NextRegNum, NextRegWidth; 2726 Loc = getLoc(); 2727 2728 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2729 NextRegNum, NextRegWidth, 2730 Tokens)) { 2731 return AMDGPU::NoRegister; 2732 } 2733 if (NextRegWidth != 32) { 2734 Error(Loc, "expected a single 32-bit register"); 2735 return AMDGPU::NoRegister; 2736 } 2737 if (NextRegKind != RegKind) { 2738 Error(Loc, "registers in a list must be of the same kind"); 2739 return AMDGPU::NoRegister; 2740 } 2741 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2742 return AMDGPU::NoRegister; 2743 } 2744 2745 if (!skipToken(AsmToken::RBrac, 2746 "expected a comma or a closing square bracket")) { 2747 return AMDGPU::NoRegister; 2748 } 2749 2750 if (isRegularReg(RegKind)) 2751 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2752 2753 return Reg; 2754 } 2755 2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2757 unsigned &RegNum, unsigned &RegWidth, 2758 SmallVectorImpl<AsmToken> &Tokens) { 2759 auto Loc = getLoc(); 2760 Reg = AMDGPU::NoRegister; 2761 2762 if (isToken(AsmToken::Identifier)) { 2763 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2764 if (Reg == AMDGPU::NoRegister) 2765 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2766 } else { 2767 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2768 } 2769 2770 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2771 if (Reg == AMDGPU::NoRegister) { 2772 assert(Parser.hasPendingError()); 2773 return false; 2774 } 2775 2776 if (!subtargetHasRegister(*TRI, Reg)) { 2777 if (Reg == AMDGPU::SGPR_NULL) { 2778 Error(Loc, "'null' operand is not supported on this GPU"); 2779 } else { 2780 Error(Loc, "register not available on this GPU"); 2781 } 2782 return false; 2783 } 2784 2785 return true; 2786 } 2787 2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2789 unsigned &RegNum, unsigned &RegWidth, 2790 bool RestoreOnFailure /*=false*/) { 2791 Reg = AMDGPU::NoRegister; 2792 2793 SmallVector<AsmToken, 1> Tokens; 2794 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2795 if (RestoreOnFailure) { 2796 while (!Tokens.empty()) { 2797 getLexer().UnLex(Tokens.pop_back_val()); 2798 } 2799 } 2800 return true; 2801 } 2802 return false; 2803 } 2804 2805 Optional<StringRef> 2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2807 switch (RegKind) { 2808 case IS_VGPR: 2809 return StringRef(".amdgcn.next_free_vgpr"); 2810 case IS_SGPR: 2811 return StringRef(".amdgcn.next_free_sgpr"); 2812 default: 2813 return None; 2814 } 2815 } 2816 2817 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2818 auto SymbolName = getGprCountSymbolName(RegKind); 2819 assert(SymbolName && "initializing invalid register kind"); 2820 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2821 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2822 } 2823 2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2825 unsigned DwordRegIndex, 2826 unsigned RegWidth) { 2827 // Symbols are only defined for GCN targets 2828 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2829 return true; 2830 2831 auto SymbolName = getGprCountSymbolName(RegKind); 2832 if (!SymbolName) 2833 return true; 2834 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2835 2836 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2837 int64_t OldCount; 2838 2839 if (!Sym->isVariable()) 2840 return !Error(getLoc(), 2841 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2842 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2843 return !Error( 2844 getLoc(), 2845 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2846 2847 if (OldCount <= NewMax) 2848 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2849 2850 return true; 2851 } 2852 2853 std::unique_ptr<AMDGPUOperand> 2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2855 const auto &Tok = getToken(); 2856 SMLoc StartLoc = Tok.getLoc(); 2857 SMLoc EndLoc = Tok.getEndLoc(); 2858 RegisterKind RegKind; 2859 unsigned Reg, RegNum, RegWidth; 2860 2861 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2862 return nullptr; 2863 } 2864 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2865 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2866 return nullptr; 2867 } else 2868 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2869 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2870 } 2871 2872 OperandMatchResultTy 2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2874 // TODO: add syntactic sugar for 1/(2*PI) 2875 2876 assert(!isRegister()); 2877 assert(!isModifier()); 2878 2879 const auto& Tok = getToken(); 2880 const auto& NextTok = peekToken(); 2881 bool IsReal = Tok.is(AsmToken::Real); 2882 SMLoc S = getLoc(); 2883 bool Negate = false; 2884 2885 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2886 lex(); 2887 IsReal = true; 2888 Negate = true; 2889 } 2890 2891 if (IsReal) { 2892 // Floating-point expressions are not supported. 2893 // Can only allow floating-point literals with an 2894 // optional sign. 2895 2896 StringRef Num = getTokenStr(); 2897 lex(); 2898 2899 APFloat RealVal(APFloat::IEEEdouble()); 2900 auto roundMode = APFloat::rmNearestTiesToEven; 2901 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2902 return MatchOperand_ParseFail; 2903 } 2904 if (Negate) 2905 RealVal.changeSign(); 2906 2907 Operands.push_back( 2908 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2909 AMDGPUOperand::ImmTyNone, true)); 2910 2911 return MatchOperand_Success; 2912 2913 } else { 2914 int64_t IntVal; 2915 const MCExpr *Expr; 2916 SMLoc S = getLoc(); 2917 2918 if (HasSP3AbsModifier) { 2919 // This is a workaround for handling expressions 2920 // as arguments of SP3 'abs' modifier, for example: 2921 // |1.0| 2922 // |-1| 2923 // |1+x| 2924 // This syntax is not compatible with syntax of standard 2925 // MC expressions (due to the trailing '|'). 
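// Parsing only a primary expression below keeps the trailing '|' from being
// consumed as a bitwise-or operator.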
2926 SMLoc EndLoc; 2927 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2928 return MatchOperand_ParseFail; 2929 } else { 2930 if (Parser.parseExpression(Expr)) 2931 return MatchOperand_ParseFail; 2932 } 2933 2934 if (Expr->evaluateAsAbsolute(IntVal)) { 2935 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2936 } else { 2937 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2938 } 2939 2940 return MatchOperand_Success; 2941 } 2942 2943 return MatchOperand_NoMatch; 2944 } 2945 2946 OperandMatchResultTy 2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2948 if (!isRegister()) 2949 return MatchOperand_NoMatch; 2950 2951 if (auto R = parseRegister()) { 2952 assert(R->isReg()); 2953 Operands.push_back(std::move(R)); 2954 return MatchOperand_Success; 2955 } 2956 return MatchOperand_ParseFail; 2957 } 2958 2959 OperandMatchResultTy 2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2961 auto res = parseReg(Operands); 2962 if (res != MatchOperand_NoMatch) { 2963 return res; 2964 } else if (isModifier()) { 2965 return MatchOperand_NoMatch; 2966 } else { 2967 return parseImm(Operands, HasSP3AbsMod); 2968 } 2969 } 2970 2971 bool 2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2973 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2974 const auto &str = Token.getString(); 2975 return str == "abs" || str == "neg" || str == "sext"; 2976 } 2977 return false; 2978 } 2979 2980 bool 2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2982 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2983 } 2984 2985 bool 2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2987 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2988 } 2989 2990 bool 2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2992 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2993 } 2994 2995 // Check if this is an operand modifier or an opcode modifier 2996 // which may look like an expression but it is not. We should 2997 // avoid parsing these modifiers as expressions. Currently 2998 // recognized sequences are: 2999 // |...| 3000 // abs(...) 3001 // neg(...) 3002 // sext(...) 3003 // -reg 3004 // -|...| 3005 // -abs(...) 3006 // name:... 3007 // Note that simple opcode modifiers like 'gds' may be parsed as 3008 // expressions; this is a special case. See getExpressionAsToken. 3009 // 3010 bool 3011 AMDGPUAsmParser::isModifier() { 3012 3013 AsmToken Tok = getToken(); 3014 AsmToken NextToken[2]; 3015 peekTokens(NextToken); 3016 3017 return isOperandModifier(Tok, NextToken[0]) || 3018 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3019 isOpcodeModifierWithVal(Tok, NextToken[0]); 3020 } 3021 3022 // Check if the current token is an SP3 'neg' modifier. 3023 // Currently this modifier is allowed in the following context: 3024 // 3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3026 // 2. Before an 'abs' modifier: -abs(...) 3027 // 3. Before an SP3 'abs' modifier: -|...| 3028 // 3029 // In all other cases "-" is handled as a part 3030 // of an expression that follows the sign. 
3031 // 3032 // Note: When "-" is followed by an integer literal, 3033 // this is interpreted as integer negation rather 3034 // than a floating-point NEG modifier applied to N. 3035 // Besides being counter-intuitive, such use of a floating-point 3036 // NEG modifier would have resulted in a different meaning 3037 // of integer literals used with VOP1/2/C and VOP3, 3038 // for example: 3039 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3040 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3041 // Negative fp literals with preceding "-" are 3042 // handled likewise for uniformity. 3043 // 3044 bool 3045 AMDGPUAsmParser::parseSP3NegModifier() { 3046 3047 AsmToken NextToken[2]; 3048 peekTokens(NextToken); 3049 3050 if (isToken(AsmToken::Minus) && 3051 (isRegister(NextToken[0], NextToken[1]) || 3052 NextToken[0].is(AsmToken::Pipe) || 3053 isId(NextToken[0], "abs"))) { 3054 lex(); 3055 return true; 3056 } 3057 3058 return false; 3059 } 3060 3061 OperandMatchResultTy 3062 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3063 bool AllowImm) { 3064 bool Neg, SP3Neg; 3065 bool Abs, SP3Abs; 3066 SMLoc Loc; 3067 3068 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3069 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3070 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3071 return MatchOperand_ParseFail; 3072 } 3073 3074 SP3Neg = parseSP3NegModifier(); 3075 3076 Loc = getLoc(); 3077 Neg = trySkipId("neg"); 3078 if (Neg && SP3Neg) { 3079 Error(Loc, "expected register or immediate"); 3080 return MatchOperand_ParseFail; 3081 } 3082 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3083 return MatchOperand_ParseFail; 3084 3085 Abs = trySkipId("abs"); 3086 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3087 return MatchOperand_ParseFail; 3088 3089 Loc = getLoc(); 3090 SP3Abs = trySkipToken(AsmToken::Pipe); 3091 if (Abs && SP3Abs) { 3092 Error(Loc, "expected register or immediate"); 3093 return MatchOperand_ParseFail; 3094 } 3095 3096 OperandMatchResultTy Res; 3097 if (AllowImm) { 3098 Res = parseRegOrImm(Operands, SP3Abs); 3099 } else { 3100 Res = parseReg(Operands); 3101 } 3102 if (Res != MatchOperand_Success) { 3103 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3104 } 3105 3106 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3107 return MatchOperand_ParseFail; 3108 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3109 return MatchOperand_ParseFail; 3110 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3111 return MatchOperand_ParseFail; 3112 3113 AMDGPUOperand::Modifiers Mods; 3114 Mods.Abs = Abs || SP3Abs; 3115 Mods.Neg = Neg || SP3Neg; 3116 3117 if (Mods.hasFPModifiers()) { 3118 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3119 if (Op.isExpr()) { 3120 Error(Op.getStartLoc(), "expected an absolute expression"); 3121 return MatchOperand_ParseFail; 3122 } 3123 Op.setModifiers(Mods); 3124 } 3125 return MatchOperand_Success; 3126 } 3127 3128 OperandMatchResultTy 3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3130 bool AllowImm) { 3131 bool Sext = trySkipId("sext"); 3132 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3133 return MatchOperand_ParseFail; 3134 3135 OperandMatchResultTy Res; 3136 if (AllowImm) { 3137 Res = parseRegOrImm(Operands); 3138 } else { 3139 Res = parseReg(Operands); 3140 } 3141 if (Res != MatchOperand_Success) { 3142 return Sext? MatchOperand_ParseFail : Res; 3143 } 3144 3145 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3146 return MatchOperand_ParseFail; 3147 3148 AMDGPUOperand::Modifiers Mods; 3149 Mods.Sext = Sext; 3150 3151 if (Mods.hasIntModifiers()) { 3152 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3153 if (Op.isExpr()) { 3154 Error(Op.getStartLoc(), "expected an absolute expression"); 3155 return MatchOperand_ParseFail; 3156 } 3157 Op.setModifiers(Mods); 3158 } 3159 3160 return MatchOperand_Success; 3161 } 3162 3163 OperandMatchResultTy 3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3165 return parseRegOrImmWithFPInputMods(Operands, false); 3166 } 3167 3168 OperandMatchResultTy 3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3170 return parseRegOrImmWithIntInputMods(Operands, false); 3171 } 3172 3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3174 auto Loc = getLoc(); 3175 if (trySkipId("off")) { 3176 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3177 AMDGPUOperand::ImmTyOff, false)); 3178 return MatchOperand_Success; 3179 } 3180 3181 if (!isRegister()) 3182 return MatchOperand_NoMatch; 3183 3184 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3185 if (Reg) { 3186 Operands.push_back(std::move(Reg)); 3187 return MatchOperand_Success; 3188 } 3189 3190 return MatchOperand_ParseFail; 3191 3192 } 3193 3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3195 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3196 3197 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3198 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3199 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3200 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3201 return Match_InvalidOperand; 3202 3203 if ((TSFlags & SIInstrFlags::VOP3) && 3204 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3205 getForcedEncodingSize() != 64) 3206 return Match_PreferE32; 3207 3208 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3209 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3210 // v_mac_f32/16 allow only dst_sel == DWORD; 3211 auto OpNum = 3212 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3213 const auto &Op = Inst.getOperand(OpNum); 3214 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3215 return Match_InvalidOperand; 3216 } 3217 } 3218 3219 return Match_Success; 3220 } 3221 3222 static ArrayRef<unsigned> getAllVariants() { 3223 static const unsigned Variants[] = { 3224 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3225 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3226 }; 3227 3228 return makeArrayRef(Variants); 3229 } 3230 3231 // Which asm variants we should check 3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3233 if (getForcedEncodingSize() == 32) { 3234 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3235 return makeArrayRef(Variants); 3236 } 3237 3238 if (isForcedVOP3()) { 3239 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3240 return makeArrayRef(Variants); 3241 } 3242 3243 if (isForcedSDWA()) { 3244 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3245 AMDGPUAsmVariants::SDWA9}; 3246 return makeArrayRef(Variants); 3247 } 3248 3249 if (isForcedDPP()) { 3250 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3251 return makeArrayRef(Variants); 3252 } 3253 3254 return getAllVariants(); 3255 } 3256 3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3258 if (getForcedEncodingSize() == 32) 3259 return "e32"; 3260 3261 if (isForcedVOP3()) 3262 return "e64"; 3263 3264 if (isForcedSDWA()) 3265 return "sdwa"; 3266 3267 if (isForcedDPP()) 3268 return "dpp"; 3269 3270 return ""; 3271 } 3272 3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 const unsigned Num = Desc.getNumImplicitUses(); 3276 for (unsigned i = 0; i < Num; ++i) { 3277 unsigned Reg = Desc.ImplicitUses[i]; 3278 switch (Reg) { 3279 case AMDGPU::FLAT_SCR: 3280 case AMDGPU::VCC: 3281 case AMDGPU::VCC_LO: 3282 case AMDGPU::VCC_HI: 3283 case AMDGPU::M0: 3284 return Reg; 3285 default: 3286 break; 3287 } 3288 } 3289 return AMDGPU::NoRegister; 3290 } 3291 3292 // NB: This code is correct only when used to check constant 3293 // bus limitations because GFX7 supports no f16 inline constants. 3294 // Note that there are no cases when a GFX7 opcode violates 3295 // constant bus limitations due to the use of an f16 constant.
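// For reference, for a 32-bit operand the inlinable values are the integers
// -16..64 and a small set of floating-point constants (+-0.5, +-1.0, +-2.0,
// +-4.0, 0.0 and, when the target supports it, 1/(2*pi)).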
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3297 unsigned OpIdx) const { 3298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3299 3300 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3301 return false; 3302 } 3303 3304 const MCOperand &MO = Inst.getOperand(OpIdx); 3305 3306 int64_t Val = MO.getImm(); 3307 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3308 3309 switch (OpSize) { // expected operand size 3310 case 8: 3311 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3312 case 4: 3313 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3314 case 2: { 3315 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3316 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3317 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3318 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3319 return AMDGPU::isInlinableIntLiteral(Val); 3320 3321 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3322 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3323 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3324 return AMDGPU::isInlinableIntLiteralV216(Val); 3325 3326 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3327 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3328 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3329 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3330 3331 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3332 } 3333 default: 3334 llvm_unreachable("invalid operand size"); 3335 } 3336 } 3337 3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3339 if (!isGFX10Plus()) 3340 return 1; 3341 3342 switch (Opcode) { 3343 // 64-bit shift instructions can use only one scalar value input 3344 case AMDGPU::V_LSHLREV_B64_e64: 3345 case AMDGPU::V_LSHLREV_B64_gfx10: 3346 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3347 case AMDGPU::V_LSHRREV_B64_e64: 3348 case AMDGPU::V_LSHRREV_B64_gfx10: 3349 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3350 case AMDGPU::V_ASHRREV_I64_e64: 3351 case AMDGPU::V_ASHRREV_I64_gfx10: 3352 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3353 case AMDGPU::V_LSHL_B64_e64: 3354 case AMDGPU::V_LSHR_B64_e64: 3355 case AMDGPU::V_ASHR_I64_e64: 3356 return 1; 3357 default: 3358 return 2; 3359 } 3360 } 3361 3362 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3363 const MCOperand &MO = Inst.getOperand(OpIdx); 3364 if (MO.isImm()) { 3365 return !isInlineConstant(Inst, OpIdx); 3366 } else if (MO.isReg()) { 3367 auto Reg = MO.getReg(); 3368 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3369 auto PReg = mc2PseudoReg(Reg); 3370 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3371 } else { 3372 return true; 3373 } 3374 } 3375 3376 bool 3377 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3378 const OperandVector &Operands) { 3379 const unsigned Opcode = Inst.getOpcode(); 3380 const MCInstrDesc &Desc = MII.get(Opcode); 3381 unsigned LastSGPR = AMDGPU::NoRegister; 3382 unsigned ConstantBusUseCount = 0; 3383 unsigned NumLiterals = 0; 3384 unsigned LiteralSize; 3385 3386 if (Desc.TSFlags & 3387 (SIInstrFlags::VOPC | 3388 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3389 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3390 SIInstrFlags::SDWA)) { 3391 // Check special imm operands (used by madmk, etc) 3392 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3393 ++NumLiterals; 3394 LiteralSize = 4; 3395 } 3396 3397 SmallDenseSet<unsigned> SGPRsUsed; 3398 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3399 if (SGPRUsed != AMDGPU::NoRegister) { 3400 SGPRsUsed.insert(SGPRUsed); 3401 ++ConstantBusUseCount; 3402 } 3403 3404 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3405 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3406 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3407 3408 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3409 3410 for (int OpIdx : OpIndices) { 3411 if (OpIdx == -1) break; 3412 3413 const MCOperand &MO = Inst.getOperand(OpIdx); 3414 if (usesConstantBus(Inst, OpIdx)) { 3415 if (MO.isReg()) { 3416 LastSGPR = mc2PseudoReg(MO.getReg()); 3417 // Pairs of registers with a partial intersection like these 3418 // s0, s[0:1] 3419 // flat_scratch_lo, flat_scratch 3420 // flat_scratch_lo, flat_scratch_hi 3421 // are theoretically valid but they are disabled anyway. 3422 // Note that this code mimics SIInstrInfo::verifyInstruction 3423 if (!SGPRsUsed.count(LastSGPR)) { 3424 SGPRsUsed.insert(LastSGPR); 3425 ++ConstantBusUseCount; 3426 } 3427 } else { // Expression or a literal 3428 3429 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3430 continue; // special operand like VINTERP attr_chan 3431 3432 // An instruction may use only one literal. 3433 // This has been validated in a previous step. 3434 // See validateVOPLiteral. 3435 // This literal may be used as more than one operand. 3436 // If all these operands are of the same size, 3437 // this literal counts as one scalar value. 3438 // Otherwise it counts as 2 scalar values. 3439 // See "GFX10 Shader Programming", section 3.6.2.3. 3440 3441 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3442 if (Size < 4) Size = 4; 3443 3444 if (NumLiterals == 0) { 3445 NumLiterals = 1; 3446 LiteralSize = Size; 3447 } else if (LiteralSize != Size) { 3448 NumLiterals = 2; 3449 } 3450 } 3451 } 3452 } 3453 } 3454 ConstantBusUseCount += NumLiterals; 3455 3456 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3457 return true; 3458 3459 SMLoc LitLoc = getLitLoc(Operands); 3460 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3461 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3462 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3463 return false; 3464 } 3465 3466 bool 3467 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3468 const OperandVector &Operands) { 3469 const unsigned Opcode = Inst.getOpcode(); 3470 const MCInstrDesc &Desc = MII.get(Opcode); 3471 3472 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3473 if (DstIdx == -1 || 3474 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3475 return true; 3476 } 3477 3478 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3479 3480 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3481 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3482 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3483 3484 assert(DstIdx != -1); 3485 const MCOperand &Dst = Inst.getOperand(DstIdx); 3486 assert(Dst.isReg()); 3487 3488 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3489 3490 for (int SrcIdx : SrcIndices) { 3491 if (SrcIdx == -1) break; 3492 const MCOperand &Src = Inst.getOperand(SrcIdx); 3493 if (Src.isReg()) { 3494 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3495 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3496 Error(getRegLoc(SrcReg, Operands), 3497 "destination must be different than all sources"); 3498 return false; 3499 } 3500 } 3501 } 3502 3503 return true; 3504 } 3505 3506 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3507 3508 const unsigned Opc = Inst.getOpcode(); 3509 const MCInstrDesc &Desc = MII.get(Opc); 3510 3511 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3512 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3513 assert(ClampIdx != -1); 3514 return Inst.getOperand(ClampIdx).getImm() == 0; 3515 } 3516 3517 return true; 3518 } 3519 3520 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3521 3522 const unsigned Opc = Inst.getOpcode(); 3523 const MCInstrDesc &Desc = MII.get(Opc); 3524 3525 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3526 return None; 3527 3528 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3529 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3530 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3531 3532 assert(VDataIdx != -1); 3533 3534 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3535 return None; 3536 3537 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3538 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3539 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3540 if (DMask == 0) 3541 DMask = 1; 3542 3543 bool isPackedD16 = false; 3544 unsigned DataSize = 3545 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3546 if (hasPackedD16()) { 3547 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3548 isPackedD16 = D16Idx >= 0; 3549 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3550 DataSize = (DataSize + 1) / 2; 3551 } 3552 3553 if ((VDataSize / 4) == DataSize + TFESize) 3554 return None; 3555 3556 return StringRef(isPackedD16 3557 ? 
"image data size does not match dmask, d16 and tfe" 3558 : "image data size does not match dmask and tfe"); 3559 } 3560 3561 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3562 const unsigned Opc = Inst.getOpcode(); 3563 const MCInstrDesc &Desc = MII.get(Opc); 3564 3565 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3566 return true; 3567 3568 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3569 3570 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3571 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3572 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3573 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3574 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3575 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3576 3577 assert(VAddr0Idx != -1); 3578 assert(SrsrcIdx != -1); 3579 assert(SrsrcIdx > VAddr0Idx); 3580 3581 if (DimIdx == -1) 3582 return true; // intersect_ray 3583 3584 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3585 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3586 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3587 unsigned ActualAddrSize = 3588 IsNSA ? SrsrcIdx - VAddr0Idx 3589 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3590 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3591 3592 unsigned ExpectedAddrSize = 3593 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3594 3595 if (!IsNSA) { 3596 if (ExpectedAddrSize > 8) 3597 ExpectedAddrSize = 16; 3598 3599 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3600 // This provides backward compatibility for assembly created 3601 // before 160b/192b/224b types were directly supported. 3602 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3603 return true; 3604 } 3605 3606 return ActualAddrSize == ExpectedAddrSize; 3607 } 3608 3609 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3610 3611 const unsigned Opc = Inst.getOpcode(); 3612 const MCInstrDesc &Desc = MII.get(Opc); 3613 3614 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3615 return true; 3616 if (!Desc.mayLoad() || !Desc.mayStore()) 3617 return true; // Not atomic 3618 3619 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3620 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3621 3622 // This is an incomplete check because image_atomic_cmpswap 3623 // may only use 0x3 and 0xf while other atomic operations 3624 // may use 0x1 and 0x3. However these limitations are 3625 // verified when we check that dmask matches dst size. 3626 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3627 } 3628 3629 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3630 3631 const unsigned Opc = Inst.getOpcode(); 3632 const MCInstrDesc &Desc = MII.get(Opc); 3633 3634 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3635 return true; 3636 3637 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3638 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3639 3640 // GATHER4 instructions use dmask in a different fashion compared to 3641 // other MIMG instructions. The only useful DMASK values are 3642 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3643 // (red,red,red,red) etc.) The ISA document doesn't mention 3644 // this. 
3645 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3646 } 3647 3648 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3649 const unsigned Opc = Inst.getOpcode(); 3650 const MCInstrDesc &Desc = MII.get(Opc); 3651 3652 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3653 return true; 3654 3655 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3656 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3657 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3658 3659 if (!BaseOpcode->MSAA) 3660 return true; 3661 3662 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3663 assert(DimIdx != -1); 3664 3665 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3666 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3667 3668 return DimInfo->MSAA; 3669 } 3670 3671 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3672 { 3673 switch (Opcode) { 3674 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3675 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3676 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3677 return true; 3678 default: 3679 return false; 3680 } 3681 } 3682 3683 // movrels* opcodes should only allow VGPRS as src0. 3684 // This is specified in .td description for vop1/vop3, 3685 // but sdwa is handled differently. See isSDWAOperand. 3686 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3687 const OperandVector &Operands) { 3688 3689 const unsigned Opc = Inst.getOpcode(); 3690 const MCInstrDesc &Desc = MII.get(Opc); 3691 3692 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3693 return true; 3694 3695 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3696 assert(Src0Idx != -1); 3697 3698 SMLoc ErrLoc; 3699 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3700 if (Src0.isReg()) { 3701 auto Reg = mc2PseudoReg(Src0.getReg()); 3702 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3703 if (!isSGPR(Reg, TRI)) 3704 return true; 3705 ErrLoc = getRegLoc(Reg, Operands); 3706 } else { 3707 ErrLoc = getConstLoc(Operands); 3708 } 3709 3710 Error(ErrLoc, "source operand must be a VGPR"); 3711 return false; 3712 } 3713 3714 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3715 const OperandVector &Operands) { 3716 3717 const unsigned Opc = Inst.getOpcode(); 3718 3719 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3720 return true; 3721 3722 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3723 assert(Src0Idx != -1); 3724 3725 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3726 if (!Src0.isReg()) 3727 return true; 3728 3729 auto Reg = mc2PseudoReg(Src0.getReg()); 3730 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3731 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3732 Error(getRegLoc(Reg, Operands), 3733 "source operand must be either a VGPR or an inline constant"); 3734 return false; 3735 } 3736 3737 return true; 3738 } 3739 3740 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3741 const OperandVector &Operands) { 3742 const unsigned Opc = Inst.getOpcode(); 3743 const MCInstrDesc &Desc = MII.get(Opc); 3744 3745 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3746 return true; 3747 3748 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3749 if (Src2Idx == -1) 3750 return true; 3751 3752 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3753 if (!Src2.isReg()) 3754 return true; 3755 3756 MCRegister Src2Reg = Src2.getReg(); 3757 MCRegister DstReg = Inst.getOperand(0).getReg(); 3758 if (Src2Reg == DstReg) 3759 return 
true; 3760 3761 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3762 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3763 return true; 3764 3765 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3766 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3767 "source 2 operand must not partially overlap with dst"); 3768 return false; 3769 } 3770 3771 return true; 3772 } 3773 3774 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3775 switch (Inst.getOpcode()) { 3776 default: 3777 return true; 3778 case V_DIV_SCALE_F32_gfx6_gfx7: 3779 case V_DIV_SCALE_F32_vi: 3780 case V_DIV_SCALE_F32_gfx10: 3781 case V_DIV_SCALE_F64_gfx6_gfx7: 3782 case V_DIV_SCALE_F64_vi: 3783 case V_DIV_SCALE_F64_gfx10: 3784 break; 3785 } 3786 3787 // TODO: Check that src0 = src1 or src2. 3788 3789 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3790 AMDGPU::OpName::src1_modifiers, 3791 AMDGPU::OpName::src2_modifiers}) { 3792 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3793 .getImm() & 3794 SISrcMods::ABS) { 3795 return false; 3796 } 3797 } 3798 3799 return true; 3800 } 3801 3802 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3803 3804 const unsigned Opc = Inst.getOpcode(); 3805 const MCInstrDesc &Desc = MII.get(Opc); 3806 3807 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3808 return true; 3809 3810 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3811 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3812 if (isCI() || isSI()) 3813 return false; 3814 } 3815 3816 return true; 3817 } 3818 3819 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3820 const unsigned Opc = Inst.getOpcode(); 3821 const MCInstrDesc &Desc = MII.get(Opc); 3822 3823 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3824 return true; 3825 3826 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3827 if (DimIdx < 0) 3828 return true; 3829 3830 long Imm = Inst.getOperand(DimIdx).getImm(); 3831 if (Imm < 0 || Imm >= 8) 3832 return false; 3833 3834 return true; 3835 } 3836 3837 static bool IsRevOpcode(const unsigned Opcode) 3838 { 3839 switch (Opcode) { 3840 case AMDGPU::V_SUBREV_F32_e32: 3841 case AMDGPU::V_SUBREV_F32_e64: 3842 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3843 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3844 case AMDGPU::V_SUBREV_F32_e32_vi: 3845 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3846 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3847 case AMDGPU::V_SUBREV_F32_e64_vi: 3848 3849 case AMDGPU::V_SUBREV_CO_U32_e32: 3850 case AMDGPU::V_SUBREV_CO_U32_e64: 3851 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3852 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3853 3854 case AMDGPU::V_SUBBREV_U32_e32: 3855 case AMDGPU::V_SUBBREV_U32_e64: 3856 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3857 case AMDGPU::V_SUBBREV_U32_e32_vi: 3858 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3859 case AMDGPU::V_SUBBREV_U32_e64_vi: 3860 3861 case AMDGPU::V_SUBREV_U32_e32: 3862 case AMDGPU::V_SUBREV_U32_e64: 3863 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3864 case AMDGPU::V_SUBREV_U32_e32_vi: 3865 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3866 case AMDGPU::V_SUBREV_U32_e64_vi: 3867 3868 case AMDGPU::V_SUBREV_F16_e32: 3869 case AMDGPU::V_SUBREV_F16_e64: 3870 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3871 case AMDGPU::V_SUBREV_F16_e32_vi: 3872 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3873 case AMDGPU::V_SUBREV_F16_e64_vi: 3874 3875 case AMDGPU::V_SUBREV_U16_e32: 3876 case AMDGPU::V_SUBREV_U16_e64: 3877 case AMDGPU::V_SUBREV_U16_e32_vi: 3878 case AMDGPU::V_SUBREV_U16_e64_vi: 3879 3880 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3881 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3882 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3883 3884 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3885 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3886 3887 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3888 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3889 3890 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3891 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3892 3893 case AMDGPU::V_LSHRREV_B32_e32: 3894 case AMDGPU::V_LSHRREV_B32_e64: 3895 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3896 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3897 case AMDGPU::V_LSHRREV_B32_e32_vi: 3898 case AMDGPU::V_LSHRREV_B32_e64_vi: 3899 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3900 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3901 3902 case AMDGPU::V_ASHRREV_I32_e32: 3903 case AMDGPU::V_ASHRREV_I32_e64: 3904 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3905 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3906 case AMDGPU::V_ASHRREV_I32_e32_vi: 3907 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3908 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3909 case AMDGPU::V_ASHRREV_I32_e64_vi: 3910 3911 case AMDGPU::V_LSHLREV_B32_e32: 3912 case AMDGPU::V_LSHLREV_B32_e64: 3913 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3914 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3915 case AMDGPU::V_LSHLREV_B32_e32_vi: 3916 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3917 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3918 case AMDGPU::V_LSHLREV_B32_e64_vi: 3919 3920 case AMDGPU::V_LSHLREV_B16_e32: 3921 case AMDGPU::V_LSHLREV_B16_e64: 3922 case AMDGPU::V_LSHLREV_B16_e32_vi: 3923 case AMDGPU::V_LSHLREV_B16_e64_vi: 3924 case AMDGPU::V_LSHLREV_B16_gfx10: 3925 3926 case AMDGPU::V_LSHRREV_B16_e32: 3927 case AMDGPU::V_LSHRREV_B16_e64: 3928 case AMDGPU::V_LSHRREV_B16_e32_vi: 3929 case AMDGPU::V_LSHRREV_B16_e64_vi: 3930 case AMDGPU::V_LSHRREV_B16_gfx10: 3931 3932 case AMDGPU::V_ASHRREV_I16_e32: 3933 case AMDGPU::V_ASHRREV_I16_e64: 3934 case AMDGPU::V_ASHRREV_I16_e32_vi: 3935 case AMDGPU::V_ASHRREV_I16_e64_vi: 3936 case AMDGPU::V_ASHRREV_I16_gfx10: 3937 3938 case AMDGPU::V_LSHLREV_B64_e64: 3939 case AMDGPU::V_LSHLREV_B64_gfx10: 3940 case AMDGPU::V_LSHLREV_B64_vi: 3941 3942 case AMDGPU::V_LSHRREV_B64_e64: 3943 case AMDGPU::V_LSHRREV_B64_gfx10: 3944 case AMDGPU::V_LSHRREV_B64_vi: 3945 3946 case AMDGPU::V_ASHRREV_I64_e64: 3947 case AMDGPU::V_ASHRREV_I64_gfx10: 3948 case AMDGPU::V_ASHRREV_I64_vi: 3949 3950 case AMDGPU::V_PK_LSHLREV_B16: 3951 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3952 case AMDGPU::V_PK_LSHLREV_B16_vi: 3953 3954 case AMDGPU::V_PK_LSHRREV_B16: 3955 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3956 case AMDGPU::V_PK_LSHRREV_B16_vi: 3957 case AMDGPU::V_PK_ASHRREV_I16: 3958 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3959 case AMDGPU::V_PK_ASHRREV_I16_vi: 3960 return true; 3961 default: 3962 return false; 3963 } 3964 } 3965 3966 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3967 3968 using namespace SIInstrFlags; 3969 const unsigned Opcode = Inst.getOpcode(); 3970 const MCInstrDesc &Desc = MII.get(Opcode); 3971 3972 // lds_direct register is defined so that it can be used 3973 // with 9-bit operands only. Ignore encodings which do not accept these. 
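// For example, lds_direct is accepted only as src0 of VOP1/VOP2/VOP3/VOP3P/VOPC encodings; the checks below reject it for SDWA, for *rev opcodes, for other source positions, and on gfx90a / gfx11+ where it was removed.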
3974 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3975 if ((Desc.TSFlags & Enc) == 0) 3976 return None; 3977 3978 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3979 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3980 if (SrcIdx == -1) 3981 break; 3982 const auto &Src = Inst.getOperand(SrcIdx); 3983 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3984 3985 if (isGFX90A() || isGFX11Plus()) 3986 return StringRef("lds_direct is not supported on this GPU"); 3987 3988 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3989 return StringRef("lds_direct cannot be used with this instruction"); 3990 3991 if (SrcName != OpName::src0) 3992 return StringRef("lds_direct may be used as src0 only"); 3993 } 3994 } 3995 3996 return None; 3997 } 3998 3999 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4000 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4001 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4002 if (Op.isFlatOffset()) 4003 return Op.getStartLoc(); 4004 } 4005 return getLoc(); 4006 } 4007 4008 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4009 const OperandVector &Operands) { 4010 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4011 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4012 return true; 4013 4014 auto Opcode = Inst.getOpcode(); 4015 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4016 assert(OpNum != -1); 4017 4018 const auto &Op = Inst.getOperand(OpNum); 4019 if (!hasFlatOffsets() && Op.getImm() != 0) { 4020 Error(getFlatOffsetLoc(Operands), 4021 "flat offset modifier is not supported on this GPU"); 4022 return false; 4023 } 4024 4025 // For FLAT segment the offset must be positive; 4026 // MSB is ignored and forced to zero. 4027 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4028 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4029 if (!isIntN(OffsetSize, Op.getImm())) { 4030 Error(getFlatOffsetLoc(Operands), 4031 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4032 return false; 4033 } 4034 } else { 4035 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4036 if (!isUIntN(OffsetSize, Op.getImm())) { 4037 Error(getFlatOffsetLoc(Operands), 4038 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4039 return false; 4040 } 4041 } 4042 4043 return true; 4044 } 4045 4046 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4047 // Start with second operand because SMEM Offset cannot be dst or src0. 
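// (Operands[0] is the mnemonic token, so the scan begins at index 2, e.g. at s[2:3] in 's_load_dword s5, s[2:3], 0x10'.)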
4048 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4049 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4050 if (Op.isSMEMOffset()) 4051 return Op.getStartLoc(); 4052 } 4053 return getLoc(); 4054 } 4055 4056 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4057 const OperandVector &Operands) { 4058 if (isCI() || isSI()) 4059 return true; 4060 4061 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4062 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4063 return true; 4064 4065 auto Opcode = Inst.getOpcode(); 4066 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4067 if (OpNum == -1) 4068 return true; 4069 4070 const auto &Op = Inst.getOperand(OpNum); 4071 if (!Op.isImm()) 4072 return true; 4073 4074 uint64_t Offset = Op.getImm(); 4075 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4076 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4077 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4078 return true; 4079 4080 Error(getSMEMOffsetLoc(Operands), 4081 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4082 "expected a 21-bit signed offset"); 4083 4084 return false; 4085 } 4086 4087 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4088 unsigned Opcode = Inst.getOpcode(); 4089 const MCInstrDesc &Desc = MII.get(Opcode); 4090 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4091 return true; 4092 4093 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4094 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4095 4096 const int OpIndices[] = { Src0Idx, Src1Idx }; 4097 4098 unsigned NumExprs = 0; 4099 unsigned NumLiterals = 0; 4100 uint32_t LiteralValue; 4101 4102 for (int OpIdx : OpIndices) { 4103 if (OpIdx == -1) break; 4104 4105 const MCOperand &MO = Inst.getOperand(OpIdx); 4106 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4107 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4108 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4109 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4110 if (NumLiterals == 0 || LiteralValue != Value) { 4111 LiteralValue = Value; 4112 ++NumLiterals; 4113 } 4114 } else if (MO.isExpr()) { 4115 ++NumExprs; 4116 } 4117 } 4118 } 4119 4120 return NumLiterals + NumExprs <= 1; 4121 } 4122 4123 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4124 const unsigned Opc = Inst.getOpcode(); 4125 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4126 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4127 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4128 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4129 4130 if (OpSel & ~3) 4131 return false; 4132 } 4133 4134 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4135 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4136 if (OpSelIdx != -1) { 4137 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4138 return false; 4139 } 4140 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4141 if (OpSelHiIdx != -1) { 4142 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4143 return false; 4144 } 4145 } 4146 4147 return true; 4148 } 4149 4150 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4151 const OperandVector &Operands) { 4152 const unsigned Opc = Inst.getOpcode(); 4153 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4154 if (DppCtrlIdx < 0) 4155 return true; 4156 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4157 4158 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4159 // DPP64 is supported for row_newbcast only. 4160 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4161 if (Src0Idx >= 0 && 4162 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4163 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4164 Error(S, "64 bit dpp only supports row_newbcast"); 4165 return false; 4166 } 4167 } 4168 4169 return true; 4170 } 4171 4172 // Check if VCC register matches wavefront size 4173 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4174 auto FB = getFeatureBits(); 4175 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4176 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4177 } 4178 4179 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4180 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4181 const OperandVector &Operands) { 4182 unsigned Opcode = Inst.getOpcode(); 4183 const MCInstrDesc &Desc = MII.get(Opcode); 4184 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4185 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4186 ImmIdx == -1) 4187 return true; 4188 4189 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4190 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4191 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4192 4193 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4194 4195 unsigned NumExprs = 0; 4196 unsigned NumLiterals = 0; 4197 uint32_t LiteralValue; 4198 4199 for (int OpIdx : OpIndices) { 4200 if (OpIdx == -1) 4201 continue; 4202 4203 const MCOperand &MO = Inst.getOperand(OpIdx); 4204 if (!MO.isImm() && !MO.isExpr()) 4205 continue; 4206 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4207 continue; 4208 4209 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4210 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4211 Error(getConstLoc(Operands), 4212 "inline constants are not allowed for this operand"); 4213 return false; 4214 } 4215 4216 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4217 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4218 if (NumLiterals == 0 || LiteralValue != Value) { 4219 LiteralValue = Value; 4220 ++NumLiterals; 4221 } 4222 } else if (MO.isExpr()) { 4223 ++NumExprs; 4224 } 4225 } 4226 NumLiterals += NumExprs; 4227 4228 if (!NumLiterals) 4229 return true; 4230 4231 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4232 Error(getLitLoc(Operands), "literal operands are not supported"); 4233 return false; 4234 } 4235 4236 if (NumLiterals > 1) { 4237 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4238 return false; 4239 } 4240 4241 return true; 4242 } 4243 4244 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4245 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4246 const MCRegisterInfo *MRI) { 4247 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4248 if (OpIdx < 0) 4249 return -1; 4250 4251 const MCOperand &Op = Inst.getOperand(OpIdx); 4252 if (!Op.isReg()) 4253 return -1; 4254 4255 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4256 auto Reg = Sub ? Sub : Op.getReg(); 4257 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4258 return AGPR32.contains(Reg) ? 
1 : 0; 4259 } 4260 4261 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4262 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4263 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4264 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4265 SIInstrFlags::DS)) == 0) 4266 return true; 4267 4268 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4269 : AMDGPU::OpName::vdata; 4270 4271 const MCRegisterInfo *MRI = getMRI(); 4272 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4273 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4274 4275 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4276 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4277 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4278 return false; 4279 } 4280 4281 auto FB = getFeatureBits(); 4282 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4283 if (DataAreg < 0 || DstAreg < 0) 4284 return true; 4285 return DstAreg == DataAreg; 4286 } 4287 4288 return DstAreg < 1 && DataAreg < 1; 4289 } 4290 4291 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4292 auto FB = getFeatureBits(); 4293 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4294 return true; 4295 4296 const MCRegisterInfo *MRI = getMRI(); 4297 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4298 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4299 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4300 const MCOperand &Op = Inst.getOperand(I); 4301 if (!Op.isReg()) 4302 continue; 4303 4304 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4305 if (!Sub) 4306 continue; 4307 4308 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4309 return false; 4310 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4311 return false; 4312 } 4313 4314 return true; 4315 } 4316 4317 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4318 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4319 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4320 if (Op.isBLGP()) 4321 return Op.getStartLoc(); 4322 } 4323 return SMLoc(); 4324 } 4325 4326 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4327 const OperandVector &Operands) { 4328 unsigned Opc = Inst.getOpcode(); 4329 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4330 if (BlgpIdx == -1) 4331 return true; 4332 SMLoc BLGPLoc = getBLGPLoc(Operands); 4333 if (!BLGPLoc.isValid()) 4334 return true; 4335 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4336 auto FB = getFeatureBits(); 4337 bool UsesNeg = false; 4338 if (FB[AMDGPU::FeatureGFX940Insts]) { 4339 switch (Opc) { 4340 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4341 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4342 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4343 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4344 UsesNeg = true; 4345 } 4346 } 4347 4348 if (IsNeg == UsesNeg) 4349 return true; 4350 4351 Error(BLGPLoc, 4352 UsesNeg ? "invalid modifier: blgp is not supported" 4353 : "invalid modifier: neg is not supported"); 4354 4355 return false; 4356 } 4357 4358 // gfx90a has an undocumented limitation: 4359 // DS_GWS opcodes must use even aligned registers. 
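// For example, using v1 as the data operand of ds_gws_init is rejected below on gfx90a, while an even register such as v0 or v2 is accepted.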
4360 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4361 const OperandVector &Operands) { 4362 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4363 return true; 4364 4365 int Opc = Inst.getOpcode(); 4366 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4367 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4368 return true; 4369 4370 const MCRegisterInfo *MRI = getMRI(); 4371 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4372 int Data0Pos = 4373 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4374 assert(Data0Pos != -1); 4375 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4376 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4377 if (RegIdx & 1) { 4378 SMLoc RegLoc = getRegLoc(Reg, Operands); 4379 Error(RegLoc, "vgpr must be even aligned"); 4380 return false; 4381 } 4382 4383 return true; 4384 } 4385 4386 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4387 const OperandVector &Operands, 4388 const SMLoc &IDLoc) { 4389 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4390 AMDGPU::OpName::cpol); 4391 if (CPolPos == -1) 4392 return true; 4393 4394 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4395 4396 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4397 if (TSFlags & SIInstrFlags::SMRD) { 4398 if (CPol && (isSI() || isCI())) { 4399 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4400 Error(S, "cache policy is not supported for SMRD instructions"); 4401 return false; 4402 } 4403 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4404 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4405 return false; 4406 } 4407 } 4408 4409 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4410 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4411 StringRef CStr(S.getPointer()); 4412 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4413 Error(S, "scc is not supported on this GPU"); 4414 return false; 4415 } 4416 4417 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4418 return true; 4419 4420 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4421 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4422 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4423 : "instruction must use glc"); 4424 return false; 4425 } 4426 } else { 4427 if (CPol & CPol::GLC) { 4428 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4429 StringRef CStr(S.getPointer()); 4430 S = SMLoc::getFromPointer( 4431 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4432 Error(S, isGFX940() ? "instruction must not use sc0" 4433 : "instruction must not use glc"); 4434 return false; 4435 } 4436 } 4437 4438 return true; 4439 } 4440 4441 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4442 const OperandVector &Operands, 4443 const SMLoc &IDLoc) { 4444 if (isGFX940()) 4445 return true; 4446 4447 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4448 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4449 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4450 return true; 4451 // This is FLAT LDS DMA. 4452 4453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4454 StringRef CStr(S.getPointer()); 4455 if (!CStr.startswith("lds")) { 4456 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4457 // The LDS version should have an 'lds' modifier, but it follows optional 4458 // operands so its absence is ignored by the matcher.
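// Reporting a generic operand error here avoids silently encoding the LDS DMA form for what was written as an ordinary FLAT load.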
4459 Error(IDLoc, "invalid operands for instruction"); 4460 return false; 4461 } 4462 4463 return true; 4464 } 4465 4466 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4467 const SMLoc &IDLoc, 4468 const OperandVector &Operands) { 4469 if (auto ErrMsg = validateLdsDirect(Inst)) { 4470 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4471 return false; 4472 } 4473 if (!validateSOPLiteral(Inst)) { 4474 Error(getLitLoc(Operands), 4475 "only one literal operand is allowed"); 4476 return false; 4477 } 4478 if (!validateVOPLiteral(Inst, Operands)) { 4479 return false; 4480 } 4481 if (!validateConstantBusLimitations(Inst, Operands)) { 4482 return false; 4483 } 4484 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4485 return false; 4486 } 4487 if (!validateIntClampSupported(Inst)) { 4488 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4489 "integer clamping is not supported on this GPU"); 4490 return false; 4491 } 4492 if (!validateOpSel(Inst)) { 4493 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4494 "invalid op_sel operand"); 4495 return false; 4496 } 4497 if (!validateDPP(Inst, Operands)) { 4498 return false; 4499 } 4500 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4501 if (!validateMIMGD16(Inst)) { 4502 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4503 "d16 modifier is not supported on this GPU"); 4504 return false; 4505 } 4506 if (!validateMIMGDim(Inst)) { 4507 Error(IDLoc, "dim modifier is required on this GPU"); 4508 return false; 4509 } 4510 if (!validateMIMGMSAA(Inst)) { 4511 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4512 "invalid dim; must be MSAA type"); 4513 return false; 4514 } 4515 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4516 Error(IDLoc, *ErrMsg); 4517 return false; 4518 } 4519 if (!validateMIMGAddrSize(Inst)) { 4520 Error(IDLoc, 4521 "image address size does not match dim and a16"); 4522 return false; 4523 } 4524 if (!validateMIMGAtomicDMask(Inst)) { 4525 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4526 "invalid atomic image dmask"); 4527 return false; 4528 } 4529 if (!validateMIMGGatherDMask(Inst)) { 4530 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4531 "invalid image_gather dmask: only one bit must be set"); 4532 return false; 4533 } 4534 if (!validateMovrels(Inst, Operands)) { 4535 return false; 4536 } 4537 if (!validateFlatOffset(Inst, Operands)) { 4538 return false; 4539 } 4540 if (!validateSMEMOffset(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateMAIAccWrite(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateMFMA(Inst, Operands)) { 4547 return false; 4548 } 4549 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4550 return false; 4551 } 4552 4553 if (!validateAGPRLdSt(Inst)) { 4554 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4555 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4556 : "invalid register class: agpr loads and stores not supported on this GPU" 4557 ); 4558 return false; 4559 } 4560 if (!validateVGPRAlign(Inst)) { 4561 Error(IDLoc, 4562 "invalid register class: vgpr tuples must be 64 bit aligned"); 4563 return false; 4564 } 4565 if (!validateGWS(Inst, Operands)) { 4566 return false; 4567 } 4568 4569 if (!validateBLGP(Inst, Operands)) { 4570 return false; 4571 } 4572 4573 if (!validateDivScale(Inst)) { 4574 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4575 return false; 4576 } 4577 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4578 return false; 4579 } 4580 4581 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4582 return false; 4583 } 4584 4585 return true; 4586 } 4587 4588 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4589 const FeatureBitset &FBS, 4590 unsigned VariantID = 0); 4591 4592 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4593 const FeatureBitset &AvailableFeatures, 4594 unsigned VariantID); 4595 4596 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4597 const FeatureBitset &FBS) { 4598 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4599 } 4600 4601 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4602 const FeatureBitset &FBS, 4603 ArrayRef<unsigned> Variants) { 4604 for (auto Variant : Variants) { 4605 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4606 return true; 4607 } 4608 4609 return false; 4610 } 4611 4612 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4613 const SMLoc &IDLoc) { 4614 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4615 4616 // Check if requested instruction variant is supported. 4617 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4618 return false; 4619 4620 // This instruction is not supported. 4621 // Clear any other pending errors because they are no longer relevant. 4622 getParser().clearPendingErrors(); 4623 4624 // Requested instruction variant is not supported. 4625 // Check if any other variants are supported. 4626 StringRef VariantName = getMatchedVariantName(); 4627 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4628 return Error(IDLoc, 4629 Twine(VariantName, 4630 " variant of this instruction is not supported")); 4631 } 4632 4633 // Finally check if this instruction is supported on any other GPU. 4634 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4635 return Error(IDLoc, "instruction not supported on this GPU"); 4636 } 4637 4638 // Instruction not supported on any GPU. Probably a typo. 4639 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4640 return Error(IDLoc, "invalid instruction" + Suggestion); 4641 } 4642 4643 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4644 OperandVector &Operands, 4645 MCStreamer &Out, 4646 uint64_t &ErrorInfo, 4647 bool MatchingInlineAsm) { 4648 MCInst Inst; 4649 unsigned Result = Match_Success; 4650 for (auto Variant : getMatchedVariants()) { 4651 uint64_t EI; 4652 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4653 Variant); 4654 // We order match statuses from least to most specific. 
We use the most specific 4655 // status as the result: 4656 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4657 if ((R == Match_Success) || 4658 (R == Match_PreferE32) || 4659 (R == Match_MissingFeature && Result != Match_PreferE32) || 4660 (R == Match_InvalidOperand && Result != Match_MissingFeature 4661 && Result != Match_PreferE32) || 4662 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4663 && Result != Match_MissingFeature 4664 && Result != Match_PreferE32)) { 4665 Result = R; 4666 ErrorInfo = EI; 4667 } 4668 if (R == Match_Success) 4669 break; 4670 } 4671 4672 if (Result == Match_Success) { 4673 if (!validateInstruction(Inst, IDLoc, Operands)) { 4674 return true; 4675 } 4676 Inst.setLoc(IDLoc); 4677 Out.emitInstruction(Inst, getSTI()); 4678 return false; 4679 } 4680 4681 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4682 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4683 return true; 4684 } 4685 4686 switch (Result) { 4687 default: break; 4688 case Match_MissingFeature: 4689 // It has been verified that the specified instruction 4690 // mnemonic is valid. A match was found but it requires 4691 // features which are not supported on this GPU. 4692 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4693 4694 case Match_InvalidOperand: { 4695 SMLoc ErrorLoc = IDLoc; 4696 if (ErrorInfo != ~0ULL) { 4697 if (ErrorInfo >= Operands.size()) { 4698 return Error(IDLoc, "too few operands for instruction"); 4699 } 4700 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4701 if (ErrorLoc == SMLoc()) 4702 ErrorLoc = IDLoc; 4703 } 4704 return Error(ErrorLoc, "invalid operand for instruction"); 4705 } 4706 4707 case Match_PreferE32: 4708 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4709 "should be encoded as e32"); 4710 case Match_MnemonicFail: 4711 llvm_unreachable("Invalid instructions should have been handled already"); 4712 } 4713 llvm_unreachable("Implement any new match types added!"); 4714 } 4715 4716 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4717 int64_t Tmp = -1; 4718 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4719 return true; 4720 } 4721 if (getParser().parseAbsoluteExpression(Tmp)) { 4722 return true; 4723 } 4724 Ret = static_cast<uint32_t>(Tmp); 4725 return false; 4726 } 4727 4728 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4729 uint32_t &Minor) { 4730 if (ParseAsAbsoluteExpression(Major)) 4731 return TokError("invalid major version"); 4732 4733 if (!trySkipToken(AsmToken::Comma)) 4734 return TokError("minor version number required, comma expected"); 4735 4736 if (ParseAsAbsoluteExpression(Minor)) 4737 return TokError("invalid minor version"); 4738 4739 return false; 4740 } 4741 4742 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4743 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4744 return TokError("directive only supported for amdgcn architecture"); 4745 4746 std::string TargetIDDirective; 4747 SMLoc TargetStart = getTok().getLoc(); 4748 if (getParser().parseEscapedString(TargetIDDirective)) 4749 return true; 4750 4751 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4752 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4753 return getParser().Error(TargetRange.Start, 4754 (Twine(".amdgcn_target directive's target id ") + 4755 Twine(TargetIDDirective) + 4756 Twine(" does not match the specified target id ") + 4757
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4758 4759 return false; 4760 } 4761 4762 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4763 return Error(Range.Start, "value out of range", Range); 4764 } 4765 4766 bool AMDGPUAsmParser::calculateGPRBlocks( 4767 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4768 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4769 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4770 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4771 // TODO(scott.linder): These calculations are duplicated from 4772 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4773 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4774 4775 unsigned NumVGPRs = NextFreeVGPR; 4776 unsigned NumSGPRs = NextFreeSGPR; 4777 4778 if (Version.Major >= 10) 4779 NumSGPRs = 0; 4780 else { 4781 unsigned MaxAddressableNumSGPRs = 4782 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4783 4784 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4785 NumSGPRs > MaxAddressableNumSGPRs) 4786 return OutOfRangeError(SGPRRange); 4787 4788 NumSGPRs += 4789 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4790 4791 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4792 NumSGPRs > MaxAddressableNumSGPRs) 4793 return OutOfRangeError(SGPRRange); 4794 4795 if (Features.test(FeatureSGPRInitBug)) 4796 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4797 } 4798 4799 VGPRBlocks = 4800 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4801 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4802 4803 return false; 4804 } 4805 4806 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4807 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4808 return TokError("directive only supported for amdgcn architecture"); 4809 4810 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4811 return TokError("directive only supported for amdhsa OS"); 4812 4813 StringRef KernelName; 4814 if (getParser().parseIdentifier(KernelName)) 4815 return true; 4816 4817 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4818 4819 StringSet<> Seen; 4820 4821 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4822 4823 SMRange VGPRRange; 4824 uint64_t NextFreeVGPR = 0; 4825 uint64_t AccumOffset = 0; 4826 uint64_t SharedVGPRCount = 0; 4827 SMRange SGPRRange; 4828 uint64_t NextFreeSGPR = 0; 4829 4830 // Count the number of user SGPRs implied from the enabled feature bits. 4831 unsigned ImpliedUserSGPRCount = 0; 4832 4833 // Track if the asm explicitly contains the directive for the user SGPR 4834 // count. 
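// (.amdhsa_user_sgpr_count). When given, it must not be smaller than the count implied by the enabled user SGPR directives; this is checked after all directives have been parsed.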
4835 Optional<unsigned> ExplicitUserSGPRCount; 4836 bool ReserveVCC = true; 4837 bool ReserveFlatScr = true; 4838 Optional<bool> EnableWavefrontSize32; 4839 4840 while (true) { 4841 while (trySkipToken(AsmToken::EndOfStatement)); 4842 4843 StringRef ID; 4844 SMRange IDRange = getTok().getLocRange(); 4845 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4846 return true; 4847 4848 if (ID == ".end_amdhsa_kernel") 4849 break; 4850 4851 if (Seen.find(ID) != Seen.end()) 4852 return TokError(".amdhsa_ directives cannot be repeated"); 4853 Seen.insert(ID); 4854 4855 SMLoc ValStart = getLoc(); 4856 int64_t IVal; 4857 if (getParser().parseAbsoluteExpression(IVal)) 4858 return true; 4859 SMLoc ValEnd = getLoc(); 4860 SMRange ValRange = SMRange(ValStart, ValEnd); 4861 4862 if (IVal < 0) 4863 return OutOfRangeError(ValRange); 4864 4865 uint64_t Val = IVal; 4866 4867 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4868 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4869 return OutOfRangeError(RANGE); \ 4870 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4871 4872 if (ID == ".amdhsa_group_segment_fixed_size") { 4873 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4874 return OutOfRangeError(ValRange); 4875 KD.group_segment_fixed_size = Val; 4876 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4877 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4878 return OutOfRangeError(ValRange); 4879 KD.private_segment_fixed_size = Val; 4880 } else if (ID == ".amdhsa_kernarg_size") { 4881 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4882 return OutOfRangeError(ValRange); 4883 KD.kernarg_size = Val; 4884 } else if (ID == ".amdhsa_user_sgpr_count") { 4885 ExplicitUserSGPRCount = Val; 4886 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4887 if (hasArchitectedFlatScratch()) 4888 return Error(IDRange.Start, 4889 "directive is not supported with architected flat scratch", 4890 IDRange); 4891 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4892 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4893 Val, ValRange); 4894 if (Val) 4895 ImpliedUserSGPRCount += 4; 4896 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4897 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4898 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4899 ValRange); 4900 if (Val) 4901 ImpliedUserSGPRCount += 2; 4902 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4903 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4904 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4905 ValRange); 4906 if (Val) 4907 ImpliedUserSGPRCount += 2; 4908 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4909 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4910 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4911 Val, ValRange); 4912 if (Val) 4913 ImpliedUserSGPRCount += 2; 4914 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4915 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4916 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4917 ValRange); 4918 if (Val) 4919 ImpliedUserSGPRCount += 2; 4920 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4921 if (hasArchitectedFlatScratch()) 4922 return Error(IDRange.Start, 4923 "directive is not supported with architected flat scratch", 4924 IDRange); 4925 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4926 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4927 ValRange); 4928 if (Val) 4929 ImpliedUserSGPRCount += 2; 4930 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4931 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4932 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4933 Val, ValRange); 4934 if (Val) 4935 ImpliedUserSGPRCount += 1; 4936 } else if (ID == ".amdhsa_wavefront_size32") { 4937 if (IVersion.Major < 10) 4938 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4939 EnableWavefrontSize32 = Val; 4940 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4941 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4942 Val, ValRange); 4943 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4944 if (hasArchitectedFlatScratch()) 4945 return Error(IDRange.Start, 4946 "directive is not supported with architected flat scratch", 4947 IDRange); 4948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4949 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4950 } else if (ID == ".amdhsa_enable_private_segment") { 4951 if (!hasArchitectedFlatScratch()) 4952 return Error( 4953 IDRange.Start, 4954 "directive is not supported without architected flat scratch", 4955 IDRange); 4956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4957 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4958 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4959 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4960 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4961 ValRange); 4962 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4963 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4964 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4965 ValRange); 4966 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4967 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4968 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4969 ValRange); 4970 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4971 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4972 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4973 ValRange); 4974 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4975 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4976 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4977 ValRange); 4978 } else if (ID == ".amdhsa_next_free_vgpr") { 4979 VGPRRange = ValRange; 4980 NextFreeVGPR = Val; 4981 } else if (ID == ".amdhsa_next_free_sgpr") { 4982 SGPRRange = ValRange; 4983 NextFreeSGPR = Val; 4984 } else if (ID == ".amdhsa_accum_offset") { 4985 if (!isGFX90A()) 4986 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4987 AccumOffset = Val; 4988 } else if (ID == ".amdhsa_reserve_vcc") { 4989 if (!isUInt<1>(Val)) 4990 return OutOfRangeError(ValRange); 4991 ReserveVCC = Val; 4992 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4993 if (IVersion.Major < 7) 4994 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4995 if (hasArchitectedFlatScratch()) 4996 return Error(IDRange.Start, 4997 "directive is not supported with architected flat scratch", 4998 IDRange); 4999 if (!isUInt<1>(Val)) 5000 return OutOfRangeError(ValRange); 5001 ReserveFlatScr = Val; 5002 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5003 if (IVersion.Major < 8) 5004 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5005 if (!isUInt<1>(Val)) 5006 return OutOfRangeError(ValRange); 5007 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5008 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5009 IDRange); 5010 } else if (ID == ".amdhsa_float_round_mode_32") { 5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5012 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5013 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5015 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5016 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5018 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5019 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5020 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5021 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5022 ValRange); 5023 } else if (ID == ".amdhsa_dx10_clamp") { 5024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5025 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5026 } else if (ID == ".amdhsa_ieee_mode") { 5027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5028 Val, ValRange); 5029 } else if (ID == ".amdhsa_fp16_overflow") { 5030 if (IVersion.Major < 9) 5031 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5032 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5033 ValRange); 5034 } else if (ID == ".amdhsa_tg_split") { 5035 if (!isGFX90A()) 5036 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5037 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5038 ValRange); 5039 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5040 if (IVersion.Major < 10) 5041 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5042 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5043 ValRange); 5044 } else if (ID == ".amdhsa_memory_ordered") { 5045 if (IVersion.Major < 10) 5046 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5048 ValRange); 5049 } else if (ID == ".amdhsa_forward_progress") { 5050 if (IVersion.Major < 10) 5051 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5052 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5053 ValRange); 5054 } else if (ID == ".amdhsa_shared_vgpr_count") { 5055 if (IVersion.Major < 10) 5056 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5057 SharedVGPRCount = Val; 5058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5059 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5060 ValRange); 5061 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5062 PARSE_BITS_ENTRY( 5063 KD.compute_pgm_rsrc2, 5064 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5065 ValRange); 5066 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5067 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5068 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5069 Val, ValRange); 5070 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5071 PARSE_BITS_ENTRY( 5072 KD.compute_pgm_rsrc2, 5073 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5074 ValRange); 5075 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5076 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5077 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5078 Val, ValRange); 5079 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5081 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5082 Val, ValRange); 5083 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5084 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5085 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5086 Val, ValRange); 5087 } else if (ID == ".amdhsa_exception_int_div_zero") { 5088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5089 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5090 Val, ValRange); 5091 } else { 5092 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5093 } 5094 5095 #undef PARSE_BITS_ENTRY 5096 } 5097 5098 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5099 return TokError(".amdhsa_next_free_vgpr directive is required"); 5100 5101 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5102 return TokError(".amdhsa_next_free_sgpr directive is required"); 5103 5104 unsigned VGPRBlocks; 5105 unsigned SGPRBlocks; 5106 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5107 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5108 EnableWavefrontSize32, NextFreeVGPR, 5109 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5110 SGPRBlocks)) 5111 return true; 5112 5113 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5114 VGPRBlocks)) 5115 return OutOfRangeError(VGPRRange); 5116 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5117 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5118 5119 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5120 SGPRBlocks)) 5121 return OutOfRangeError(SGPRRange); 5122 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5123 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5124 SGPRBlocks); 5125 5126 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5127 return TokError(".amdhsa_user_sgpr_count smaller than the count implied by " 5128 "enabled user SGPRs"); 5129 5130 unsigned UserSGPRCount = 5131 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5132 5133 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5134 return TokError("too many user SGPRs enabled"); 5135 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5136 UserSGPRCount); 5137 5138 if (isGFX90A()) { 5139 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5140 return TokError(".amdhsa_accum_offset directive is required"); 5141 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5142 return TokError("accum_offset should be in range [4..256] in " 5143 "increments of 4"); 5144 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5145 return TokError("accum_offset exceeds total VGPR allocation"); 5146 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5147 (AccumOffset / 4 - 1)); 5148 } 5149 5150 if (IVersion.Major == 10) { 5151 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5152 if (SharedVGPRCount && EnableWavefrontSize32) { 5153 return TokError("shared_vgpr_count directive not valid on " 5154 "wavefront size 32"); 5155 } 5156 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5157 return TokError("shared_vgpr_count*2 + " 5158 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5159 "exceed 63\n"); 5160 } 5161 } 5162 5163 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5164 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5165 ReserveFlatScr); 5166 return false; 5167 } 5168 5169 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5170 uint32_t Major; 5171 uint32_t Minor; 5172 5173 if (ParseDirectiveMajorMinor(Major, Minor)) 5174 return true; 5175 5176 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5177 return false; 5178 } 5179 5180 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5181 uint32_t Major; 5182 uint32_t Minor; 5183 uint32_t Stepping; 5184 StringRef VendorName; 5185 StringRef ArchName; 5186 5187 // If this directive has no
arguments, then use the ISA version for the 5188 // targeted GPU. 5189 if (isToken(AsmToken::EndOfStatement)) { 5190 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5191 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5192 ISA.Stepping, 5193 "AMD", "AMDGPU"); 5194 return false; 5195 } 5196 5197 if (ParseDirectiveMajorMinor(Major, Minor)) 5198 return true; 5199 5200 if (!trySkipToken(AsmToken::Comma)) 5201 return TokError("stepping version number required, comma expected"); 5202 5203 if (ParseAsAbsoluteExpression(Stepping)) 5204 return TokError("invalid stepping version"); 5205 5206 if (!trySkipToken(AsmToken::Comma)) 5207 return TokError("vendor name required, comma expected"); 5208 5209 if (!parseString(VendorName, "invalid vendor name")) 5210 return true; 5211 5212 if (!trySkipToken(AsmToken::Comma)) 5213 return TokError("arch name required, comma expected"); 5214 5215 if (!parseString(ArchName, "invalid arch name")) 5216 return true; 5217 5218 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5219 VendorName, ArchName); 5220 return false; 5221 } 5222 5223 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5224 amd_kernel_code_t &Header) { 5225 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5226 // assembly for backwards compatibility. 5227 if (ID == "max_scratch_backing_memory_byte_size") { 5228 Parser.eatToEndOfStatement(); 5229 return false; 5230 } 5231 5232 SmallString<40> ErrStr; 5233 raw_svector_ostream Err(ErrStr); 5234 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5235 return TokError(Err.str()); 5236 } 5237 Lex(); 5238 5239 if (ID == "enable_wavefront_size32") { 5240 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5241 if (!isGFX10Plus()) 5242 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5243 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5244 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5245 } else { 5246 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5247 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5248 } 5249 } 5250 5251 if (ID == "wavefront_size") { 5252 if (Header.wavefront_size == 5) { 5253 if (!isGFX10Plus()) 5254 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5255 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5256 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5257 } else if (Header.wavefront_size == 6) { 5258 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5259 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5260 } 5261 } 5262 5263 if (ID == "enable_wgp_mode") { 5264 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5265 !isGFX10Plus()) 5266 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5267 } 5268 5269 if (ID == "enable_mem_ordered") { 5270 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5271 !isGFX10Plus()) 5272 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5273 } 5274 5275 if (ID == "enable_fwd_progress") { 5276 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5277 !isGFX10Plus()) 5278 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5279 } 5280 5281 return false; 5282 } 5283 5284 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5285 amd_kernel_code_t Header; 5286 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5287 5288 while (true) { 
5289 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5290 // will set the current token to EndOfStatement. 5291 while(trySkipToken(AsmToken::EndOfStatement)); 5292 5293 StringRef ID; 5294 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5295 return true; 5296 5297 if (ID == ".end_amd_kernel_code_t") 5298 break; 5299 5300 if (ParseAMDKernelCodeTValue(ID, Header)) 5301 return true; 5302 } 5303 5304 getTargetStreamer().EmitAMDKernelCodeT(Header); 5305 5306 return false; 5307 } 5308 5309 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5310 StringRef KernelName; 5311 if (!parseId(KernelName, "expected symbol name")) 5312 return true; 5313 5314 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5315 ELF::STT_AMDGPU_HSA_KERNEL); 5316 5317 KernelScope.initialize(getContext()); 5318 return false; 5319 } 5320 5321 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5322 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5323 return Error(getLoc(), 5324 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5325 "architectures"); 5326 } 5327 5328 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5329 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5330 return Error(getParser().getTok().getLoc(), "target id must match options"); 5331 5332 getTargetStreamer().EmitISAVersion(); 5333 Lex(); 5334 5335 return false; 5336 } 5337 5338 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5339 const char *AssemblerDirectiveBegin; 5340 const char *AssemblerDirectiveEnd; 5341 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5342 isHsaAbiVersion3AndAbove(&getSTI()) 5343 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5344 HSAMD::V3::AssemblerDirectiveEnd) 5345 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5346 HSAMD::AssemblerDirectiveEnd); 5347 5348 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5349 return Error(getLoc(), 5350 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5351 "not available on non-amdhsa OSes")).str()); 5352 } 5353 5354 std::string HSAMetadataString; 5355 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5356 HSAMetadataString)) 5357 return true; 5358 5359 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5360 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5361 return Error(getLoc(), "invalid HSA metadata"); 5362 } else { 5363 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5364 return Error(getLoc(), "invalid HSA metadata"); 5365 } 5366 5367 return false; 5368 } 5369 5370 /// Common code to parse out a block of text (typically YAML) between start and 5371 /// end directives. 
5372 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5373 const char *AssemblerDirectiveEnd, 5374 std::string &CollectString) { 5375 5376 raw_string_ostream CollectStream(CollectString); 5377 5378 getLexer().setSkipSpace(false); 5379 5380 bool FoundEnd = false; 5381 while (!isToken(AsmToken::Eof)) { 5382 while (isToken(AsmToken::Space)) { 5383 CollectStream << getTokenStr(); 5384 Lex(); 5385 } 5386 5387 if (trySkipId(AssemblerDirectiveEnd)) { 5388 FoundEnd = true; 5389 break; 5390 } 5391 5392 CollectStream << Parser.parseStringToEndOfStatement() 5393 << getContext().getAsmInfo()->getSeparatorString(); 5394 5395 Parser.eatToEndOfStatement(); 5396 } 5397 5398 getLexer().setSkipSpace(true); 5399 5400 if (isToken(AsmToken::Eof) && !FoundEnd) { 5401 return TokError(Twine("expected directive ") + 5402 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5403 } 5404 5405 CollectStream.flush(); 5406 return false; 5407 } 5408 5409 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5410 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5411 std::string String; 5412 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5413 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5414 return true; 5415 5416 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5417 if (!PALMetadata->setFromString(String)) 5418 return Error(getLoc(), "invalid PAL metadata"); 5419 return false; 5420 } 5421 5422 /// Parse the assembler directive for old linear-format PAL metadata. 5423 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5424 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5425 return Error(getLoc(), 5426 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5427 "not available on non-amdpal OSes")).str()); 5428 } 5429 5430 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5431 PALMetadata->setLegacy(); 5432 for (;;) { 5433 uint32_t Key, Value; 5434 if (ParseAsAbsoluteExpression(Key)) { 5435 return TokError(Twine("invalid value in ") + 5436 Twine(PALMD::AssemblerDirective)); 5437 } 5438 if (!trySkipToken(AsmToken::Comma)) { 5439 return TokError(Twine("expected an even number of values in ") + 5440 Twine(PALMD::AssemblerDirective)); 5441 } 5442 if (ParseAsAbsoluteExpression(Value)) { 5443 return TokError(Twine("invalid value in ") + 5444 Twine(PALMD::AssemblerDirective)); 5445 } 5446 PALMetadata->setRegister(Key, Value); 5447 if (!trySkipToken(AsmToken::Comma)) 5448 break; 5449 } 5450 return false; 5451 } 5452 5453 /// ParseDirectiveAMDGPULDS 5454 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5455 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5456 if (getParser().checkForValidSection()) 5457 return true; 5458 5459 StringRef Name; 5460 SMLoc NameLoc = getLoc(); 5461 if (getParser().parseIdentifier(Name)) 5462 return TokError("expected identifier in directive"); 5463 5464 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5465 if (parseToken(AsmToken::Comma, "expected ','")) 5466 return true; 5467 5468 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5469 5470 int64_t Size; 5471 SMLoc SizeLoc = getLoc(); 5472 if (getParser().parseAbsoluteExpression(Size)) 5473 return true; 5474 if (Size < 0) 5475 return Error(SizeLoc, "size must be non-negative"); 5476 if (Size > LocalMemorySize) 5477 return Error(SizeLoc, "size is too large"); 5478 5479 int64_t Alignment = 4; 5480 if (trySkipToken(AsmToken::Comma)) { 5481 SMLoc AlignLoc = getLoc(); 5482 if 
(getParser().parseAbsoluteExpression(Alignment)) 5483 return true; 5484 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5485 return Error(AlignLoc, "alignment must be a power of two"); 5486 5487 // Alignment larger than the size of LDS is possible in theory, as long 5488 // as the linker manages to place the symbol at address 0, but we do want 5489 // to make sure the alignment fits nicely into a 32-bit integer. 5490 if (Alignment >= 1u << 31) 5491 return Error(AlignLoc, "alignment is too large"); 5492 } 5493 5494 if (parseToken(AsmToken::EndOfStatement, 5495 "unexpected token in '.amdgpu_lds' directive")) 5496 return true; 5497 5498 Symbol->redefineIfPossible(); 5499 if (!Symbol->isUndefined()) 5500 return Error(NameLoc, "invalid symbol redefinition"); 5501 5502 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5503 return false; 5504 } 5505 5506 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5507 StringRef IDVal = DirectiveID.getString(); 5508 5509 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5510 if (IDVal == ".amdhsa_kernel") 5511 return ParseDirectiveAMDHSAKernel(); 5512 5513 // TODO: Restructure/combine with PAL metadata directive. 5514 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5515 return ParseDirectiveHSAMetadata(); 5516 } else { 5517 if (IDVal == ".hsa_code_object_version") 5518 return ParseDirectiveHSACodeObjectVersion(); 5519 5520 if (IDVal == ".hsa_code_object_isa") 5521 return ParseDirectiveHSACodeObjectISA(); 5522 5523 if (IDVal == ".amd_kernel_code_t") 5524 return ParseDirectiveAMDKernelCodeT(); 5525 5526 if (IDVal == ".amdgpu_hsa_kernel") 5527 return ParseDirectiveAMDGPUHsaKernel(); 5528 5529 if (IDVal == ".amd_amdgpu_isa") 5530 return ParseDirectiveISAVersion(); 5531 5532 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5533 return ParseDirectiveHSAMetadata(); 5534 } 5535 5536 if (IDVal == ".amdgcn_target") 5537 return ParseDirectiveAMDGCNTarget(); 5538 5539 if (IDVal == ".amdgpu_lds") 5540 return ParseDirectiveAMDGPULDS(); 5541 5542 if (IDVal == PALMD::AssemblerDirectiveBegin) 5543 return ParseDirectivePALMetadataBegin(); 5544 5545 if (IDVal == PALMD::AssemblerDirective) 5546 return ParseDirectivePALMetadata(); 5547 5548 return true; 5549 } 5550 5551 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5552 unsigned RegNo) { 5553 5554 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5555 return isGFX9Plus(); 5556 5557 // GFX10 has 2 more SGPRs, 104 and 105. 5558 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5559 return hasSGPR104_SGPR105(); 5560 5561 switch (RegNo) { 5562 case AMDGPU::SRC_SHARED_BASE: 5563 case AMDGPU::SRC_SHARED_LIMIT: 5564 case AMDGPU::SRC_PRIVATE_BASE: 5565 case AMDGPU::SRC_PRIVATE_LIMIT: 5566 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5567 return isGFX9Plus(); 5568 case AMDGPU::TBA: 5569 case AMDGPU::TBA_LO: 5570 case AMDGPU::TBA_HI: 5571 case AMDGPU::TMA: 5572 case AMDGPU::TMA_LO: 5573 case AMDGPU::TMA_HI: 5574 return !isGFX9Plus(); 5575 case AMDGPU::XNACK_MASK: 5576 case AMDGPU::XNACK_MASK_LO: 5577 case AMDGPU::XNACK_MASK_HI: 5578 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5579 case AMDGPU::SGPR_NULL: 5580 return isGFX10Plus(); 5581 default: 5582 break; 5583 } 5584 5585 if (isCI()) 5586 return true; 5587 5588 if (isSI() || isGFX10Plus()) { 5589 // No flat_scr on SI. 5590 // On GFX10 flat scratch is not a valid register operand and can only be 5591 // accessed with s_setreg/s_getreg.
5592 switch (RegNo) { 5593 case AMDGPU::FLAT_SCR: 5594 case AMDGPU::FLAT_SCR_LO: 5595 case AMDGPU::FLAT_SCR_HI: 5596 return false; 5597 default: 5598 return true; 5599 } 5600 } 5601 5602 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5603 // SI/CI have. 5604 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5605 return hasSGPR102_SGPR103(); 5606 5607 return true; 5608 } 5609 5610 OperandMatchResultTy 5611 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5612 OperandMode Mode) { 5613 // Try to parse with a custom parser 5614 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5615 5616 // If we successfully parsed the operand or if there was an error parsing, 5617 // we are done. 5618 // 5619 // If we are parsing after we reach EndOfStatement then this means we 5620 // are appending default values to the Operands list. This is only done 5621 // by a custom parser, so we shouldn't continue on to the generic parsing. 5622 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5623 isToken(AsmToken::EndOfStatement)) 5624 return ResTy; 5625 5626 SMLoc RBraceLoc; 5627 SMLoc LBraceLoc = getLoc(); 5628 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5629 unsigned Prefix = Operands.size(); 5630 5631 for (;;) { 5632 auto Loc = getLoc(); 5633 ResTy = parseReg(Operands); 5634 if (ResTy == MatchOperand_NoMatch) 5635 Error(Loc, "expected a register"); 5636 if (ResTy != MatchOperand_Success) 5637 return MatchOperand_ParseFail; 5638 5639 RBraceLoc = getLoc(); 5640 if (trySkipToken(AsmToken::RBrac)) 5641 break; 5642 5643 if (!skipToken(AsmToken::Comma, 5644 "expected a comma or a closing square bracket")) { 5645 return MatchOperand_ParseFail; 5646 } 5647 } 5648 5649 if (Operands.size() - Prefix > 1) { 5650 Operands.insert(Operands.begin() + Prefix, 5651 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5652 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5653 } 5654 5655 return MatchOperand_Success; 5656 } 5657 5658 return parseRegOrImm(Operands); 5659 } 5660 5661 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5662 // Clear any forced encodings from the previous instruction. 5663 setForcedEncodingSize(0); 5664 setForcedDPP(false); 5665 setForcedSDWA(false); 5666 5667 if (Name.endswith("_e64")) { 5668 setForcedEncodingSize(64); 5669 return Name.substr(0, Name.size() - 4); 5670 } else if (Name.endswith("_e32")) { 5671 setForcedEncodingSize(32); 5672 return Name.substr(0, Name.size() - 4); 5673 } else if (Name.endswith("_dpp")) { 5674 setForcedDPP(true); 5675 return Name.substr(0, Name.size() - 4); 5676 } else if (Name.endswith("_sdwa")) { 5677 setForcedSDWA(true); 5678 return Name.substr(0, Name.size() - 5); 5679 } 5680 return Name; 5681 } 5682 5683 static void applyMnemonicAliases(StringRef &Mnemonic, 5684 const FeatureBitset &Features, 5685 unsigned VariantID); 5686 5687 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5688 StringRef Name, 5689 SMLoc NameLoc, OperandVector &Operands) { 5690 // Add the instruction mnemonic 5691 Name = parseMnemonicSuffix(Name); 5692 5693 // If the target architecture uses MnemonicAlias, call it here to parse 5694 // operands correctly.
5695 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5696 5697 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5698 5699 bool IsMIMG = Name.startswith("image_"); 5700 5701 while (!trySkipToken(AsmToken::EndOfStatement)) { 5702 OperandMode Mode = OperandMode_Default; 5703 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5704 Mode = OperandMode_NSA; 5705 CPolSeen = 0; 5706 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5707 5708 if (Res != MatchOperand_Success) { 5709 checkUnsupportedInstruction(Name, NameLoc); 5710 if (!Parser.hasPendingError()) { 5711 // FIXME: use real operand location rather than the current location. 5712 StringRef Msg = 5713 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5714 "not a valid operand."; 5715 Error(getLoc(), Msg); 5716 } 5717 while (!trySkipToken(AsmToken::EndOfStatement)) { 5718 lex(); 5719 } 5720 return true; 5721 } 5722 5723 // Eat the comma or space if there is one. 5724 trySkipToken(AsmToken::Comma); 5725 } 5726 5727 return false; 5728 } 5729 5730 //===----------------------------------------------------------------------===// 5731 // Utility functions 5732 //===----------------------------------------------------------------------===// 5733 5734 OperandMatchResultTy 5735 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5736 5737 if (!trySkipId(Prefix, AsmToken::Colon)) 5738 return MatchOperand_NoMatch; 5739 5740 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5741 } 5742 5743 OperandMatchResultTy 5744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5745 AMDGPUOperand::ImmTy ImmTy, 5746 bool (*ConvertResult)(int64_t&)) { 5747 SMLoc S = getLoc(); 5748 int64_t Value = 0; 5749 5750 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5751 if (Res != MatchOperand_Success) 5752 return Res; 5753 5754 if (ConvertResult && !ConvertResult(Value)) { 5755 Error(S, "invalid " + StringRef(Prefix) + " value."); 5756 } 5757 5758 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5759 return MatchOperand_Success; 5760 } 5761 5762 OperandMatchResultTy 5763 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5764 OperandVector &Operands, 5765 AMDGPUOperand::ImmTy ImmTy, 5766 bool (*ConvertResult)(int64_t&)) { 5767 SMLoc S = getLoc(); 5768 if (!trySkipId(Prefix, AsmToken::Colon)) 5769 return MatchOperand_NoMatch; 5770 5771 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5772 return MatchOperand_ParseFail; 5773 5774 unsigned Val = 0; 5775 const unsigned MaxSize = 4; 5776 5777 // FIXME: How to verify the number of elements matches the number of src 5778 // operands? 
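// Each element must be 0 or 1; element I is packed into bit I of Val, so a list such as [1,0,1,1] yields the mask 0b1101. At most MaxSize (4) elements are accepted.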
5779 for (int I = 0; ; ++I) { 5780 int64_t Op; 5781 SMLoc Loc = getLoc(); 5782 if (!parseExpr(Op)) 5783 return MatchOperand_ParseFail; 5784 5785 if (Op != 0 && Op != 1) { 5786 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5787 return MatchOperand_ParseFail; 5788 } 5789 5790 Val |= (Op << I); 5791 5792 if (trySkipToken(AsmToken::RBrac)) 5793 break; 5794 5795 if (I + 1 == MaxSize) { 5796 Error(getLoc(), "expected a closing square bracket"); 5797 return MatchOperand_ParseFail; 5798 } 5799 5800 if (!skipToken(AsmToken::Comma, "expected a comma")) 5801 return MatchOperand_ParseFail; 5802 } 5803 5804 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5805 return MatchOperand_Success; 5806 } 5807 5808 OperandMatchResultTy 5809 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5810 AMDGPUOperand::ImmTy ImmTy) { 5811 int64_t Bit; 5812 SMLoc S = getLoc(); 5813 5814 if (trySkipId(Name)) { 5815 Bit = 1; 5816 } else if (trySkipId("no", Name)) { 5817 Bit = 0; 5818 } else { 5819 return MatchOperand_NoMatch; 5820 } 5821 5822 if (Name == "r128" && !hasMIMG_R128()) { 5823 Error(S, "r128 modifier is not supported on this GPU"); 5824 return MatchOperand_ParseFail; 5825 } 5826 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5827 Error(S, "a16 modifier is not supported on this GPU"); 5828 return MatchOperand_ParseFail; 5829 } 5830 5831 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5832 ImmTy = AMDGPUOperand::ImmTyR128A16; 5833 5834 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5835 return MatchOperand_Success; 5836 } 5837 5838 OperandMatchResultTy 5839 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5840 unsigned CPolOn = 0; 5841 unsigned CPolOff = 0; 5842 SMLoc S = getLoc(); 5843 5844 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5845 if (isGFX940() && !Mnemo.startswith("s_")) { 5846 if (trySkipId("sc0")) 5847 CPolOn = AMDGPU::CPol::SC0; 5848 else if (trySkipId("nosc0")) 5849 CPolOff = AMDGPU::CPol::SC0; 5850 else if (trySkipId("nt")) 5851 CPolOn = AMDGPU::CPol::NT; 5852 else if (trySkipId("nont")) 5853 CPolOff = AMDGPU::CPol::NT; 5854 else if (trySkipId("sc1")) 5855 CPolOn = AMDGPU::CPol::SC1; 5856 else if (trySkipId("nosc1")) 5857 CPolOff = AMDGPU::CPol::SC1; 5858 else 5859 return MatchOperand_NoMatch; 5860 } 5861 else if (trySkipId("glc")) 5862 CPolOn = AMDGPU::CPol::GLC; 5863 else if (trySkipId("noglc")) 5864 CPolOff = AMDGPU::CPol::GLC; 5865 else if (trySkipId("slc")) 5866 CPolOn = AMDGPU::CPol::SLC; 5867 else if (trySkipId("noslc")) 5868 CPolOff = AMDGPU::CPol::SLC; 5869 else if (trySkipId("dlc")) 5870 CPolOn = AMDGPU::CPol::DLC; 5871 else if (trySkipId("nodlc")) 5872 CPolOff = AMDGPU::CPol::DLC; 5873 else if (trySkipId("scc")) 5874 CPolOn = AMDGPU::CPol::SCC; 5875 else if (trySkipId("noscc")) 5876 CPolOff = AMDGPU::CPol::SCC; 5877 else 5878 return MatchOperand_NoMatch; 5879 5880 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5881 Error(S, "dlc modifier is not supported on this GPU"); 5882 return MatchOperand_ParseFail; 5883 } 5884 5885 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5886 Error(S, "scc modifier is not supported on this GPU"); 5887 return MatchOperand_ParseFail; 5888 } 5889 5890 if (CPolSeen & (CPolOn | CPolOff)) { 5891 Error(S, "duplicate cache policy modifier"); 5892 return MatchOperand_ParseFail; 5893 } 5894 5895 CPolSeen |= (CPolOn | CPolOff); 5896 5897 for (unsigned I = 1; I != Operands.size(); ++I) { 5898 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5899 if (Op.isCPol()) { 5900 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5901 return MatchOperand_Success; 5902 } 5903 } 5904 5905 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5906 AMDGPUOperand::ImmTyCPol)); 5907 5908 return MatchOperand_Success; 5909 } 5910 5911 static void addOptionalImmOperand( 5912 MCInst& Inst, const OperandVector& Operands, 5913 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5914 AMDGPUOperand::ImmTy ImmT, 5915 int64_t Default = 0) { 5916 auto i = OptionalIdx.find(ImmT); 5917 if (i != OptionalIdx.end()) { 5918 unsigned Idx = i->second; 5919 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5920 } else { 5921 Inst.addOperand(MCOperand::createImm(Default)); 5922 } 5923 } 5924 5925 OperandMatchResultTy 5926 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5927 StringRef &Value, 5928 SMLoc &StringLoc) { 5929 if (!trySkipId(Prefix, AsmToken::Colon)) 5930 return MatchOperand_NoMatch; 5931 5932 StringLoc = getLoc(); 5933 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5934 : MatchOperand_ParseFail; 5935 } 5936 5937 //===----------------------------------------------------------------------===// 5938 // MTBUF format 5939 //===----------------------------------------------------------------------===// 5940 5941 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5942 int64_t MaxVal, 5943 int64_t &Fmt) { 5944 int64_t Val; 5945 SMLoc Loc = getLoc(); 5946 5947 auto Res = parseIntWithPrefix(Pref, Val); 5948 if (Res == MatchOperand_ParseFail) 5949 return false; 5950 if (Res == MatchOperand_NoMatch) 5951 return true; 5952 5953 if (Val < 0 || Val > MaxVal) { 5954 Error(Loc, Twine("out of range ", StringRef(Pref))); 5955 return false; 5956 } 5957 5958 Fmt = Val; 5959 return true; 5960 } 5961 5962 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5963 // values to live in a joint format operand in the MCInst encoding. 5964 OperandMatchResultTy 5965 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5966 using namespace llvm::AMDGPU::MTBUFFormat; 5967 5968 int64_t Dfmt = DFMT_UNDEF; 5969 int64_t Nfmt = NFMT_UNDEF; 5970 5971 // dfmt and nfmt can appear in either order, and each is optional. 5972 for (int I = 0; I < 2; ++I) { 5973 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5974 return MatchOperand_ParseFail; 5975 5976 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5977 return MatchOperand_ParseFail; 5978 } 5979 // Skip optional comma between dfmt/nfmt 5980 // but guard against 2 commas following each other. 5981 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5982 !peekToken().is(AsmToken::Comma)) { 5983 trySkipToken(AsmToken::Comma); 5984 } 5985 } 5986 5987 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5988 return MatchOperand_NoMatch; 5989 5990 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5991 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5992 5993 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5994 return MatchOperand_Success; 5995 } 5996 5997 OperandMatchResultTy 5998 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5999 using namespace llvm::AMDGPU::MTBUFFormat; 6000 6001 int64_t Fmt = UFMT_UNDEF; 6002 6003 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6004 return MatchOperand_ParseFail; 6005 6006 if (Fmt == UFMT_UNDEF) 6007 return MatchOperand_NoMatch; 6008 6009 Format = Fmt; 6010 return MatchOperand_Success; 6011 } 6012 6013 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6014 int64_t &Nfmt, 6015 StringRef FormatStr, 6016 SMLoc Loc) { 6017 using namespace llvm::AMDGPU::MTBUFFormat; 6018 int64_t Format; 6019 6020 Format = getDfmt(FormatStr); 6021 if (Format != DFMT_UNDEF) { 6022 Dfmt = Format; 6023 return true; 6024 } 6025 6026 Format = getNfmt(FormatStr, getSTI()); 6027 if (Format != NFMT_UNDEF) { 6028 Nfmt = Format; 6029 return true; 6030 } 6031 6032 Error(Loc, "unsupported format"); 6033 return false; 6034 } 6035 6036 OperandMatchResultTy 6037 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6038 SMLoc FormatLoc, 6039 int64_t &Format) { 6040 using namespace llvm::AMDGPU::MTBUFFormat; 6041 6042 int64_t Dfmt = DFMT_UNDEF; 6043 int64_t Nfmt = NFMT_UNDEF; 6044 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6045 return MatchOperand_ParseFail; 6046 6047 if (trySkipToken(AsmToken::Comma)) { 6048 StringRef Str; 6049 SMLoc Loc = getLoc(); 6050 if (!parseId(Str, "expected a format string") || 6051 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6052 return MatchOperand_ParseFail; 6053 } 6054 if (Dfmt == DFMT_UNDEF) { 6055 Error(Loc, "duplicate numeric format"); 6056 return MatchOperand_ParseFail; 6057 } else if (Nfmt == NFMT_UNDEF) { 6058 Error(Loc, "duplicate data format"); 6059 return MatchOperand_ParseFail; 6060 } 6061 } 6062 6063 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6064 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6065 6066 if (isGFX10Plus()) { 6067 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6068 if (Ufmt == UFMT_UNDEF) { 6069 Error(FormatLoc, "unsupported format"); 6070 return MatchOperand_ParseFail; 6071 } 6072 Format = Ufmt; 6073 } else { 6074 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6075 } 6076 6077 return MatchOperand_Success; 6078 } 6079 6080 OperandMatchResultTy 6081 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6082 SMLoc Loc, 6083 int64_t &Format) { 6084 using namespace llvm::AMDGPU::MTBUFFormat; 6085 6086 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6087 if (Id == UFMT_UNDEF) 6088 return MatchOperand_NoMatch; 6089 6090 if (!isGFX10Plus()) { 6091 Error(Loc, "unified format is not supported on this GPU"); 6092 return MatchOperand_ParseFail; 6093 } 6094 6095 Format = Id; 6096 return MatchOperand_Success; 6097 } 6098 6099 OperandMatchResultTy 6100 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6101 using namespace llvm::AMDGPU::MTBUFFormat; 6102 SMLoc Loc = getLoc(); 6103 6104 if (!parseExpr(Format)) 6105 return MatchOperand_ParseFail; 6106 if (!isValidFormatEncoding(Format, getSTI())) { 6107 Error(Loc, "out of range format"); 6108 return MatchOperand_ParseFail; 6109 } 6110 6111 return MatchOperand_Success; 6112 } 6113 6114 OperandMatchResultTy 6115 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6116 using namespace llvm::AMDGPU::MTBUFFormat; 6117 6118 if (!trySkipId("format", AsmToken::Colon)) 6119 return MatchOperand_NoMatch; 6120 6121 if (trySkipToken(AsmToken::LBrac)) { 6122 StringRef FormatStr; 6123 SMLoc Loc = getLoc(); 6124 if (!parseId(FormatStr, "expected a format string")) 6125 return MatchOperand_ParseFail; 6126 6127 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6128 if (Res == MatchOperand_NoMatch) 6129 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6130 if (Res != MatchOperand_Success) 6131 return Res; 6132 6133 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6134 return MatchOperand_ParseFail; 6135 6136 return MatchOperand_Success; 6137 } 6138 6139 return parseNumericFormat(Format); 6140 } 6141 6142 OperandMatchResultTy 6143 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6144 using namespace llvm::AMDGPU::MTBUFFormat; 6145 6146 int64_t Format = getDefaultFormatEncoding(getSTI()); 6147 OperandMatchResultTy Res; 6148 SMLoc Loc = getLoc(); 6149 6150 // Parse legacy format syntax. 6151 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6152 if (Res == MatchOperand_ParseFail) 6153 return Res; 6154 6155 bool FormatFound = (Res == MatchOperand_Success); 6156 6157 Operands.push_back( 6158 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6159 6160 if (FormatFound) 6161 trySkipToken(AsmToken::Comma); 6162 6163 if (isToken(AsmToken::EndOfStatement)) { 6164 // We are expecting an soffset operand, 6165 // but let matcher handle the error. 6166 return MatchOperand_Success; 6167 } 6168 6169 // Parse soffset. 
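// A FORMAT immediate (with the default value if no legacy format was parsed above) has already been pushed; if a symbolic or numeric format follows soffset, it is patched into that operand below.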
6170 Res = parseRegOrImm(Operands); 6171 if (Res != MatchOperand_Success) 6172 return Res; 6173 6174 trySkipToken(AsmToken::Comma); 6175 6176 if (!FormatFound) { 6177 Res = parseSymbolicOrNumericFormat(Format); 6178 if (Res == MatchOperand_ParseFail) 6179 return Res; 6180 if (Res == MatchOperand_Success) { 6181 auto Size = Operands.size(); 6182 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6183 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6184 Op.setImm(Format); 6185 } 6186 return MatchOperand_Success; 6187 } 6188 6189 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6190 Error(getLoc(), "duplicate format"); 6191 return MatchOperand_ParseFail; 6192 } 6193 return MatchOperand_Success; 6194 } 6195 6196 //===----------------------------------------------------------------------===// 6197 // ds 6198 //===----------------------------------------------------------------------===// 6199 6200 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6201 const OperandVector &Operands) { 6202 OptionalImmIndexMap OptionalIdx; 6203 6204 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6205 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6206 6207 // Add the register arguments 6208 if (Op.isReg()) { 6209 Op.addRegOperands(Inst, 1); 6210 continue; 6211 } 6212 6213 // Handle optional arguments 6214 OptionalIdx[Op.getImmTy()] = i; 6215 } 6216 6217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6220 6221 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6222 } 6223 6224 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6225 bool IsGdsHardcoded) { 6226 OptionalImmIndexMap OptionalIdx; 6227 6228 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6229 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6230 6231 // Add the register arguments 6232 if (Op.isReg()) { 6233 Op.addRegOperands(Inst, 1); 6234 continue; 6235 } 6236 6237 if (Op.isToken() && Op.getToken() == "gds") { 6238 IsGdsHardcoded = true; 6239 continue; 6240 } 6241 6242 // Handle optional arguments 6243 OptionalIdx[Op.getImmTy()] = i; 6244 } 6245 6246 AMDGPUOperand::ImmTy OffsetType = 6247 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6248 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6249 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6250 AMDGPUOperand::ImmTyOffset; 6251 6252 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6253 6254 if (!IsGdsHardcoded) { 6255 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6256 } 6257 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6258 } 6259 6260 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6261 OptionalImmIndexMap OptionalIdx; 6262 6263 unsigned OperandIdx[4]; 6264 unsigned EnMask = 0; 6265 int SrcIdx = 0; 6266 6267 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6268 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6269 6270 // Add the register arguments 6271 if (Op.isReg()) { 6272 assert(SrcIdx < 4); 6273 OperandIdx[SrcIdx] = Inst.size(); 6274 Op.addRegOperands(Inst, 1); 6275 ++SrcIdx; 6276 continue; 6277 } 6278 6279 if (Op.isOff()) { 6280 assert(SrcIdx < 4); 6281 OperandIdx[SrcIdx] = Inst.size(); 6282 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6283 ++SrcIdx; 6284 continue; 6285 } 6286 6287 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6288 Op.addImmOperands(Inst, 1); 6289 continue; 6290 } 6291 6292 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6293 continue; 6294 6295 // Handle optional arguments 6296 OptionalIdx[Op.getImmTy()] = i; 6297 } 6298 6299 assert(SrcIdx == 4); 6300 6301 bool Compr = false; 6302 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6303 Compr = true; 6304 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6305 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6306 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6307 } 6308 6309 for (auto i = 0; i < SrcIdx; ++i) { 6310 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6311 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6312 } 6313 } 6314 6315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6317 6318 Inst.addOperand(MCOperand::createImm(EnMask)); 6319 } 6320 6321 //===----------------------------------------------------------------------===// 6322 // s_waitcnt 6323 //===----------------------------------------------------------------------===// 6324 6325 static bool 6326 encodeCnt( 6327 const AMDGPU::IsaVersion ISA, 6328 int64_t &IntVal, 6329 int64_t CntVal, 6330 bool Saturate, 6331 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6332 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6333 { 6334 bool Failed = false; 6335 6336 IntVal = encode(ISA, IntVal, CntVal); 6337 if (CntVal != decode(ISA, IntVal)) { 6338 if (Saturate) { 6339 IntVal = encode(ISA, IntVal, -1); 6340 } else { 6341 Failed = true; 6342 } 6343 } 6344 return Failed; 6345 } 6346 6347 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6348 6349 SMLoc CntLoc = getLoc(); 6350 StringRef CntName = getTokenStr(); 6351 6352 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6353 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6354 return false; 6355 6356 int64_t CntVal; 6357 SMLoc ValLoc = getLoc(); 6358 if (!parseExpr(CntVal)) 6359 return false; 6360 6361 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6362 6363 bool Failed = true; 6364 bool Sat = CntName.endswith("_sat"); 6365 6366 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6367 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6368 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6369 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6370 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6371 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6372 } else { 6373 Error(CntLoc, "invalid counter name " + CntName); 6374 return false; 6375 } 6376 6377 if (Failed) { 6378 Error(ValLoc, "too large value for " + CntName); 6379 return false; 6380 } 6381 6382 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6383 return false; 6384 6385 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6386 if (isToken(AsmToken::EndOfStatement)) { 6387 Error(getLoc(), "expected a counter name"); 6388 return false; 6389 } 6390 } 6391 6392 return true; 6393 } 6394 6395 OperandMatchResultTy 6396 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6397 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6398 int64_t Waitcnt = getWaitcntBitMask(ISA); 6399 SMLoc S = getLoc(); 6400 6401 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6402 while (!isToken(AsmToken::EndOfStatement)) { 6403 if (!parseCnt(Waitcnt)) 6404 return MatchOperand_ParseFail; 6405 } 6406 } else { 6407 if (!parseExpr(Waitcnt)) 6408 return MatchOperand_ParseFail; 6409 } 6410 6411 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6412 return MatchOperand_Success; 6413 } 6414 6415 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6416 SMLoc FieldLoc = getLoc(); 6417 StringRef FieldName = getTokenStr(); 6418 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6419 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6420 return false; 6421 6422 SMLoc ValueLoc = getLoc(); 6423 StringRef ValueName = getTokenStr(); 6424 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6425 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6426 return false; 6427 6428 unsigned Shift; 6429 if (FieldName == "instid0") { 6430 Shift = 0; 6431 } else if (FieldName == "instskip") { 6432 Shift = 4; 6433 } else if (FieldName == "instid1") { 6434 Shift = 7; 6435 } else { 6436 Error(FieldLoc, "invalid field name " + FieldName); 6437 return false; 6438 } 6439 6440 int Value; 6441 if (Shift == 4) { 6442 // Parse values for instskip. 6443 Value = StringSwitch<int>(ValueName) 6444 .Case("SAME", 0) 6445 .Case("NEXT", 1) 6446 .Case("SKIP_1", 2) 6447 .Case("SKIP_2", 3) 6448 .Case("SKIP_3", 4) 6449 .Case("SKIP_4", 5) 6450 .Default(-1); 6451 } else { 6452 // Parse values for instid0 and instid1. 6453 Value = StringSwitch<int>(ValueName) 6454 .Case("NO_DEP", 0) 6455 .Case("VALU_DEP_1", 1) 6456 .Case("VALU_DEP_2", 2) 6457 .Case("VALU_DEP_3", 3) 6458 .Case("VALU_DEP_4", 4) 6459 .Case("TRANS32_DEP_1", 5) 6460 .Case("TRANS32_DEP_2", 6) 6461 .Case("TRANS32_DEP_3", 7) 6462 .Case("FMA_ACCUM_CYCLE_1", 8) 6463 .Case("SALU_CYCLE_1", 9) 6464 .Case("SALU_CYCLE_2", 10) 6465 .Case("SALU_CYCLE_3", 11) 6466 .Default(-1); 6467 } 6468 if (Value < 0) { 6469 Error(ValueLoc, "invalid value name " + ValueName); 6470 return false; 6471 } 6472 6473 Delay |= Value << Shift; 6474 return true; 6475 } 6476 6477 OperandMatchResultTy 6478 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6479 int64_t Delay = 0; 6480 SMLoc S = getLoc(); 6481 6482 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6483 do { 6484 if (!parseDelay(Delay)) 6485 return MatchOperand_ParseFail; 6486 } while (trySkipToken(AsmToken::Pipe)); 6487 } else { 6488 if (!parseExpr(Delay)) 6489 return MatchOperand_ParseFail; 6490 } 6491 6492 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6493 return MatchOperand_Success; 6494 } 6495 6496 bool 6497 AMDGPUOperand::isSWaitCnt() const { 6498 return isImm(); 6499 } 6500 6501 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6502 6503 //===----------------------------------------------------------------------===// 6504 // DepCtr 6505 //===----------------------------------------------------------------------===// 6506 6507 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6508 StringRef DepCtrName) { 6509 switch (ErrorId) { 6510 case OPR_ID_UNKNOWN: 6511 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6512 return; 6513 case OPR_ID_UNSUPPORTED: 6514 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6515 return; 6516 case OPR_ID_DUPLICATE: 6517 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6518 return; 6519 case OPR_VAL_INVALID: 6520 Error(Loc, Twine("invalid value for ", DepCtrName)); 6521 return; 6522 default: 6523 assert(false); 6524 } 6525 } 6526 6527 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6528 6529 using namespace llvm::AMDGPU::DepCtr; 6530 6531 SMLoc DepCtrLoc = getLoc(); 6532 StringRef DepCtrName = getTokenStr(); 6533 6534 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6535 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6536 return false; 6537 6538 int64_t ExprVal; 6539 if (!parseExpr(ExprVal)) 6540 return false; 6541 6542 unsigned PrevOprMask = UsedOprMask; 6543 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6544 6545 if (CntVal < 0) { 6546 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6547 return false; 6548 } 6549 6550 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6551 return false; 6552 6553 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6554 if (isToken(AsmToken::EndOfStatement)) { 6555 Error(getLoc(), "expected a counter name"); 6556 return false; 6557 } 6558 } 6559 6560 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6561 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6562 return true; 6563 } 6564 6565 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6566 using namespace llvm::AMDGPU::DepCtr; 6567 6568 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6569 SMLoc Loc = getLoc(); 6570 6571 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6572 unsigned UsedOprMask = 0; 6573 while (!isToken(AsmToken::EndOfStatement)) { 6574 if (!parseDepCtr(DepCtr, UsedOprMask)) 6575 return MatchOperand_ParseFail; 6576 } 6577 } else { 6578 if (!parseExpr(DepCtr)) 6579 return MatchOperand_ParseFail; 6580 } 6581 6582 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6583 return MatchOperand_Success; 6584 } 6585 6586 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6587 6588 //===----------------------------------------------------------------------===// 6589 // hwreg 6590 //===----------------------------------------------------------------------===// 6591 6592 bool 6593 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6594 OperandInfoTy &Offset, 6595 OperandInfoTy &Width) { 6596 using namespace llvm::AMDGPU::Hwreg; 6597 6598 // The register may be specified by name or using a numeric code 6599 HwReg.Loc = getLoc(); 6600 if (isToken(AsmToken::Identifier) && 6601 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6602 HwReg.IsSymbolic = true; 6603 lex(); // skip register name 6604 } else if (!parseExpr(HwReg.Id, "a register name")) { 6605 return false; 6606 } 6607 6608 if (trySkipToken(AsmToken::RParen)) 6609 return true; 6610 6611 // parse optional params 6612 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6613 return false; 6614 6615 Offset.Loc = getLoc(); 6616 if (!parseExpr(Offset.Id)) 6617 return false; 6618 6619 if (!skipToken(AsmToken::Comma, "expected a comma")) 6620 return false; 6621 6622 Width.Loc = getLoc(); 6623 return parseExpr(Width.Id) && 6624 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6625 } 6626 6627 bool 6628 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6629 const OperandInfoTy &Offset, 6630 const OperandInfoTy &Width) { 6631 6632 using namespace llvm::AMDGPU::Hwreg; 6633 6634 if (HwReg.IsSymbolic) { 6635 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6636 Error(HwReg.Loc, 6637 "specified hardware register is not supported on this GPU"); 6638 return false; 6639 } 6640 } else { 6641 if (!isValidHwreg(HwReg.Id)) { 6642 Error(HwReg.Loc, 6643 "invalid code of hardware register: only 6-bit values are legal"); 6644 return false; 6645 } 6646 } 6647 if (!isValidHwregOffset(Offset.Id)) { 6648 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6649 return false; 6650 } 6651 if (!isValidHwregWidth(Width.Id)) { 6652 Error(Width.Loc, 6653 "invalid bitfield width: only values from 1 to 32 are legal"); 6654 return false; 6655 } 6656 return true; 6657 } 6658 6659 OperandMatchResultTy 6660 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6661 using namespace llvm::AMDGPU::Hwreg; 6662 6663 int64_t ImmVal = 0; 6664 SMLoc Loc = getLoc(); 6665 6666 if (trySkipId("hwreg", AsmToken::LParen)) { 6667 
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6668 OperandInfoTy Offset(OFFSET_DEFAULT_); 6669 OperandInfoTy Width(WIDTH_DEFAULT_); 6670 if (parseHwregBody(HwReg, Offset, Width) && 6671 validateHwreg(HwReg, Offset, Width)) { 6672 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6673 } else { 6674 return MatchOperand_ParseFail; 6675 } 6676 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6677 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6678 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6679 return MatchOperand_ParseFail; 6680 } 6681 } else { 6682 return MatchOperand_ParseFail; 6683 } 6684 6685 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6686 return MatchOperand_Success; 6687 } 6688 6689 bool AMDGPUOperand::isHwreg() const { 6690 return isImmTy(ImmTyHwreg); 6691 } 6692 6693 //===----------------------------------------------------------------------===// 6694 // sendmsg 6695 //===----------------------------------------------------------------------===// 6696 6697 bool 6698 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6699 OperandInfoTy &Op, 6700 OperandInfoTy &Stream) { 6701 using namespace llvm::AMDGPU::SendMsg; 6702 6703 Msg.Loc = getLoc(); 6704 if (isToken(AsmToken::Identifier) && 6705 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6706 Msg.IsSymbolic = true; 6707 lex(); // skip message name 6708 } else if (!parseExpr(Msg.Id, "a message name")) { 6709 return false; 6710 } 6711 6712 if (trySkipToken(AsmToken::Comma)) { 6713 Op.IsDefined = true; 6714 Op.Loc = getLoc(); 6715 if (isToken(AsmToken::Identifier) && 6716 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6717 lex(); // skip operation name 6718 } else if (!parseExpr(Op.Id, "an operation name")) { 6719 return false; 6720 } 6721 6722 if (trySkipToken(AsmToken::Comma)) { 6723 Stream.IsDefined = true; 6724 Stream.Loc = getLoc(); 6725 if (!parseExpr(Stream.Id)) 6726 return false; 6727 } 6728 } 6729 6730 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6731 } 6732 6733 bool 6734 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6735 const OperandInfoTy &Op, 6736 const OperandInfoTy &Stream) { 6737 using namespace llvm::AMDGPU::SendMsg; 6738 6739 // Validation strictness depends on whether message is specified 6740 // in a symbolic or in a numeric form. In the latter case 6741 // only encoding possibility is checked. 
6742 bool Strict = Msg.IsSymbolic; 6743 6744 if (Strict) { 6745 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6746 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6747 return false; 6748 } 6749 } else { 6750 if (!isValidMsgId(Msg.Id, getSTI())) { 6751 Error(Msg.Loc, "invalid message id"); 6752 return false; 6753 } 6754 } 6755 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6756 if (Op.IsDefined) { 6757 Error(Op.Loc, "message does not support operations"); 6758 } else { 6759 Error(Msg.Loc, "missing message operation"); 6760 } 6761 return false; 6762 } 6763 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6764 Error(Op.Loc, "invalid operation id"); 6765 return false; 6766 } 6767 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6768 Stream.IsDefined) { 6769 Error(Stream.Loc, "message operation does not support streams"); 6770 return false; 6771 } 6772 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6773 Error(Stream.Loc, "invalid message stream id"); 6774 return false; 6775 } 6776 return true; 6777 } 6778 6779 OperandMatchResultTy 6780 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6781 using namespace llvm::AMDGPU::SendMsg; 6782 6783 int64_t ImmVal = 0; 6784 SMLoc Loc = getLoc(); 6785 6786 if (trySkipId("sendmsg", AsmToken::LParen)) { 6787 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6788 OperandInfoTy Op(OP_NONE_); 6789 OperandInfoTy Stream(STREAM_ID_NONE_); 6790 if (parseSendMsgBody(Msg, Op, Stream) && 6791 validateSendMsg(Msg, Op, Stream)) { 6792 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6793 } else { 6794 return MatchOperand_ParseFail; 6795 } 6796 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6797 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6798 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6799 return MatchOperand_ParseFail; 6800 } 6801 } else { 6802 return MatchOperand_ParseFail; 6803 } 6804 6805 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6806 return MatchOperand_Success; 6807 } 6808 6809 bool AMDGPUOperand::isSendMsg() const { 6810 return isImmTy(ImmTySendMsg); 6811 } 6812 6813 //===----------------------------------------------------------------------===// 6814 // v_interp 6815 //===----------------------------------------------------------------------===// 6816 6817 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6818 StringRef Str; 6819 SMLoc S = getLoc(); 6820 6821 if (!parseId(Str)) 6822 return MatchOperand_NoMatch; 6823 6824 int Slot = StringSwitch<int>(Str) 6825 .Case("p10", 0) 6826 .Case("p20", 1) 6827 .Case("p0", 2) 6828 .Default(-1); 6829 6830 if (Slot == -1) { 6831 Error(S, "invalid interpolation slot"); 6832 return MatchOperand_ParseFail; 6833 } 6834 6835 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6836 AMDGPUOperand::ImmTyInterpSlot)); 6837 return MatchOperand_Success; 6838 } 6839 6840 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6841 StringRef Str; 6842 SMLoc S = getLoc(); 6843 6844 if (!parseId(Str)) 6845 return MatchOperand_NoMatch; 6846 6847 if (!Str.startswith("attr")) { 6848 Error(S, "invalid interpolation attribute"); 6849 return MatchOperand_ParseFail; 6850 } 6851 6852 StringRef Chan = Str.take_back(2); 6853 int AttrChan = StringSwitch<int>(Chan) 6854 .Case(".x", 0) 6855 .Case(".y", 1) 6856 .Case(".z", 2) 6857 .Case(".w", 3) 6858 .Default(-1); 6859 if (AttrChan == -1) { 6860 Error(S, "invalid or missing interpolation attribute channel"); 
6861 return MatchOperand_ParseFail; 6862 } 6863 6864 Str = Str.drop_back(2).drop_front(4); 6865 6866 uint8_t Attr; 6867 if (Str.getAsInteger(10, Attr)) { 6868 Error(S, "invalid or missing interpolation attribute number"); 6869 return MatchOperand_ParseFail; 6870 } 6871 6872 if (Attr > 63) { 6873 Error(S, "out of bounds interpolation attribute number"); 6874 return MatchOperand_ParseFail; 6875 } 6876 6877 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6878 6879 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6880 AMDGPUOperand::ImmTyInterpAttr)); 6881 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6882 AMDGPUOperand::ImmTyAttrChan)); 6883 return MatchOperand_Success; 6884 } 6885 6886 //===----------------------------------------------------------------------===// 6887 // exp 6888 //===----------------------------------------------------------------------===// 6889 6890 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6891 using namespace llvm::AMDGPU::Exp; 6892 6893 StringRef Str; 6894 SMLoc S = getLoc(); 6895 6896 if (!parseId(Str)) 6897 return MatchOperand_NoMatch; 6898 6899 unsigned Id = getTgtId(Str); 6900 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6901 Error(S, (Id == ET_INVALID) ? 6902 "invalid exp target" : 6903 "exp target is not supported on this GPU"); 6904 return MatchOperand_ParseFail; 6905 } 6906 6907 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6908 AMDGPUOperand::ImmTyExpTgt)); 6909 return MatchOperand_Success; 6910 } 6911 6912 //===----------------------------------------------------------------------===// 6913 // parser helpers 6914 //===----------------------------------------------------------------------===// 6915 6916 bool 6917 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6918 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6919 } 6920 6921 bool 6922 AMDGPUAsmParser::isId(const StringRef Id) const { 6923 return isId(getToken(), Id); 6924 } 6925 6926 bool 6927 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6928 return getTokenKind() == Kind; 6929 } 6930 6931 bool 6932 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6933 if (isId(Id)) { 6934 lex(); 6935 return true; 6936 } 6937 return false; 6938 } 6939 6940 bool 6941 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6942 if (isToken(AsmToken::Identifier)) { 6943 StringRef Tok = getTokenStr(); 6944 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6945 lex(); 6946 return true; 6947 } 6948 } 6949 return false; 6950 } 6951 6952 bool 6953 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6954 if (isId(Id) && peekToken().is(Kind)) { 6955 lex(); 6956 lex(); 6957 return true; 6958 } 6959 return false; 6960 } 6961 6962 bool 6963 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6964 if (isToken(Kind)) { 6965 lex(); 6966 return true; 6967 } 6968 return false; 6969 } 6970 6971 bool 6972 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6973 const StringRef ErrMsg) { 6974 if (!trySkipToken(Kind)) { 6975 Error(getLoc(), ErrMsg); 6976 return false; 6977 } 6978 return true; 6979 } 6980 6981 bool 6982 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6983 SMLoc S = getLoc(); 6984 6985 const MCExpr *Expr; 6986 if (Parser.parseExpression(Expr)) 6987 return false; 6988 6989 if (Expr->evaluateAsAbsolute(Imm)) 6990 return true; 6991 6992 if (Expected.empty()) { 6993 Error(S, "expected 
absolute expression"); 6994 } else { 6995 Error(S, Twine("expected ", Expected) + 6996 Twine(" or an absolute expression")); 6997 } 6998 return false; 6999 } 7000 7001 bool 7002 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7003 SMLoc S = getLoc(); 7004 7005 const MCExpr *Expr; 7006 if (Parser.parseExpression(Expr)) 7007 return false; 7008 7009 int64_t IntVal; 7010 if (Expr->evaluateAsAbsolute(IntVal)) { 7011 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7012 } else { 7013 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7014 } 7015 return true; 7016 } 7017 7018 bool 7019 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7020 if (isToken(AsmToken::String)) { 7021 Val = getToken().getStringContents(); 7022 lex(); 7023 return true; 7024 } else { 7025 Error(getLoc(), ErrMsg); 7026 return false; 7027 } 7028 } 7029 7030 bool 7031 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7032 if (isToken(AsmToken::Identifier)) { 7033 Val = getTokenStr(); 7034 lex(); 7035 return true; 7036 } else { 7037 if (!ErrMsg.empty()) 7038 Error(getLoc(), ErrMsg); 7039 return false; 7040 } 7041 } 7042 7043 AsmToken 7044 AMDGPUAsmParser::getToken() const { 7045 return Parser.getTok(); 7046 } 7047 7048 AsmToken 7049 AMDGPUAsmParser::peekToken() { 7050 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7051 } 7052 7053 void 7054 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7055 auto TokCount = getLexer().peekTokens(Tokens); 7056 7057 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7058 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7059 } 7060 7061 AsmToken::TokenKind 7062 AMDGPUAsmParser::getTokenKind() const { 7063 return getLexer().getKind(); 7064 } 7065 7066 SMLoc 7067 AMDGPUAsmParser::getLoc() const { 7068 return getToken().getLoc(); 7069 } 7070 7071 StringRef 7072 AMDGPUAsmParser::getTokenStr() const { 7073 return getToken().getString(); 7074 } 7075 7076 void 7077 AMDGPUAsmParser::lex() { 7078 Parser.Lex(); 7079 } 7080 7081 SMLoc 7082 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7083 const OperandVector &Operands) const { 7084 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7085 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7086 if (Test(Op)) 7087 return Op.getStartLoc(); 7088 } 7089 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7090 } 7091 7092 SMLoc 7093 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7094 const OperandVector &Operands) const { 7095 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7096 return getOperandLoc(Test, Operands); 7097 } 7098 7099 SMLoc 7100 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7101 const OperandVector &Operands) const { 7102 auto Test = [=](const AMDGPUOperand& Op) { 7103 return Op.isRegKind() && Op.getReg() == Reg; 7104 }; 7105 return getOperandLoc(Test, Operands); 7106 } 7107 7108 SMLoc 7109 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7110 auto Test = [](const AMDGPUOperand& Op) { 7111 return Op.IsImmKindLiteral() || Op.isExpr(); 7112 }; 7113 return getOperandLoc(Test, Operands); 7114 } 7115 7116 SMLoc 7117 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7118 auto Test = [](const AMDGPUOperand& Op) { 7119 return Op.isImmKindConst(); 7120 }; 7121 return getOperandLoc(Test, Operands); 7122 } 7123 7124 //===----------------------------------------------------------------------===// 7125 // swizzle 7126 
//===----------------------------------------------------------------------===// 7127 7128 LLVM_READNONE 7129 static unsigned 7130 encodeBitmaskPerm(const unsigned AndMask, 7131 const unsigned OrMask, 7132 const unsigned XorMask) { 7133 using namespace llvm::AMDGPU::Swizzle; 7134 7135 return BITMASK_PERM_ENC | 7136 (AndMask << BITMASK_AND_SHIFT) | 7137 (OrMask << BITMASK_OR_SHIFT) | 7138 (XorMask << BITMASK_XOR_SHIFT); 7139 } 7140 7141 bool 7142 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7143 const unsigned MinVal, 7144 const unsigned MaxVal, 7145 const StringRef ErrMsg, 7146 SMLoc &Loc) { 7147 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7148 return false; 7149 } 7150 Loc = getLoc(); 7151 if (!parseExpr(Op)) { 7152 return false; 7153 } 7154 if (Op < MinVal || Op > MaxVal) { 7155 Error(Loc, ErrMsg); 7156 return false; 7157 } 7158 7159 return true; 7160 } 7161 7162 bool 7163 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7164 const unsigned MinVal, 7165 const unsigned MaxVal, 7166 const StringRef ErrMsg) { 7167 SMLoc Loc; 7168 for (unsigned i = 0; i < OpNum; ++i) { 7169 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7170 return false; 7171 } 7172 7173 return true; 7174 } 7175 7176 bool 7177 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7178 using namespace llvm::AMDGPU::Swizzle; 7179 7180 int64_t Lane[LANE_NUM]; 7181 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7182 "expected a 2-bit lane id")) { 7183 Imm = QUAD_PERM_ENC; 7184 for (unsigned I = 0; I < LANE_NUM; ++I) { 7185 Imm |= Lane[I] << (LANE_SHIFT * I); 7186 } 7187 return true; 7188 } 7189 return false; 7190 } 7191 7192 bool 7193 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7194 using namespace llvm::AMDGPU::Swizzle; 7195 7196 SMLoc Loc; 7197 int64_t GroupSize; 7198 int64_t LaneIdx; 7199 7200 if (!parseSwizzleOperand(GroupSize, 7201 2, 32, 7202 "group size must be in the interval [2,32]", 7203 Loc)) { 7204 return false; 7205 } 7206 if (!isPowerOf2_64(GroupSize)) { 7207 Error(Loc, "group size must be a power of two"); 7208 return false; 7209 } 7210 if (parseSwizzleOperand(LaneIdx, 7211 0, GroupSize - 1, 7212 "lane id must be in the interval [0,group size - 1]", 7213 Loc)) { 7214 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7215 return true; 7216 } 7217 return false; 7218 } 7219 7220 bool 7221 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7222 using namespace llvm::AMDGPU::Swizzle; 7223 7224 SMLoc Loc; 7225 int64_t GroupSize; 7226 7227 if (!parseSwizzleOperand(GroupSize, 7228 2, 32, 7229 "group size must be in the interval [2,32]", 7230 Loc)) { 7231 return false; 7232 } 7233 if (!isPowerOf2_64(GroupSize)) { 7234 Error(Loc, "group size must be a power of two"); 7235 return false; 7236 } 7237 7238 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7239 return true; 7240 } 7241 7242 bool 7243 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7244 using namespace llvm::AMDGPU::Swizzle; 7245 7246 SMLoc Loc; 7247 int64_t GroupSize; 7248 7249 if (!parseSwizzleOperand(GroupSize, 7250 1, 16, 7251 "group size must be in the interval [1,16]", 7252 Loc)) { 7253 return false; 7254 } 7255 if (!isPowerOf2_64(GroupSize)) { 7256 Error(Loc, "group size must be a power of two"); 7257 return false; 7258 } 7259 7260 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7261 return true; 7262 } 7263 7264 bool 7265 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7266 using namespace llvm::AMDGPU::Swizzle; 7267 7268 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7269 return false; 7270 } 7271 7272 StringRef Ctl; 7273 SMLoc StrLoc = getLoc(); 7274 if (!parseString(Ctl)) { 7275 return false; 7276 } 7277 if (Ctl.size() != BITMASK_WIDTH) { 7278 Error(StrLoc, "expected a 5-character mask"); 7279 return false; 7280 } 7281 7282 unsigned AndMask = 0; 7283 unsigned OrMask = 0; 7284 unsigned XorMask = 0; 7285 7286 for (size_t i = 0; i < Ctl.size(); ++i) { 7287 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7288 switch(Ctl[i]) { 7289 default: 7290 Error(StrLoc, "invalid mask"); 7291 return false; 7292 case '0': 7293 break; 7294 case '1': 7295 OrMask |= Mask; 7296 break; 7297 case 'p': 7298 AndMask |= Mask; 7299 break; 7300 case 'i': 7301 AndMask |= Mask; 7302 XorMask |= Mask; 7303 break; 7304 } 7305 } 7306 7307 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7308 return true; 7309 } 7310 7311 bool 7312 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7313 7314 SMLoc OffsetLoc = getLoc(); 7315 7316 if (!parseExpr(Imm, "a swizzle macro")) { 7317 return false; 7318 } 7319 if (!isUInt<16>(Imm)) { 7320 Error(OffsetLoc, "expected a 16-bit offset"); 7321 return false; 7322 } 7323 return true; 7324 } 7325 7326 bool 7327 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7328 using namespace llvm::AMDGPU::Swizzle; 7329 7330 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7331 7332 SMLoc ModeLoc = getLoc(); 7333 bool Ok = false; 7334 7335 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7336 Ok = parseSwizzleQuadPerm(Imm); 7337 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7338 Ok = parseSwizzleBitmaskPerm(Imm); 7339 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7340 Ok = parseSwizzleBroadcast(Imm); 7341 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7342 Ok = parseSwizzleSwap(Imm); 7343 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7344 Ok = parseSwizzleReverse(Imm); 7345 } else { 7346 Error(ModeLoc, "expected a swizzle mode"); 7347 } 7348 7349 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7350 } 7351 7352 return false; 7353 } 7354 7355 OperandMatchResultTy 7356 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7357 SMLoc S = getLoc(); 7358 int64_t Imm = 0; 7359 7360 if (trySkipId("offset")) { 7361 7362 bool Ok = false; 7363 if (skipToken(AsmToken::Colon, "expected a colon")) { 7364 if (trySkipId("swizzle")) { 7365 Ok = parseSwizzleMacro(Imm); 7366 } else { 7367 Ok = parseSwizzleOffset(Imm); 7368 } 7369 } 7370 7371 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7372 7373 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7374 } else { 7375 // Swizzle "offset" operand is optional. 7376 // If it is omitted, try parsing other optional operands. 
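    // For reference, the forms handled by the branch above look like this
    // (illustrative sketches, not an exhaustive list):
    //   ds_swizzle_b32 v0, v1 offset:0xffff
    //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
    //   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)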
7377 return parseOptionalOpr(Operands); 7378 } 7379 } 7380 7381 bool 7382 AMDGPUOperand::isSwizzle() const { 7383 return isImmTy(ImmTySwizzle); 7384 } 7385 7386 //===----------------------------------------------------------------------===// 7387 // VGPR Index Mode 7388 //===----------------------------------------------------------------------===// 7389 7390 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7391 7392 using namespace llvm::AMDGPU::VGPRIndexMode; 7393 7394 if (trySkipToken(AsmToken::RParen)) { 7395 return OFF; 7396 } 7397 7398 int64_t Imm = 0; 7399 7400 while (true) { 7401 unsigned Mode = 0; 7402 SMLoc S = getLoc(); 7403 7404 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7405 if (trySkipId(IdSymbolic[ModeId])) { 7406 Mode = 1 << ModeId; 7407 break; 7408 } 7409 } 7410 7411 if (Mode == 0) { 7412 Error(S, (Imm == 0)? 7413 "expected a VGPR index mode or a closing parenthesis" : 7414 "expected a VGPR index mode"); 7415 return UNDEF; 7416 } 7417 7418 if (Imm & Mode) { 7419 Error(S, "duplicate VGPR index mode"); 7420 return UNDEF; 7421 } 7422 Imm |= Mode; 7423 7424 if (trySkipToken(AsmToken::RParen)) 7425 break; 7426 if (!skipToken(AsmToken::Comma, 7427 "expected a comma or a closing parenthesis")) 7428 return UNDEF; 7429 } 7430 7431 return Imm; 7432 } 7433 7434 OperandMatchResultTy 7435 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7436 7437 using namespace llvm::AMDGPU::VGPRIndexMode; 7438 7439 int64_t Imm = 0; 7440 SMLoc S = getLoc(); 7441 7442 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7443 Imm = parseGPRIdxMacro(); 7444 if (Imm == UNDEF) 7445 return MatchOperand_ParseFail; 7446 } else { 7447 if (getParser().parseAbsoluteExpression(Imm)) 7448 return MatchOperand_ParseFail; 7449 if (Imm < 0 || !isUInt<4>(Imm)) { 7450 Error(S, "invalid immediate: only 4-bit values are legal"); 7451 return MatchOperand_ParseFail; 7452 } 7453 } 7454 7455 Operands.push_back( 7456 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7457 return MatchOperand_Success; 7458 } 7459 7460 bool AMDGPUOperand::isGPRIdxMode() const { 7461 return isImmTy(ImmTyGprIdxMode); 7462 } 7463 7464 //===----------------------------------------------------------------------===// 7465 // sopp branch targets 7466 //===----------------------------------------------------------------------===// 7467 7468 OperandMatchResultTy 7469 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7470 7471 // Make sure we are not parsing something 7472 // that looks like a label or an expression but is not. 7473 // This will improve error messages. 7474 if (isRegister() || isModifier()) 7475 return MatchOperand_NoMatch; 7476 7477 if (!parseExpr(Operands)) 7478 return MatchOperand_ParseFail; 7479 7480 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7481 assert(Opr.isImm() || Opr.isExpr()); 7482 SMLoc Loc = Opr.getStartLoc(); 7483 7484 // Currently we do not support arbitrary expressions as branch targets. 7485 // Only labels and absolute expressions are accepted. 
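  // For example (illustrative): 's_branch loop_end' and 's_branch 4' are
  // accepted here, while a relocatable expression such as 'loop_end + 4'
  // is rejected by the check below.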
7486 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7487 Error(Loc, "expected an absolute expression or a label"); 7488 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7489 Error(Loc, "expected a 16-bit signed jump offset"); 7490 } 7491 7492 return MatchOperand_Success; 7493 } 7494 7495 //===----------------------------------------------------------------------===// 7496 // Boolean holding registers 7497 //===----------------------------------------------------------------------===// 7498 7499 OperandMatchResultTy 7500 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7501 return parseReg(Operands); 7502 } 7503 7504 //===----------------------------------------------------------------------===// 7505 // mubuf 7506 //===----------------------------------------------------------------------===// 7507 7508 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7509 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7510 } 7511 7512 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7513 const OperandVector &Operands, 7514 bool IsAtomic, 7515 bool IsLds) { 7516 OptionalImmIndexMap OptionalIdx; 7517 unsigned FirstOperandIdx = 1; 7518 bool IsAtomicReturn = false; 7519 7520 if (IsAtomic) { 7521 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7522 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7523 if (!Op.isCPol()) 7524 continue; 7525 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7526 break; 7527 } 7528 7529 if (!IsAtomicReturn) { 7530 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7531 if (NewOpc != -1) 7532 Inst.setOpcode(NewOpc); 7533 } 7534 7535 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7536 SIInstrFlags::IsAtomicRet; 7537 } 7538 7539 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7540 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7541 7542 // Add the register arguments 7543 if (Op.isReg()) { 7544 Op.addRegOperands(Inst, 1); 7545 // Insert a tied src for atomic return dst. 7546 // This cannot be postponed as subsequent calls to 7547 // addImmOperands rely on correct number of MC operands. 7548 if (IsAtomicReturn && i == FirstOperandIdx) 7549 Op.addRegOperands(Inst, 1); 7550 continue; 7551 } 7552 7553 // Handle the case where soffset is an immediate 7554 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7555 Op.addImmOperands(Inst, 1); 7556 continue; 7557 } 7558 7559 // Handle tokens like 'offen' which are sometimes hard-coded into the 7560 // asm string. There are no MCInst operands for these. 
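    // For example (illustrative), in 'buffer_load_dword v0, v1, s[4:7], s0 offen'
    // the trailing 'offen' may have been matched as a hard-coded token of the
    // opcode's asm string, in which case no MC operand is emitted for it.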
7561 if (Op.isToken()) { 7562 continue; 7563 } 7564 assert(Op.isImm()); 7565 7566 // Handle optional arguments 7567 OptionalIdx[Op.getImmTy()] = i; 7568 } 7569 7570 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7571 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7572 7573 if (!IsLds) { // tfe is not legal with lds opcodes 7574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7575 } 7576 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7577 } 7578 7579 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7580 OptionalImmIndexMap OptionalIdx; 7581 7582 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7583 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7584 7585 // Add the register arguments 7586 if (Op.isReg()) { 7587 Op.addRegOperands(Inst, 1); 7588 continue; 7589 } 7590 7591 // Handle the case where soffset is an immediate 7592 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7593 Op.addImmOperands(Inst, 1); 7594 continue; 7595 } 7596 7597 // Handle tokens like 'offen' which are sometimes hard-coded into the 7598 // asm string. There are no MCInst operands for these. 7599 if (Op.isToken()) { 7600 continue; 7601 } 7602 assert(Op.isImm()); 7603 7604 // Handle optional arguments 7605 OptionalIdx[Op.getImmTy()] = i; 7606 } 7607 7608 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7609 AMDGPUOperand::ImmTyOffset); 7610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7612 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7614 } 7615 7616 //===----------------------------------------------------------------------===// 7617 // mimg 7618 //===----------------------------------------------------------------------===// 7619 7620 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7621 bool IsAtomic) { 7622 unsigned I = 1; 7623 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7624 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7625 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7626 } 7627 7628 if (IsAtomic) { 7629 // Add src, same as dst 7630 assert(Desc.getNumDefs() == 1); 7631 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7632 } 7633 7634 OptionalImmIndexMap OptionalIdx; 7635 7636 for (unsigned E = Operands.size(); I != E; ++I) { 7637 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7638 7639 // Add the register arguments 7640 if (Op.isReg()) { 7641 Op.addRegOperands(Inst, 1); 7642 } else if (Op.isImmModifier()) { 7643 OptionalIdx[Op.getImmTy()] = I; 7644 } else if (!Op.isToken()) { 7645 llvm_unreachable("unexpected operand type"); 7646 } 7647 } 7648 7649 bool IsGFX10Plus = isGFX10Plus(); 7650 7651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7652 if (IsGFX10Plus) 7653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7654 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7655 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7656 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7657 if (IsGFX10Plus) 7658 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7659 
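  // Illustrative gfx10+ source form covered by this conversion (a sketch,
  // not taken from the test suite):
  //   image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  // 'dmask' through 'a16' are added above; 'tfe', 'lwe' and 'd16' (plus 'da'
  // on pre-gfx10 targets) follow below.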
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7662 if (!IsGFX10Plus) 7663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7665 } 7666 7667 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7668 cvtMIMG(Inst, Operands, true); 7669 } 7670 7671 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7672 OptionalImmIndexMap OptionalIdx; 7673 bool IsAtomicReturn = false; 7674 7675 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7676 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7677 if (!Op.isCPol()) 7678 continue; 7679 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7680 break; 7681 } 7682 7683 if (!IsAtomicReturn) { 7684 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7685 if (NewOpc != -1) 7686 Inst.setOpcode(NewOpc); 7687 } 7688 7689 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7690 SIInstrFlags::IsAtomicRet; 7691 7692 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7693 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7694 7695 // Add the register arguments 7696 if (Op.isReg()) { 7697 Op.addRegOperands(Inst, 1); 7698 if (IsAtomicReturn && i == 1) 7699 Op.addRegOperands(Inst, 1); 7700 continue; 7701 } 7702 7703 // Handle the case where soffset is an immediate 7704 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7705 Op.addImmOperands(Inst, 1); 7706 continue; 7707 } 7708 7709 // Handle tokens like 'offen' which are sometimes hard-coded into the 7710 // asm string. There are no MCInst operands for these. 7711 if (Op.isToken()) { 7712 continue; 7713 } 7714 assert(Op.isImm()); 7715 7716 // Handle optional arguments 7717 OptionalIdx[Op.getImmTy()] = i; 7718 } 7719 7720 if ((int)Inst.getNumOperands() <= 7721 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7723 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7724 } 7725 7726 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7727 const OperandVector &Operands) { 7728 for (unsigned I = 1; I < Operands.size(); ++I) { 7729 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7730 if (Operand.isReg()) 7731 Operand.addRegOperands(Inst, 1); 7732 } 7733 7734 Inst.addOperand(MCOperand::createImm(1)); // a16 7735 } 7736 7737 //===----------------------------------------------------------------------===// 7738 // smrd 7739 //===----------------------------------------------------------------------===// 7740 7741 bool AMDGPUOperand::isSMRDOffset8() const { 7742 return isImm() && isUInt<8>(getImm()); 7743 } 7744 7745 bool AMDGPUOperand::isSMEMOffset() const { 7746 return isImmTy(ImmTyNone) || 7747 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7748 } 7749 7750 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7751 // 32-bit literals are only supported on CI and we only want to use them 7752 // when the offset is > 8-bits. 
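  // For example, an offset value of 0x2a still satisfies isSMRDOffset8(),
  // whereas 0x1ff is only accepted here, via CI's 32-bit literal encoding
  // (illustrative values).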
7753 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7754 } 7755 7756 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7757 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7758 } 7759 7760 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7761 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7762 } 7763 7764 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7765 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7766 } 7767 7768 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7769 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7770 } 7771 7772 //===----------------------------------------------------------------------===// 7773 // vop3 7774 //===----------------------------------------------------------------------===// 7775 7776 static bool ConvertOmodMul(int64_t &Mul) { 7777 if (Mul != 1 && Mul != 2 && Mul != 4) 7778 return false; 7779 7780 Mul >>= 1; 7781 return true; 7782 } 7783 7784 static bool ConvertOmodDiv(int64_t &Div) { 7785 if (Div == 1) { 7786 Div = 0; 7787 return true; 7788 } 7789 7790 if (Div == 2) { 7791 Div = 3; 7792 return true; 7793 } 7794 7795 return false; 7796 } 7797 7798 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7799 // This is intentional and ensures compatibility with sp3. 7800 // See bug 35397 for details. 7801 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7802 if (BoundCtrl == 0 || BoundCtrl == 1) { 7803 BoundCtrl = 1; 7804 return true; 7805 } 7806 return false; 7807 } 7808 7809 // Note: the order in this table matches the order of operands in AsmString. 7810 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7811 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7812 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7813 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7814 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7815 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7816 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7817 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7818 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7819 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7820 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7821 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7822 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7823 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7824 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7825 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7826 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7827 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7828 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7829 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7830 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7831 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7832 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7833 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7834 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7835 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7836 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7837 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7838 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7839 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7840 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7841 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7842 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7843 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7844 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7845 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7846 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7847 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7848 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7849 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7850 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7851 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7852 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7853 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7854 }; 7855 7856 void AMDGPUAsmParser::onBeginOfFile() { 7857 if (!getParser().getStreamer().getTargetStreamer() || 7858 getSTI().getTargetTriple().getArch() == Triple::r600) 7859 return; 7860 7861 if (!getTargetStreamer().getTargetID()) 7862 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7863 7864 if (isHsaAbiVersion3AndAbove(&getSTI())) 7865 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7866 } 7867 7868 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7869 7870 OperandMatchResultTy res = parseOptionalOpr(Operands); 7871 7872 // This is a hack to enable hardcoded mandatory operands which follow 7873 // optional operands. 7874 // 7875 // Current design assumes that all operands after the first optional operand 7876 // are also optional. However implementation of some instructions violates 7877 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7878 // 7879 // To alleviate this problem, we have to (implicitly) parse extra operands 7880 // to make sure autogenerated parser of custom operands never hit hardcoded 7881 // mandatory operands. 
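  // For example (illustrative), an atomic-with-return form such as
  //   flat_atomic_add v0, v[1:2], v3 glc
  // carries 'glc' hard-coded in its asm string after optional operands, so
  // the loop below greedily parses the remaining optional operands itself
  // rather than leaving them to the autogenerated matcher.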
7882 7883 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7884 if (res != MatchOperand_Success || 7885 isToken(AsmToken::EndOfStatement)) 7886 break; 7887 7888 trySkipToken(AsmToken::Comma); 7889 res = parseOptionalOpr(Operands); 7890 } 7891 7892 return res; 7893 } 7894 7895 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7896 OperandMatchResultTy res; 7897 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7898 // try to parse any optional operand here 7899 if (Op.IsBit) { 7900 res = parseNamedBit(Op.Name, Operands, Op.Type); 7901 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7902 res = parseOModOperand(Operands); 7903 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7904 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7905 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7906 res = parseSDWASel(Operands, Op.Name, Op.Type); 7907 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7908 res = parseSDWADstUnused(Operands); 7909 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7910 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7911 Op.Type == AMDGPUOperand::ImmTyNegLo || 7912 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7913 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7914 Op.ConvertResult); 7915 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7916 res = parseDim(Operands); 7917 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7918 res = parseCPol(Operands); 7919 } else { 7920 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7921 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7922 res = parseOperandArrayWithPrefix("neg", Operands, 7923 AMDGPUOperand::ImmTyBLGP, 7924 nullptr); 7925 } 7926 } 7927 if (res != MatchOperand_NoMatch) { 7928 return res; 7929 } 7930 } 7931 return MatchOperand_NoMatch; 7932 } 7933 7934 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7935 StringRef Name = getTokenStr(); 7936 if (Name == "mul") { 7937 return parseIntWithPrefix("mul", Operands, 7938 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7939 } 7940 7941 if (Name == "div") { 7942 return parseIntWithPrefix("div", Operands, 7943 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7944 } 7945 7946 return MatchOperand_NoMatch; 7947 } 7948 7949 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7950 cvtVOP3P(Inst, Operands); 7951 7952 int Opc = Inst.getOpcode(); 7953 7954 int SrcNum; 7955 const int Ops[] = { AMDGPU::OpName::src0, 7956 AMDGPU::OpName::src1, 7957 AMDGPU::OpName::src2 }; 7958 for (SrcNum = 0; 7959 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7960 ++SrcNum); 7961 assert(SrcNum > 0); 7962 7963 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7964 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7965 7966 if ((OpSel & (1 << SrcNum)) != 0) { 7967 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7968 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7969 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7970 } 7971 } 7972 7973 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7974 // 1. This operand is input modifiers 7975 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7976 // 2. This is not last operand 7977 && Desc.NumOperands > (OpNum + 1) 7978 // 3. Next operand is register class 7979 && Desc.OpInfo[OpNum + 1].RegClass != -1 7980 // 4. 
Next register is not tied to any other operand 7981 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7982 } 7983 7984 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7985 { 7986 OptionalImmIndexMap OptionalIdx; 7987 unsigned Opc = Inst.getOpcode(); 7988 7989 unsigned I = 1; 7990 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7991 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7992 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7993 } 7994 7995 for (unsigned E = Operands.size(); I != E; ++I) { 7996 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7997 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7998 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7999 } else if (Op.isInterpSlot() || 8000 Op.isInterpAttr() || 8001 Op.isAttrChan()) { 8002 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8003 } else if (Op.isImmModifier()) { 8004 OptionalIdx[Op.getImmTy()] = I; 8005 } else { 8006 llvm_unreachable("unhandled operand type"); 8007 } 8008 } 8009 8010 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8012 } 8013 8014 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8015 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8016 } 8017 8018 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8019 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8020 } 8021 } 8022 8023 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8024 { 8025 OptionalImmIndexMap OptionalIdx; 8026 unsigned Opc = Inst.getOpcode(); 8027 8028 unsigned I = 1; 8029 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8030 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8031 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8032 } 8033 8034 for (unsigned E = Operands.size(); I != E; ++I) { 8035 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8036 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8037 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8038 } else if (Op.isImmModifier()) { 8039 OptionalIdx[Op.getImmTy()] = I; 8040 } else { 8041 llvm_unreachable("unhandled operand type"); 8042 } 8043 } 8044 8045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8046 8047 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8048 if (OpSelIdx != -1) 8049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8050 8051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8052 8053 if (OpSelIdx == -1) 8054 return; 8055 8056 const int Ops[] = { AMDGPU::OpName::src0, 8057 AMDGPU::OpName::src1, 8058 AMDGPU::OpName::src2 }; 8059 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8060 AMDGPU::OpName::src1_modifiers, 8061 AMDGPU::OpName::src2_modifiers }; 8062 8063 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8064 8065 for (int J = 0; J < 3; ++J) { 8066 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8067 if (OpIdx == -1) 8068 break; 8069 8070 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8071 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8072 8073 if ((OpSel & (1 << J)) != 0) 8074 ModVal |= SISrcMods::OP_SEL_0; 8075 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8076 (OpSel & (1 << 3)) != 0) 8077 ModVal |= SISrcMods::DST_OP_SEL; 8078 8079 
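    // Worked example (illustrative): op_sel:[0,1,0,1] sets OP_SEL_0 in
    // src1_modifiers, and the dst half-select (bit 3) is recorded in
    // src0_modifiers as DST_OP_SEL.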
Inst.getOperand(ModIdx).setImm(ModVal); 8080 } 8081 } 8082 8083 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8084 OptionalImmIndexMap &OptionalIdx) { 8085 unsigned Opc = Inst.getOpcode(); 8086 8087 unsigned I = 1; 8088 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8089 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8090 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8091 } 8092 8093 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8094 // This instruction has src modifiers 8095 for (unsigned E = Operands.size(); I != E; ++I) { 8096 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8097 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8098 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8099 } else if (Op.isImmModifier()) { 8100 OptionalIdx[Op.getImmTy()] = I; 8101 } else if (Op.isRegOrImm()) { 8102 Op.addRegOrImmOperands(Inst, 1); 8103 } else { 8104 llvm_unreachable("unhandled operand type"); 8105 } 8106 } 8107 } else { 8108 // No src modifiers 8109 for (unsigned E = Operands.size(); I != E; ++I) { 8110 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8111 if (Op.isMod()) { 8112 OptionalIdx[Op.getImmTy()] = I; 8113 } else { 8114 Op.addRegOrImmOperands(Inst, 1); 8115 } 8116 } 8117 } 8118 8119 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8121 } 8122 8123 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8125 } 8126 8127 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8128 // it has src2 register operand that is tied to dst operand 8129 // we don't allow modifiers for this operand in assembler so src2_modifiers 8130 // should be 0. 8131 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8132 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8133 Opc == AMDGPU::V_MAC_F32_e64_vi || 8134 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8135 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8136 Opc == AMDGPU::V_MAC_F16_e64_vi || 8137 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8138 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8139 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8140 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8141 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8142 auto it = Inst.begin(); 8143 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8144 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8145 ++it; 8146 // Copy the operand to ensure it's not invalidated when Inst grows. 8147 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8148 } 8149 } 8150 8151 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8152 OptionalImmIndexMap OptionalIdx; 8153 cvtVOP3(Inst, Operands, OptionalIdx); 8154 } 8155 8156 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8157 OptionalImmIndexMap &OptIdx) { 8158 const int Opc = Inst.getOpcode(); 8159 const MCInstrDesc &Desc = MII.get(Opc); 8160 8161 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8162 8163 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8164 assert(!IsPacked); 8165 Inst.addOperand(Inst.getOperand(0)); 8166 } 8167 8168 // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 8169 // instruction, and then figure out where to actually put the modifiers 8170 8171 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8172 if (OpSelIdx != -1) { 8173 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8174 } 8175 8176 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8177 if (OpSelHiIdx != -1) { 8178 int DefaultVal = IsPacked ? -1 : 0; 8179 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8180 DefaultVal); 8181 } 8182 8183 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8184 if (NegLoIdx != -1) { 8185 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8186 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8187 } 8188 8189 const int Ops[] = { AMDGPU::OpName::src0, 8190 AMDGPU::OpName::src1, 8191 AMDGPU::OpName::src2 }; 8192 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8193 AMDGPU::OpName::src1_modifiers, 8194 AMDGPU::OpName::src2_modifiers }; 8195 8196 unsigned OpSel = 0; 8197 unsigned OpSelHi = 0; 8198 unsigned NegLo = 0; 8199 unsigned NegHi = 0; 8200 8201 if (OpSelIdx != -1) 8202 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8203 8204 if (OpSelHiIdx != -1) 8205 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8206 8207 if (NegLoIdx != -1) { 8208 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8209 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8210 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8211 } 8212 8213 for (int J = 0; J < 3; ++J) { 8214 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8215 if (OpIdx == -1) 8216 break; 8217 8218 uint32_t ModVal = 0; 8219 8220 if ((OpSel & (1 << J)) != 0) 8221 ModVal |= SISrcMods::OP_SEL_0; 8222 8223 if ((OpSelHi & (1 << J)) != 0) 8224 ModVal |= SISrcMods::OP_SEL_1; 8225 8226 if ((NegLo & (1 << J)) != 0) 8227 ModVal |= SISrcMods::NEG; 8228 8229 if ((NegHi & (1 << J)) != 0) 8230 ModVal |= SISrcMods::NEG_HI; 8231 8232 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8233 8234 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8235 } 8236 } 8237 8238 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8239 OptionalImmIndexMap OptIdx; 8240 cvtVOP3(Inst, Operands, OptIdx); 8241 cvtVOP3P(Inst, Operands, OptIdx); 8242 } 8243 8244 //===----------------------------------------------------------------------===// 8245 // dpp 8246 //===----------------------------------------------------------------------===// 8247 8248 bool AMDGPUOperand::isDPP8() const { 8249 return isImmTy(ImmTyDPP8); 8250 } 8251 8252 bool AMDGPUOperand::isDPPCtrl() const { 8253 using namespace AMDGPU::DPP; 8254 8255 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8256 if (result) { 8257 int64_t Imm = getImm(); 8258 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8259 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8260 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8261 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8262 (Imm == DppCtrl::WAVE_SHL1) || 8263 (Imm == DppCtrl::WAVE_ROL1) || 8264 (Imm == DppCtrl::WAVE_SHR1) || 8265 (Imm == DppCtrl::WAVE_ROR1) || 8266 (Imm == DppCtrl::ROW_MIRROR) || 8267 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8268 (Imm == DppCtrl::BCAST15) || 8269 (Imm == DppCtrl::BCAST31) || 8270 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= 
DppCtrl::ROW_SHARE_LAST) || 8271 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8272 } 8273 return false; 8274 } 8275 8276 //===----------------------------------------------------------------------===// 8277 // mAI 8278 //===----------------------------------------------------------------------===// 8279 8280 bool AMDGPUOperand::isBLGP() const { 8281 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8282 } 8283 8284 bool AMDGPUOperand::isCBSZ() const { 8285 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8286 } 8287 8288 bool AMDGPUOperand::isABID() const { 8289 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8290 } 8291 8292 bool AMDGPUOperand::isS16Imm() const { 8293 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8294 } 8295 8296 bool AMDGPUOperand::isU16Imm() const { 8297 return isImm() && isUInt<16>(getImm()); 8298 } 8299 8300 //===----------------------------------------------------------------------===// 8301 // dim 8302 //===----------------------------------------------------------------------===// 8303 8304 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8305 // We want to allow "dim:1D" etc., 8306 // but the initial 1 is tokenized as an integer. 8307 std::string Token; 8308 if (isToken(AsmToken::Integer)) { 8309 SMLoc Loc = getToken().getEndLoc(); 8310 Token = std::string(getTokenStr()); 8311 lex(); 8312 if (getLoc() != Loc) 8313 return false; 8314 } 8315 8316 StringRef Suffix; 8317 if (!parseId(Suffix)) 8318 return false; 8319 Token += Suffix; 8320 8321 StringRef DimId = Token; 8322 if (DimId.startswith("SQ_RSRC_IMG_")) 8323 DimId = DimId.drop_front(12); 8324 8325 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8326 if (!DimInfo) 8327 return false; 8328 8329 Encoding = DimInfo->Encoding; 8330 return true; 8331 } 8332 8333 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8334 if (!isGFX10Plus()) 8335 return MatchOperand_NoMatch; 8336 8337 SMLoc S = getLoc(); 8338 8339 if (!trySkipId("dim", AsmToken::Colon)) 8340 return MatchOperand_NoMatch; 8341 8342 unsigned Encoding; 8343 SMLoc Loc = getLoc(); 8344 if (!parseDimId(Encoding)) { 8345 Error(Loc, "invalid dim value"); 8346 return MatchOperand_ParseFail; 8347 } 8348 8349 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8350 AMDGPUOperand::ImmTyDim)); 8351 return MatchOperand_Success; 8352 } 8353 8354 //===----------------------------------------------------------------------===// 8355 // dpp 8356 //===----------------------------------------------------------------------===// 8357 8358 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8359 SMLoc S = getLoc(); 8360 8361 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8362 return MatchOperand_NoMatch; 8363 8364 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8365 8366 int64_t Sels[8]; 8367 8368 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8369 return MatchOperand_ParseFail; 8370 8371 for (size_t i = 0; i < 8; ++i) { 8372 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8373 return MatchOperand_ParseFail; 8374 8375 SMLoc Loc = getLoc(); 8376 if (getParser().parseAbsoluteExpression(Sels[i])) 8377 return MatchOperand_ParseFail; 8378 if (0 > Sels[i] || 7 < Sels[i]) { 8379 Error(Loc, "expected a 3-bit value"); 8380 return MatchOperand_ParseFail; 8381 } 8382 } 8383 8384 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8385 return 
MatchOperand_ParseFail; 8386 8387 unsigned DPP8 = 0; 8388 for (size_t i = 0; i < 8; ++i) 8389 DPP8 |= (Sels[i] << (i * 3)); 8390 8391 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8392 return MatchOperand_Success; 8393 } 8394 8395 bool 8396 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8397 const OperandVector &Operands) { 8398 if (Ctrl == "row_newbcast") 8399 return isGFX90A(); 8400 8401 if (Ctrl == "row_share" || 8402 Ctrl == "row_xmask") 8403 return isGFX10Plus(); 8404 8405 if (Ctrl == "wave_shl" || 8406 Ctrl == "wave_shr" || 8407 Ctrl == "wave_rol" || 8408 Ctrl == "wave_ror" || 8409 Ctrl == "row_bcast") 8410 return isVI() || isGFX9(); 8411 8412 return Ctrl == "row_mirror" || 8413 Ctrl == "row_half_mirror" || 8414 Ctrl == "quad_perm" || 8415 Ctrl == "row_shl" || 8416 Ctrl == "row_shr" || 8417 Ctrl == "row_ror"; 8418 } 8419 8420 int64_t 8421 AMDGPUAsmParser::parseDPPCtrlPerm() { 8422 // quad_perm:[%d,%d,%d,%d] 8423 8424 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8425 return -1; 8426 8427 int64_t Val = 0; 8428 for (int i = 0; i < 4; ++i) { 8429 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8430 return -1; 8431 8432 int64_t Temp; 8433 SMLoc Loc = getLoc(); 8434 if (getParser().parseAbsoluteExpression(Temp)) 8435 return -1; 8436 if (Temp < 0 || Temp > 3) { 8437 Error(Loc, "expected a 2-bit value"); 8438 return -1; 8439 } 8440 8441 Val += (Temp << i * 2); 8442 } 8443 8444 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8445 return -1; 8446 8447 return Val; 8448 } 8449 8450 int64_t 8451 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8452 using namespace AMDGPU::DPP; 8453 8454 // sel:%d 8455 8456 int64_t Val; 8457 SMLoc Loc = getLoc(); 8458 8459 if (getParser().parseAbsoluteExpression(Val)) 8460 return -1; 8461 8462 struct DppCtrlCheck { 8463 int64_t Ctrl; 8464 int Lo; 8465 int Hi; 8466 }; 8467 8468 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8469 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8470 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8471 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8472 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8473 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8474 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8475 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8476 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8477 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8478 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8479 .Default({-1, 0, 0}); 8480 8481 bool Valid; 8482 if (Check.Ctrl == -1) { 8483 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8484 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8485 } else { 8486 Valid = Check.Lo <= Val && Val <= Check.Hi; 8487 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8488 } 8489 8490 if (!Valid) { 8491 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8492 return -1; 8493 } 8494 8495 return Val; 8496 } 8497 8498 OperandMatchResultTy 8499 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8500 using namespace AMDGPU::DPP; 8501 8502 if (!isToken(AsmToken::Identifier) || 8503 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8504 return MatchOperand_NoMatch; 8505 8506 SMLoc S = getLoc(); 8507 int64_t Val = -1; 8508 StringRef Ctrl; 8509 8510 parseId(Ctrl); 8511 8512 if (Ctrl == "row_mirror") { 8513 Val = DppCtrl::ROW_MIRROR; 8514 } else if (Ctrl == "row_half_mirror") { 8515 Val = DppCtrl::ROW_HALF_MIRROR; 8516 } else { 8517 if (skipToken(AsmToken::Colon, "expected a colon")) { 8518 if (Ctrl == "quad_perm") { 8519 Val = parseDPPCtrlPerm(); 8520 } else { 8521 Val = parseDPPCtrlSel(Ctrl); 8522 } 8523 } 8524 } 8525 8526 if (Val == -1) 8527 return MatchOperand_ParseFail; 8528 8529 Operands.push_back( 8530 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8531 return MatchOperand_Success; 8532 } 8533 8534 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8535 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8536 } 8537 8538 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8539 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8540 } 8541 8542 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8543 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8544 } 8545 8546 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8547 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8548 } 8549 8550 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8551 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8552 } 8553 8554 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8555 OptionalImmIndexMap OptionalIdx; 8556 8557 unsigned Opc = Inst.getOpcode(); 8558 bool HasModifiers = 8559 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8560 unsigned I = 1; 8561 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8562 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8563 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8564 } 8565 8566 int Fi = 0; 8567 for (unsigned E = Operands.size(); I != E; ++I) { 8568 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8569 MCOI::TIED_TO); 8570 if (TiedTo != -1) { 8571 assert((unsigned)TiedTo < Inst.getNumOperands()); 8572 // handle tied old or src2 for MAC instructions 8573 Inst.addOperand(Inst.getOperand(TiedTo)); 8574 } 8575 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8576 // Add the register arguments 8577 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8578 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8579 // Skip it. 
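      // For example (illustrative), in
      //   v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
      // the explicit 'vcc' names the implicit carry-out, so it is parsed but
      // no MC operand is added for it here.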
8580 continue; 8581 } 8582 8583 if (IsDPP8) { 8584 if (Op.isDPP8()) { 8585 Op.addImmOperands(Inst, 1); 8586 } else if (HasModifiers && 8587 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8588 Op.addRegWithFPInputModsOperands(Inst, 2); 8589 } else if (Op.isFI()) { 8590 Fi = Op.getImm(); 8591 } else if (Op.isReg()) { 8592 Op.addRegOperands(Inst, 1); 8593 } else { 8594 llvm_unreachable("Invalid operand type"); 8595 } 8596 } else { 8597 if (HasModifiers && 8598 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8599 Op.addRegWithFPInputModsOperands(Inst, 2); 8600 } else if (Op.isReg()) { 8601 Op.addRegOperands(Inst, 1); 8602 } else if (Op.isDPPCtrl()) { 8603 Op.addImmOperands(Inst, 1); 8604 } else if (Op.isImm()) { 8605 // Handle optional arguments 8606 OptionalIdx[Op.getImmTy()] = I; 8607 } else { 8608 llvm_unreachable("Invalid operand type"); 8609 } 8610 } 8611 } 8612 8613 if (IsDPP8) { 8614 using namespace llvm::AMDGPU::DPP; 8615 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8616 } else { 8617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8618 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8620 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8622 } 8623 } 8624 } 8625 8626 //===----------------------------------------------------------------------===// 8627 // sdwa 8628 //===----------------------------------------------------------------------===// 8629 8630 OperandMatchResultTy 8631 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8632 AMDGPUOperand::ImmTy Type) { 8633 using namespace llvm::AMDGPU::SDWA; 8634 8635 SMLoc S = getLoc(); 8636 StringRef Value; 8637 OperandMatchResultTy res; 8638 8639 SMLoc StringLoc; 8640 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8641 if (res != MatchOperand_Success) { 8642 return res; 8643 } 8644 8645 int64_t Int; 8646 Int = StringSwitch<int64_t>(Value) 8647 .Case("BYTE_0", SdwaSel::BYTE_0) 8648 .Case("BYTE_1", SdwaSel::BYTE_1) 8649 .Case("BYTE_2", SdwaSel::BYTE_2) 8650 .Case("BYTE_3", SdwaSel::BYTE_3) 8651 .Case("WORD_0", SdwaSel::WORD_0) 8652 .Case("WORD_1", SdwaSel::WORD_1) 8653 .Case("DWORD", SdwaSel::DWORD) 8654 .Default(0xffffffff); 8655 8656 if (Int == 0xffffffff) { 8657 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8658 return MatchOperand_ParseFail; 8659 } 8660 8661 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8662 return MatchOperand_Success; 8663 } 8664 8665 OperandMatchResultTy 8666 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8667 using namespace llvm::AMDGPU::SDWA; 8668 8669 SMLoc S = getLoc(); 8670 StringRef Value; 8671 OperandMatchResultTy res; 8672 8673 SMLoc StringLoc; 8674 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8675 if (res != MatchOperand_Success) { 8676 return res; 8677 } 8678 8679 int64_t Int; 8680 Int = StringSwitch<int64_t>(Value) 8681 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8682 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8683 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8684 .Default(0xffffffff); 8685 8686 if (Int == 0xffffffff) { 8687 Error(StringLoc, "invalid dst_unused value"); 8688 return MatchOperand_ParseFail; 8689 } 8690 8691 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8692 return MatchOperand_Success; 8693 } 8694 8695 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8696 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8697 } 8698 8699 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8700 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8701 } 8702 8703 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8704 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8705 } 8706 8707 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8708 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8709 } 8710 8711 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8712 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8713 } 8714 8715 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8716 uint64_t BasicInstType, 8717 bool SkipDstVcc, 8718 bool SkipSrcVcc) { 8719 using namespace llvm::AMDGPU::SDWA; 8720 8721 OptionalImmIndexMap OptionalIdx; 8722 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8723 bool SkippedVcc = false; 8724 8725 unsigned I = 1; 8726 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8727 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8728 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8729 } 8730 8731 for (unsigned E = Operands.size(); I != E; ++I) { 8732 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8733 if (SkipVcc && !SkippedVcc && Op.isReg() && 8734 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8735 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8736 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8737 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8738 // Skip VCC only if we didn't skip it on previous iteration. 8739 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
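      // Worked example: for 'v_addc_u32_sdwa v1, vcc, v2, v3, vcc ...' the dst
      // 'vcc' is reached while Inst holds only vdst (1 operand), and the src
      // 'vcc' once Inst holds vdst + src0(+mods) + src1(+mods) = 5 operands,
      // which are the two counts tested below.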
8740 if (BasicInstType == SIInstrFlags::VOP2 && 8741 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8742 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8743 SkippedVcc = true; 8744 continue; 8745 } else if (BasicInstType == SIInstrFlags::VOPC && 8746 Inst.getNumOperands() == 0) { 8747 SkippedVcc = true; 8748 continue; 8749 } 8750 } 8751 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8752 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8753 } else if (Op.isImm()) { 8754 // Handle optional arguments 8755 OptionalIdx[Op.getImmTy()] = I; 8756 } else { 8757 llvm_unreachable("Invalid operand type"); 8758 } 8759 SkippedVcc = false; 8760 } 8761 8762 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8763 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8764 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8765 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8766 switch (BasicInstType) { 8767 case SIInstrFlags::VOP1: 8768 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8769 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8770 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8771 } 8772 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8774 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8775 break; 8776 8777 case SIInstrFlags::VOP2: 8778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8779 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8780 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8781 } 8782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8786 break; 8787 8788 case SIInstrFlags::VOPC: 8789 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8793 break; 8794 8795 default: 8796 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8797 } 8798 } 8799 8800 // special case v_mac_{f16, f32}: 8801 // it has src2 register operand that is tied to dst operand 8802 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8803 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8804 auto it = Inst.begin(); 8805 std::advance( 8806 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8807 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8808 } 8809 } 8810 8811 //===----------------------------------------------------------------------===// 8812 // mAI 8813 //===----------------------------------------------------------------------===// 8814 8815 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8816 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8817 } 8818 8819 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8820 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8821 } 8822 8823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8824 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8825 } 8826 8827 /// Force static initialization. 8828 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8829 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8830 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8831 } 8832 8833 #define GET_REGISTER_MATCHER 8834 #define GET_MATCHER_IMPLEMENTATION 8835 #define GET_MNEMONIC_SPELL_CHECKER 8836 #define GET_MNEMONIC_CHECKER 8837 #include "AMDGPUGenAsmMatcher.inc" 8838 8839 // This function should be defined after auto-generated include so that we have 8840 // MatchClassKind enum defined 8841 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8842 unsigned Kind) { 8843 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8844 // But MatchInstructionImpl() expects to meet token and fails to validate 8845 // operand. This method checks if we are given immediate operand but expect to 8846 // get corresponding token. 8847 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8848 switch (Kind) { 8849 case MCK_addr64: 8850 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8851 case MCK_gds: 8852 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8853 case MCK_lds: 8854 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8855 case MCK_idxen: 8856 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8857 case MCK_offen: 8858 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8859 case MCK_SSrcB32: 8860 // When operands have expression values, they will return true for isToken, 8861 // because it is not possible to distinguish between a token and an 8862 // expression at parse time. MatchInstructionImpl() will always try to 8863 // match an operand as a token, when isToken returns true, and when the 8864 // name of the expression is not a valid token, the match will fail, 8865 // so we need to handle it here. 8866 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8867 case MCK_SSrcF32: 8868 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8869 case MCK_SoppBrTarget: 8870 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8871 case MCK_VReg32OrOff: 8872 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8873 case MCK_InterpSlot: 8874 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8875 case MCK_Attr: 8876 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8877 case MCK_AttrChan: 8878 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8879 case MCK_ImmSMEMOffset: 8880 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8881 case MCK_SReg_64: 8882 case MCK_SReg_64_XEXEC: 8883 // Null is defined as a 32-bit register but 8884 // it should also be enabled with 64-bit operands. 8885 // The following code enables it for SReg_64 operands 8886 // used as source and destination. Remaining source 8887 // operands are handled in isInlinableImm. 8888 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8889 default: 8890 return Match_InvalidOperand; 8891 } 8892 } 8893 8894 //===----------------------------------------------------------------------===// 8895 // endpgm 8896 //===----------------------------------------------------------------------===// 8897 8898 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8899 SMLoc S = getLoc(); 8900 int64_t Imm = 0; 8901 8902 if (!parseExpr(Imm)) { 8903 // The operand is optional, if not present default to 0 8904 Imm = 0; 8905 } 8906 8907 if (!isUInt<16>(Imm)) { 8908 Error(S, "expected a 16-bit value"); 8909 return MatchOperand_ParseFail; 8910 } 8911 8912 Operands.push_back( 8913 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8914 return MatchOperand_Success; 8915 } 8916 8917 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8918 8919 //===----------------------------------------------------------------------===// 8920 // LDSDIR 8921 //===----------------------------------------------------------------------===// 8922 8923 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { 8924 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); 8925 } 8926 8927 bool AMDGPUOperand::isWaitVDST() const { 8928 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 8929 } 8930 8931 //===----------------------------------------------------------------------===// 8932 // VINTERP 8933 //===----------------------------------------------------------------------===// 8934 8935 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { 8936 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); 8937 } 8938 8939 bool AMDGPUOperand::isWaitEXP() const { 8940 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 8941 } 8942