//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
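    // For instance, in "ds_write_b32 v0, v1 gds" (an illustrative example, not
    // a case handled specially here), the trailing "gds" may initially be
    // parsed as an expression referring to a symbol named "gds"; getToken()
    // then returns that symbol's name so it can still be matched as a token.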
209 return isSymbolRefExpr(); 210 } 211 212 bool isSymbolRefExpr() const { 213 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 214 } 215 216 bool isImm() const override { 217 return Kind == Immediate; 218 } 219 220 void setImmKindNone() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyNone; 223 } 224 225 void setImmKindLiteral() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyLiteral; 228 } 229 230 void setImmKindConst() const { 231 assert(isImm()); 232 Imm.Kind = ImmKindTyConst; 233 } 234 235 bool IsImmKindLiteral() const { 236 return isImm() && Imm.Kind == ImmKindTyLiteral; 237 } 238 239 bool isImmKindConst() const { 240 return isImm() && Imm.Kind == ImmKindTyConst; 241 } 242 243 bool isInlinableImm(MVT type) const; 244 bool isLiteralImm(MVT type) const; 245 246 bool isRegKind() const { 247 return Kind == Register; 248 } 249 250 bool isReg() const override { 251 return isRegKind() && !hasModifiers(); 252 } 253 254 bool isRegOrInline(unsigned RCID, MVT type) const { 255 return isRegClass(RCID) || isInlinableImm(type); 256 } 257 258 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 259 return isRegOrInline(RCID, type) || isLiteralImm(type); 260 } 261 262 bool isRegOrImmWithInt16InputMods() const { 263 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 264 } 265 266 bool isRegOrImmWithInt32InputMods() const { 267 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 268 } 269 270 bool isRegOrImmWithInt64InputMods() const { 271 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 272 } 273 274 bool isRegOrImmWithFP16InputMods() const { 275 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 276 } 277 278 bool isRegOrImmWithFP32InputMods() const { 279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 280 } 281 282 bool isRegOrImmWithFP64InputMods() const { 283 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 284 } 285 286 bool isVReg() const { 287 return isRegClass(AMDGPU::VGPR_32RegClassID) || 288 isRegClass(AMDGPU::VReg_64RegClassID) || 289 isRegClass(AMDGPU::VReg_96RegClassID) || 290 isRegClass(AMDGPU::VReg_128RegClassID) || 291 isRegClass(AMDGPU::VReg_160RegClassID) || 292 isRegClass(AMDGPU::VReg_192RegClassID) || 293 isRegClass(AMDGPU::VReg_256RegClassID) || 294 isRegClass(AMDGPU::VReg_512RegClassID) || 295 isRegClass(AMDGPU::VReg_1024RegClassID); 296 } 297 298 bool isVReg32() const { 299 return isRegClass(AMDGPU::VGPR_32RegClassID); 300 } 301 302 bool isVReg32OrOff() const { 303 return isOff() || isVReg32(); 304 } 305 306 bool isNull() const { 307 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 308 } 309 310 bool isVRegWithInputMods() const; 311 312 bool isSDWAOperand(MVT type) const; 313 bool isSDWAFP16Operand() const; 314 bool isSDWAFP32Operand() const; 315 bool isSDWAInt16Operand() const; 316 bool isSDWAInt32Operand() const; 317 318 bool isImmTy(ImmTy ImmT) const { 319 return isImm() && Imm.Type == ImmT; 320 } 321 322 bool isImmModifier() const { 323 return isImm() && Imm.Type != ImmTyNone; 324 } 325 326 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 327 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 328 bool isDMask() const { return isImmTy(ImmTyDMask); } 329 bool isDim() const { return isImmTy(ImmTyDim); } 330 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 331 bool isDA() const { return isImmTy(ImmTyDA); } 332 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 333 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 334 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 335 bool isOff() const { return isImmTy(ImmTyOff); } 336 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 337 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 338 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 339 bool isOffen() const { return isImmTy(ImmTyOffen); } 340 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 341 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 342 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 343 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 344 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 345 346 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 347 bool isGDS() const { return isImmTy(ImmTyGDS); } 348 bool isLDS() const { return isImmTy(ImmTyLDS); } 349 bool isCPol() const { return isImmTy(ImmTyCPol); } 350 bool isSWZ() const { return isImmTy(ImmTySWZ); } 351 bool isTFE() const { return isImmTy(ImmTyTFE); } 352 bool isD16() const { return isImmTy(ImmTyD16); } 353 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 354 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 355 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 356 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 357 bool isFI() const { return isImmTy(ImmTyDppFi); } 358 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 359 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 360 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 361 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 362 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 363 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 364 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 365 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 366 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 367 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 368 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 369 bool isHigh() const { return isImmTy(ImmTyHigh); } 370 371 bool isMod() const { 372 return isClampSI() || isOModSI(); 373 } 374 375 bool isRegOrImm() const { 376 return isReg() || isImm(); 377 } 378 379 bool isRegClass(unsigned RCID) const; 380 381 bool isInlineValue() const; 382 383 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 384 return isRegOrInline(RCID, type) && !hasModifiers(); 385 } 386 387 bool isSCSrcB16() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 389 } 390 391 bool isSCSrcV2B16() const { 392 return isSCSrcB16(); 393 } 394 395 bool isSCSrcB32() const { 396 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 397 } 398 399 bool isSCSrcB64() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 401 } 402 403 bool isBoolReg() const; 404 405 bool isSCSrcF16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 407 } 408 409 bool isSCSrcV2F16() const { 410 return isSCSrcF16(); 411 } 412 413 bool isSCSrcF32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 415 } 416 417 bool isSCSrcF64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 419 } 420 421 bool isSSrcB32() const { 422 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 423 } 424 425 bool isSSrcB16() const { 426 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 427 } 428 429 bool isSSrcV2B16() const { 430 llvm_unreachable("cannot happen"); 431 return isSSrcB16(); 432 } 433 434 bool isSSrcB64() const { 435 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 436 // See isVSrc64(). 437 return isSCSrcB64() || isLiteralImm(MVT::i64); 438 } 439 440 bool isSSrcF32() const { 441 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 442 } 443 444 bool isSSrcF64() const { 445 return isSCSrcB64() || isLiteralImm(MVT::f64); 446 } 447 448 bool isSSrcF16() const { 449 return isSCSrcB16() || isLiteralImm(MVT::f16); 450 } 451 452 bool isSSrcV2F16() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF16(); 455 } 456 457 bool isSSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSSrcF32(); 460 } 461 462 bool isSCSrcV2FP32() const { 463 llvm_unreachable("cannot happen"); 464 return isSCSrcF32(); 465 } 466 467 bool isSSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSSrcB32(); 470 } 471 472 bool isSCSrcV2INT32() const { 473 llvm_unreachable("cannot happen"); 474 return isSCSrcB32(); 475 } 476 477 bool isSSrcOrLdsB32() const { 478 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 479 isLiteralImm(MVT::i32) || isExpr(); 480 } 481 482 bool isVCSrcB32() const { 483 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 484 } 485 486 bool isVCSrcB64() const { 487 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 488 } 489 490 bool isVCSrcB16() const { 491 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 492 } 493 494 bool isVCSrcV2B16() const { 495 return isVCSrcB16(); 496 } 497 498 bool isVCSrcF32() const { 499 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 500 } 501 502 bool isVCSrcF64() const { 503 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 504 } 505 506 bool isVCSrcF16() const { 507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 508 } 509 510 bool isVCSrcV2F16() const { 511 return isVCSrcF16(); 512 } 513 514 bool isVSrcB32() const { 515 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 516 } 517 518 bool isVSrcB64() const { 519 return isVCSrcF64() || isLiteralImm(MVT::i64); 520 } 521 522 bool isVSrcB16() const { 523 return isVCSrcB16() || isLiteralImm(MVT::i16); 524 } 525 526 bool isVSrcV2B16() const { 527 return isVSrcB16() || isLiteralImm(MVT::v2i16); 528 } 529 530 bool isVCSrcV2FP32() const { 531 return isVCSrcF64(); 532 } 533 534 bool isVSrcV2FP32() const { 535 return isVSrcF64() || isLiteralImm(MVT::v2f32); 536 } 537 538 bool isVCSrcV2INT32() const { 539 return isVCSrcB64(); 540 } 541 542 bool isVSrcV2INT32() const { 543 return isVSrcB64() || isLiteralImm(MVT::v2i32); 544 } 545 546 bool isVSrcF32() const { 547 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 548 } 549 550 bool isVSrcF64() const { 551 return isVCSrcF64() || isLiteralImm(MVT::f64); 552 } 553 554 bool isVSrcF16() const { 555 return isVCSrcF16() || isLiteralImm(MVT::f16); 556 } 557 558 bool isVSrcV2F16() const { 559 return isVSrcF16() || isLiteralImm(MVT::v2f16); 560 } 561 562 bool isVISrcB32() const { 563 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 564 } 565 566 bool isVISrcB16() const { 567 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 568 } 569 570 bool isVISrcV2B16() const { 571 return isVISrcB16(); 572 } 573 574 bool isVISrcF32() const { 575 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 576 } 577 578 
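  // A note on the source-operand predicate families above and below (an
  // informal summary inferred from their definitions, not an authoritative
  // specification):
  //   isSCSrc* - SGPR or an inlinable constant of the given type.
  //   isVCSrc* - VGPR or SGPR (VS_* register classes) or an inlinable
  //              constant.
  //   isVSrc*  - isVCSrc* or a literal immediate (the 32-bit forms also
  //              accept an expression).
  //   isVISrc* - VGPR of the given register class or an inlinable constant.
  //   isAISrc* - AGPR of the given register class or an inlinable constant.
  //   V2B16/V2F16/V2INT32/... suffixes denote the packed variants.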
bool isVISrcF16() const { 579 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 580 } 581 582 bool isVISrcV2F16() const { 583 return isVISrcF16() || isVISrcB32(); 584 } 585 586 bool isVISrc_64B64() const { 587 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 588 } 589 590 bool isVISrc_64F64() const { 591 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 592 } 593 594 bool isVISrc_64V2FP32() const { 595 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 596 } 597 598 bool isVISrc_64V2INT32() const { 599 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 600 } 601 602 bool isVISrc_256B64() const { 603 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 604 } 605 606 bool isVISrc_256F64() const { 607 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 608 } 609 610 bool isVISrc_128B16() const { 611 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 612 } 613 614 bool isVISrc_128V2B16() const { 615 return isVISrc_128B16(); 616 } 617 618 bool isVISrc_128B32() const { 619 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 620 } 621 622 bool isVISrc_128F32() const { 623 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 624 } 625 626 bool isVISrc_256V2FP32() const { 627 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 628 } 629 630 bool isVISrc_256V2INT32() const { 631 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 632 } 633 634 bool isVISrc_512B32() const { 635 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 636 } 637 638 bool isVISrc_512B16() const { 639 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 640 } 641 642 bool isVISrc_512V2B16() const { 643 return isVISrc_512B16(); 644 } 645 646 bool isVISrc_512F32() const { 647 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 648 } 649 650 bool isVISrc_512F16() const { 651 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 652 } 653 654 bool isVISrc_512V2F16() const { 655 return isVISrc_512F16() || isVISrc_512B32(); 656 } 657 658 bool isVISrc_1024B32() const { 659 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 660 } 661 662 bool isVISrc_1024B16() const { 663 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 664 } 665 666 bool isVISrc_1024V2B16() const { 667 return isVISrc_1024B16(); 668 } 669 670 bool isVISrc_1024F32() const { 671 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 672 } 673 674 bool isVISrc_1024F16() const { 675 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 676 } 677 678 bool isVISrc_1024V2F16() const { 679 return isVISrc_1024F16() || isVISrc_1024B32(); 680 } 681 682 bool isAISrcB32() const { 683 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 684 } 685 686 bool isAISrcB16() const { 687 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 688 } 689 690 bool isAISrcV2B16() const { 691 return isAISrcB16(); 692 } 693 694 bool isAISrcF32() const { 695 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 696 } 697 698 bool isAISrcF16() const { 699 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 700 } 701 702 bool isAISrcV2F16() const { 703 return isAISrcF16() || isAISrcB32(); 704 } 705 706 bool isAISrc_64B64() const { 707 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 708 } 709 710 bool isAISrc_64F64() const { 711 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 712 } 713 714 bool isAISrc_128B32() const { 715 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 716 } 717 718 bool isAISrc_128B16() const { 719 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 720 } 721 722 bool isAISrc_128V2B16() const { 723 return isAISrc_128B16(); 724 } 725 726 bool isAISrc_128F32() const { 727 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 728 } 729 730 bool isAISrc_128F16() const { 731 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 732 } 733 734 bool isAISrc_128V2F16() const { 735 return isAISrc_128F16() || isAISrc_128B32(); 736 } 737 738 bool isVISrc_128F16() const { 739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 740 } 741 742 bool isVISrc_128V2F16() const { 743 return isVISrc_128F16() || isVISrc_128B32(); 744 } 745 746 bool isAISrc_256B64() const { 747 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 748 } 749 750 bool isAISrc_256F64() const { 751 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 752 } 753 754 bool isAISrc_512B32() const { 755 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 756 } 757 758 bool isAISrc_512B16() const { 759 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 760 } 761 762 bool isAISrc_512V2B16() const { 763 return isAISrc_512B16(); 764 } 765 766 bool isAISrc_512F32() const { 767 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 768 } 769 770 bool isAISrc_512F16() const { 771 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 772 } 773 774 bool isAISrc_512V2F16() const { 775 return isAISrc_512F16() || isAISrc_512B32(); 776 } 777 778 bool isAISrc_1024B32() const { 779 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 780 } 781 782 bool isAISrc_1024B16() const { 783 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 784 } 785 786 bool isAISrc_1024V2B16() const { 787 return isAISrc_1024B16(); 788 } 789 790 bool isAISrc_1024F32() const { 791 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 792 } 793 794 bool isAISrc_1024F16() const { 795 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 796 } 797 798 bool isAISrc_1024V2F16() const { 799 return isAISrc_1024F16() || isAISrc_1024B32(); 800 } 801 802 bool isKImmFP32() const { 803 return isLiteralImm(MVT::f32); 804 } 805 806 bool isKImmFP16() const { 807 return isLiteralImm(MVT::f16); 808 } 809 810 bool isMem() const override { 811 return false; 812 } 813 814 bool isExpr() const { 815 return Kind == Expression; 816 } 817 818 bool isSoppBrTarget() const { 819 return isExpr() || isImm(); 820 } 821 822 bool isSWaitCnt() const; 823 bool isDepCtr() const; 824 bool isSDelayAlu() const; 825 bool isHwreg() const; 826 bool isSendMsg() const; 827 bool isSwizzle() const; 828 bool isSMRDOffset8() const; 829 bool isSMEMOffset() const; 830 bool isSMRDLiteralOffset() const; 831 bool isDPP8() const; 832 bool isDPPCtrl() const; 833 bool isBLGP() const; 834 bool isCBSZ() const; 835 bool isABID() const; 836 bool isGPRIdxMode() const; 837 bool isS16Imm() const; 838 bool isU16Imm() const; 839 bool isEndpgm() const; 840 bool isWaitVDST() const; 841 bool isWaitEXP() const; 842 843 StringRef getExpressionAsToken() const { 844 assert(isExpr()); 845 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 846 return S->getSymbol().getName(); 847 } 848 849 StringRef getToken() const { 850 assert(isToken()); 851 
852 if (Kind == Expression) 853 return getExpressionAsToken(); 854 855 return StringRef(Tok.Data, Tok.Length); 856 } 857 858 int64_t getImm() const { 859 assert(isImm()); 860 return Imm.Val; 861 } 862 863 void setImm(int64_t Val) { 864 assert(isImm()); 865 Imm.Val = Val; 866 } 867 868 ImmTy getImmTy() const { 869 assert(isImm()); 870 return Imm.Type; 871 } 872 873 unsigned getReg() const override { 874 assert(isRegKind()); 875 return Reg.RegNo; 876 } 877 878 SMLoc getStartLoc() const override { 879 return StartLoc; 880 } 881 882 SMLoc getEndLoc() const override { 883 return EndLoc; 884 } 885 886 SMRange getLocRange() const { 887 return SMRange(StartLoc, EndLoc); 888 } 889 890 Modifiers getModifiers() const { 891 assert(isRegKind() || isImmTy(ImmTyNone)); 892 return isRegKind() ? Reg.Mods : Imm.Mods; 893 } 894 895 void setModifiers(Modifiers Mods) { 896 assert(isRegKind() || isImmTy(ImmTyNone)); 897 if (isRegKind()) 898 Reg.Mods = Mods; 899 else 900 Imm.Mods = Mods; 901 } 902 903 bool hasModifiers() const { 904 return getModifiers().hasModifiers(); 905 } 906 907 bool hasFPModifiers() const { 908 return getModifiers().hasFPModifiers(); 909 } 910 911 bool hasIntModifiers() const { 912 return getModifiers().hasIntModifiers(); 913 } 914 915 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 916 917 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 918 919 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 920 921 template <unsigned Bitwidth> 922 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 923 924 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 925 addKImmFPOperands<16>(Inst, N); 926 } 927 928 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 929 addKImmFPOperands<32>(Inst, N); 930 } 931 932 void addRegOperands(MCInst &Inst, unsigned N) const; 933 934 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 935 addRegOperands(Inst, N); 936 } 937 938 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 939 if (isRegKind()) 940 addRegOperands(Inst, N); 941 else if (isExpr()) 942 Inst.addOperand(MCOperand::createExpr(Expr)); 943 else 944 addImmOperands(Inst, N); 945 } 946 947 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 948 Modifiers Mods = getModifiers(); 949 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 950 if (isRegKind()) { 951 addRegOperands(Inst, N); 952 } else { 953 addImmOperands(Inst, N, false); 954 } 955 } 956 957 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 958 assert(!hasIntModifiers()); 959 addRegOrImmWithInputModsOperands(Inst, N); 960 } 961 962 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 963 assert(!hasFPModifiers()); 964 addRegOrImmWithInputModsOperands(Inst, N); 965 } 966 967 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 968 Modifiers Mods = getModifiers(); 969 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 970 assert(isRegKind()); 971 addRegOperands(Inst, N); 972 } 973 974 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 975 assert(!hasIntModifiers()); 976 addRegWithInputModsOperands(Inst, N); 977 } 978 979 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 980 assert(!hasFPModifiers()); 981 addRegWithInputModsOperands(Inst, N); 982 } 983 984 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 985 if (isImm()) 986 addImmOperands(Inst, N); 987 else { 988 assert(isExpr()); 
989 Inst.addOperand(MCOperand::createExpr(Expr)); 990 } 991 } 992 993 static void printImmTy(raw_ostream& OS, ImmTy Type) { 994 switch (Type) { 995 case ImmTyNone: OS << "None"; break; 996 case ImmTyGDS: OS << "GDS"; break; 997 case ImmTyLDS: OS << "LDS"; break; 998 case ImmTyOffen: OS << "Offen"; break; 999 case ImmTyIdxen: OS << "Idxen"; break; 1000 case ImmTyAddr64: OS << "Addr64"; break; 1001 case ImmTyOffset: OS << "Offset"; break; 1002 case ImmTyInstOffset: OS << "InstOffset"; break; 1003 case ImmTyOffset0: OS << "Offset0"; break; 1004 case ImmTyOffset1: OS << "Offset1"; break; 1005 case ImmTyCPol: OS << "CPol"; break; 1006 case ImmTySWZ: OS << "SWZ"; break; 1007 case ImmTyTFE: OS << "TFE"; break; 1008 case ImmTyD16: OS << "D16"; break; 1009 case ImmTyFORMAT: OS << "FORMAT"; break; 1010 case ImmTyClampSI: OS << "ClampSI"; break; 1011 case ImmTyOModSI: OS << "OModSI"; break; 1012 case ImmTyDPP8: OS << "DPP8"; break; 1013 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1014 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1015 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1016 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1017 case ImmTyDppFi: OS << "FI"; break; 1018 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1019 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1020 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1021 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1022 case ImmTyDMask: OS << "DMask"; break; 1023 case ImmTyDim: OS << "Dim"; break; 1024 case ImmTyUNorm: OS << "UNorm"; break; 1025 case ImmTyDA: OS << "DA"; break; 1026 case ImmTyR128A16: OS << "R128A16"; break; 1027 case ImmTyA16: OS << "A16"; break; 1028 case ImmTyLWE: OS << "LWE"; break; 1029 case ImmTyOff: OS << "Off"; break; 1030 case ImmTyExpTgt: OS << "ExpTgt"; break; 1031 case ImmTyExpCompr: OS << "ExpCompr"; break; 1032 case ImmTyExpVM: OS << "ExpVM"; break; 1033 case ImmTyHwreg: OS << "Hwreg"; break; 1034 case ImmTySendMsg: OS << "SendMsg"; break; 1035 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1036 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1037 case ImmTyAttrChan: OS << "AttrChan"; break; 1038 case ImmTyOpSel: OS << "OpSel"; break; 1039 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1040 case ImmTyNegLo: OS << "NegLo"; break; 1041 case ImmTyNegHi: OS << "NegHi"; break; 1042 case ImmTySwizzle: OS << "Swizzle"; break; 1043 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1044 case ImmTyHigh: OS << "High"; break; 1045 case ImmTyBLGP: OS << "BLGP"; break; 1046 case ImmTyCBSZ: OS << "CBSZ"; break; 1047 case ImmTyABID: OS << "ABID"; break; 1048 case ImmTyEndpgm: OS << "Endpgm"; break; 1049 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1050 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1051 } 1052 } 1053 1054 void print(raw_ostream &OS) const override { 1055 switch (Kind) { 1056 case Register: 1057 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1058 break; 1059 case Immediate: 1060 OS << '<' << getImm(); 1061 if (getImmTy() != ImmTyNone) { 1062 OS << " type: "; printImmTy(OS, getImmTy()); 1063 } 1064 OS << " mods: " << Imm.Mods << '>'; 1065 break; 1066 case Token: 1067 OS << '\'' << getToken() << '\''; 1068 break; 1069 case Expression: 1070 OS << "<expr " << *Expr << '>'; 1071 break; 1072 } 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1076 int64_t Val, SMLoc Loc, 1077 ImmTy Type = ImmTyNone, 1078 bool IsFPImm = false) { 1079 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1080 Op->Imm.Val = Val; 1081 
Op->Imm.IsFPImm = IsFPImm; 1082 Op->Imm.Kind = ImmKindTyNone; 1083 Op->Imm.Type = Type; 1084 Op->Imm.Mods = Modifiers(); 1085 Op->StartLoc = Loc; 1086 Op->EndLoc = Loc; 1087 return Op; 1088 } 1089 1090 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1091 StringRef Str, SMLoc Loc, 1092 bool HasExplicitEncodingSize = true) { 1093 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1094 Res->Tok.Data = Str.data(); 1095 Res->Tok.Length = Str.size(); 1096 Res->StartLoc = Loc; 1097 Res->EndLoc = Loc; 1098 return Res; 1099 } 1100 1101 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1102 unsigned RegNo, SMLoc S, 1103 SMLoc E) { 1104 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1105 Op->Reg.RegNo = RegNo; 1106 Op->Reg.Mods = Modifiers(); 1107 Op->StartLoc = S; 1108 Op->EndLoc = E; 1109 return Op; 1110 } 1111 1112 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1113 const class MCExpr *Expr, SMLoc S) { 1114 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1115 Op->Expr = Expr; 1116 Op->StartLoc = S; 1117 Op->EndLoc = S; 1118 return Op; 1119 } 1120 }; 1121 1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1123 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1124 return OS; 1125 } 1126 1127 //===----------------------------------------------------------------------===// 1128 // AsmParser 1129 //===----------------------------------------------------------------------===// 1130 1131 // Holds info related to the current kernel, e.g. count of SGPRs used. 1132 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1133 // .amdgpu_hsa_kernel or at EOF. 1134 class KernelScopeInfo { 1135 int SgprIndexUnusedMin = -1; 1136 int VgprIndexUnusedMin = -1; 1137 int AgprIndexUnusedMin = -1; 1138 MCContext *Ctx = nullptr; 1139 MCSubtargetInfo const *MSTI = nullptr; 1140 1141 void usesSgprAt(int i) { 1142 if (i >= SgprIndexUnusedMin) { 1143 SgprIndexUnusedMin = ++i; 1144 if (Ctx) { 1145 MCSymbol* const Sym = 1146 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1147 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1148 } 1149 } 1150 } 1151 1152 void usesVgprAt(int i) { 1153 if (i >= VgprIndexUnusedMin) { 1154 VgprIndexUnusedMin = ++i; 1155 if (Ctx) { 1156 MCSymbol* const Sym = 1157 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1158 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1159 VgprIndexUnusedMin); 1160 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1161 } 1162 } 1163 } 1164 1165 void usesAgprAt(int i) { 1166 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1167 if (!hasMAIInsts(*MSTI)) 1168 return; 1169 1170 if (i >= AgprIndexUnusedMin) { 1171 AgprIndexUnusedMin = ++i; 1172 if (Ctx) { 1173 MCSymbol* const Sym = 1174 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1175 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1176 1177 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1178 MCSymbol* const vSym = 1179 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1180 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1181 VgprIndexUnusedMin); 1182 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1183 } 1184 } 1185 } 1186 1187 public: 1188 KernelScopeInfo() = default; 1189 1190 void initialize(MCContext &Context) { 1191 Ctx = &Context; 1192 MSTI = 
Ctx->getSubtargetInfo(); 1193 1194 usesSgprAt(SgprIndexUnusedMin = -1); 1195 usesVgprAt(VgprIndexUnusedMin = -1); 1196 if (hasMAIInsts(*MSTI)) { 1197 usesAgprAt(AgprIndexUnusedMin = -1); 1198 } 1199 } 1200 1201 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1202 unsigned RegWidth) { 1203 switch (RegKind) { 1204 case IS_SGPR: 1205 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1206 break; 1207 case IS_AGPR: 1208 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1209 break; 1210 case IS_VGPR: 1211 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1212 break; 1213 default: 1214 break; 1215 } 1216 } 1217 }; 1218 1219 class AMDGPUAsmParser : public MCTargetAsmParser { 1220 MCAsmParser &Parser; 1221 1222 // Number of extra operands parsed after the first optional operand. 1223 // This may be necessary to skip hardcoded mandatory operands. 1224 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1225 1226 unsigned ForcedEncodingSize = 0; 1227 bool ForcedDPP = false; 1228 bool ForcedSDWA = false; 1229 KernelScopeInfo KernelScope; 1230 unsigned CPolSeen; 1231 1232 /// @name Auto-generated Match Functions 1233 /// { 1234 1235 #define GET_ASSEMBLER_HEADER 1236 #include "AMDGPUGenAsmMatcher.inc" 1237 1238 /// } 1239 1240 private: 1241 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1242 bool OutOfRangeError(SMRange Range); 1243 /// Calculate VGPR/SGPR blocks required for given target, reserved 1244 /// registers, and user-specified NextFreeXGPR values. 1245 /// 1246 /// \param Features [in] Target features, used for bug corrections. 1247 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1248 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1249 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1250 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1251 /// descriptor field, if valid. 1252 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1253 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1254 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1255 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1256 /// \param VGPRBlocks [out] Result VGPR block count. 1257 /// \param SGPRBlocks [out] Result SGPR block count. 1258 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1259 bool FlatScrUsed, bool XNACKUsed, 1260 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1261 SMRange VGPRRange, unsigned NextFreeSGPR, 1262 SMRange SGPRRange, unsigned &VGPRBlocks, 1263 unsigned &SGPRBlocks); 1264 bool ParseDirectiveAMDGCNTarget(); 1265 bool ParseDirectiveAMDHSAKernel(); 1266 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1267 bool ParseDirectiveHSACodeObjectVersion(); 1268 bool ParseDirectiveHSACodeObjectISA(); 1269 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1270 bool ParseDirectiveAMDKernelCodeT(); 1271 // TODO: Possibly make subtargetHasRegister const. 1272 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1273 bool ParseDirectiveAMDGPUHsaKernel(); 1274 1275 bool ParseDirectiveISAVersion(); 1276 bool ParseDirectiveHSAMetadata(); 1277 bool ParseDirectivePALMetadataBegin(); 1278 bool ParseDirectivePALMetadata(); 1279 bool ParseDirectiveAMDGPULDS(); 1280 1281 /// Common code to parse out a block of text (typically YAML) between start and 1282 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1407 bool isGFX90A() const { 1408 return AMDGPU::isGFX90A(getSTI()); 1409 } 1410 1411 bool isGFX940() const { 1412 return AMDGPU::isGFX940(getSTI()); 1413 } 1414 1415 bool isGFX9Plus() const { 1416 return AMDGPU::isGFX9Plus(getSTI()); 1417 } 1418 1419 bool isGFX10() const { 1420 return AMDGPU::isGFX10(getSTI()); 1421 } 1422 1423 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1424 1425 bool isGFX11() const { 1426 return AMDGPU::isGFX11(getSTI()); 1427 } 1428 1429 bool isGFX11Plus() const { 1430 return AMDGPU::isGFX11Plus(getSTI()); 1431 } 1432 1433 bool isGFX10_BEncoding() const { 1434 return AMDGPU::isGFX10_BEncoding(getSTI()); 1435 } 1436 1437 bool hasInv2PiInlineImm() const { 1438 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1439 } 1440 1441 bool hasFlatOffsets() const { 1442 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1443 } 1444 1445 bool hasArchitectedFlatScratch() const { 1446 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1447 } 1448 1449 bool hasSGPR102_SGPR103() const { 1450 return !isVI() && !isGFX9(); 1451 } 1452 1453 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1454 1455 bool hasIntClamp() const { 1456 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1457 } 1458 1459 AMDGPUTargetStreamer &getTargetStreamer() { 1460 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1461 return static_cast<AMDGPUTargetStreamer &>(TS); 1462 } 1463 1464 const MCRegisterInfo *getMRI() const { 1465 // We need this const_cast because for some reason getContext() is not const 1466 // in MCAsmParser. 1467 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1468 } 1469 1470 const MCInstrInfo *getMII() const { 1471 return &MII; 1472 } 1473 1474 const FeatureBitset &getFeatureBits() const { 1475 return getSTI().getFeatureBits(); 1476 } 1477 1478 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1479 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1480 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1481 1482 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1483 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1484 bool isForcedDPP() const { return ForcedDPP; } 1485 bool isForcedSDWA() const { return ForcedSDWA; } 1486 ArrayRef<unsigned> getMatchedVariants() const; 1487 StringRef getMatchedVariantName() const; 1488 1489 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1490 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1491 bool RestoreOnFailure); 1492 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1493 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1494 SMLoc &EndLoc) override; 1495 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1496 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1497 unsigned Kind) override; 1498 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1499 OperandVector &Operands, MCStreamer &Out, 1500 uint64_t &ErrorInfo, 1501 bool MatchingInlineAsm) override; 1502 bool ParseDirective(AsmToken DirectiveID) override; 1503 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1504 OperandMode Mode = OperandMode_Default); 1505 StringRef parseMnemonicSuffix(StringRef Name); 1506 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1507 SMLoc NameLoc, OperandVector &Operands) override; 1508 //bool 
ProcessInstruction(MCInst &Inst); 1509 1510 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1511 1512 OperandMatchResultTy 1513 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1514 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1515 bool (*ConvertResult)(int64_t &) = nullptr); 1516 1517 OperandMatchResultTy 1518 parseOperandArrayWithPrefix(const char *Prefix, 1519 OperandVector &Operands, 1520 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1521 bool (*ConvertResult)(int64_t&) = nullptr); 1522 1523 OperandMatchResultTy 1524 parseNamedBit(StringRef Name, OperandVector &Operands, 1525 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1526 OperandMatchResultTy parseCPol(OperandVector &Operands); 1527 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1528 StringRef &Value, 1529 SMLoc &StringLoc); 1530 1531 bool isModifier(); 1532 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1533 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1534 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1535 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1536 bool parseSP3NegModifier(); 1537 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1538 OperandMatchResultTy parseReg(OperandVector &Operands); 1539 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1540 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1541 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1542 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1543 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1544 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1545 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1546 OperandMatchResultTy parseUfmt(int64_t &Format); 1547 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1548 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1549 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1550 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1551 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1552 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1553 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1554 1555 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1556 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1557 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1558 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1559 1560 bool parseCnt(int64_t &IntVal); 1561 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1562 1563 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1564 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1565 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1566 1567 bool parseDelay(int64_t &Delay); 1568 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1569 1570 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1571 1572 private: 1573 struct OperandInfoTy { 1574 SMLoc Loc; 1575 int64_t Id; 
1576 bool IsSymbolic = false; 1577 bool IsDefined = false; 1578 1579 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1580 }; 1581 1582 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1583 bool validateSendMsg(const OperandInfoTy &Msg, 1584 const OperandInfoTy &Op, 1585 const OperandInfoTy &Stream); 1586 1587 bool parseHwregBody(OperandInfoTy &HwReg, 1588 OperandInfoTy &Offset, 1589 OperandInfoTy &Width); 1590 bool validateHwreg(const OperandInfoTy &HwReg, 1591 const OperandInfoTy &Offset, 1592 const OperandInfoTy &Width); 1593 1594 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1595 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1596 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1597 1598 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1599 const OperandVector &Operands) const; 1600 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1601 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1602 SMLoc getLitLoc(const OperandVector &Operands) const; 1603 SMLoc getConstLoc(const OperandVector &Operands) const; 1604 1605 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1606 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1607 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1608 bool validateSOPLiteral(const MCInst &Inst) const; 1609 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1610 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1611 bool validateIntClampSupported(const MCInst &Inst); 1612 bool validateMIMGAtomicDMask(const MCInst &Inst); 1613 bool validateMIMGGatherDMask(const MCInst &Inst); 1614 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1615 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1616 bool validateMIMGAddrSize(const MCInst &Inst); 1617 bool validateMIMGD16(const MCInst &Inst); 1618 bool validateMIMGDim(const MCInst &Inst); 1619 bool validateMIMGMSAA(const MCInst &Inst); 1620 bool validateOpSel(const MCInst &Inst); 1621 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1622 bool validateVccOperand(unsigned Reg) const; 1623 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1624 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateAGPRLdSt(const MCInst &Inst) const; 1627 bool validateVGPRAlign(const MCInst &Inst) const; 1628 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1630 bool validateDivScale(const MCInst &Inst); 1631 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1632 const SMLoc &IDLoc); 1633 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1634 const SMLoc &IDLoc); 1635 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1636 unsigned getConstantBusLimit(unsigned Opcode) const; 1637 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1638 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1639 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1640 1641 bool isSupportedMnemo(StringRef Mnemo, 1642 const FeatureBitset &FBS); 1643 bool isSupportedMnemo(StringRef Mnemo, 1644 
const FeatureBitset &FBS, 1645 ArrayRef<unsigned> Variants); 1646 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1647 1648 bool isId(const StringRef Id) const; 1649 bool isId(const AsmToken &Token, const StringRef Id) const; 1650 bool isToken(const AsmToken::TokenKind Kind) const; 1651 bool trySkipId(const StringRef Id); 1652 bool trySkipId(const StringRef Pref, const StringRef Id); 1653 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1654 bool trySkipToken(const AsmToken::TokenKind Kind); 1655 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1656 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1657 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1658 1659 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1660 AsmToken::TokenKind getTokenKind() const; 1661 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1662 bool parseExpr(OperandVector &Operands); 1663 StringRef getTokenStr() const; 1664 AsmToken peekToken(); 1665 AsmToken getToken() const; 1666 SMLoc getLoc() const; 1667 void lex(); 1668 1669 public: 1670 void onBeginOfFile() override; 1671 1672 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1673 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1674 1675 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1676 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1677 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1678 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1679 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1680 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1681 1682 bool parseSwizzleOperand(int64_t &Op, 1683 const unsigned MinVal, 1684 const unsigned MaxVal, 1685 const StringRef ErrMsg, 1686 SMLoc &Loc); 1687 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1688 const unsigned MinVal, 1689 const unsigned MaxVal, 1690 const StringRef ErrMsg); 1691 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1692 bool parseSwizzleOffset(int64_t &Imm); 1693 bool parseSwizzleMacro(int64_t &Imm); 1694 bool parseSwizzleQuadPerm(int64_t &Imm); 1695 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1696 bool parseSwizzleBroadcast(int64_t &Imm); 1697 bool parseSwizzleSwap(int64_t &Imm); 1698 bool parseSwizzleReverse(int64_t &Imm); 1699 1700 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1701 int64_t parseGPRIdxMacro(); 1702 1703 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1704 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1705 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1706 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1707 1708 AMDGPUOperand::Ptr defaultCPol() const; 1709 1710 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1711 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1712 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1713 AMDGPUOperand::Ptr defaultFlatOffset() const; 1714 1715 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1716 1717 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1718 OptionalImmIndexMap &OptionalIdx); 1719 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1720 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1721 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1722 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1723 OptionalImmIndexMap &OptionalIdx); 1724 1725 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1726 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1727 1728 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1729 bool IsAtomic = false); 1730 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1731 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1732 1733 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1734 1735 bool parseDimId(unsigned &Encoding); 1736 OperandMatchResultTy parseDim(OperandVector &Operands); 1737 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1738 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1739 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1740 int64_t parseDPPCtrlSel(StringRef Ctrl); 1741 int64_t parseDPPCtrlPerm(); 1742 AMDGPUOperand::Ptr defaultRowMask() const; 1743 AMDGPUOperand::Ptr defaultBankMask() const; 1744 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1745 AMDGPUOperand::Ptr defaultFI() const; 1746 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1747 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1748 1749 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1750 AMDGPUOperand::ImmTy Type); 1751 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1752 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1753 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1754 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1755 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1756 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1757 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1758 uint64_t BasicInstType, 1759 bool SkipDstVcc = false, 1760 bool SkipSrcVcc = false); 1761 1762 AMDGPUOperand::Ptr defaultBLGP() const; 1763 AMDGPUOperand::Ptr defaultCBSZ() const; 1764 AMDGPUOperand::Ptr defaultABID() const; 1765 1766 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1767 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1768 1769 AMDGPUOperand::Ptr defaultWaitVDST() const; 1770 AMDGPUOperand::Ptr defaultWaitEXP() const; 1771 }; 1772 1773 struct OptionalOperand { 1774 const char *Name; 1775 AMDGPUOperand::ImmTy Type; 1776 bool IsBit; 1777 bool (*ConvertResult)(int64_t&); 1778 }; 1779 1780 } // end anonymous namespace 1781 1782 // May be called with integer type with equivalent bitwidth. 
1783 static const fltSemantics *getFltSemantics(unsigned Size) { 1784 switch (Size) { 1785 case 4: 1786 return &APFloat::IEEEsingle(); 1787 case 8: 1788 return &APFloat::IEEEdouble(); 1789 case 2: 1790 return &APFloat::IEEEhalf(); 1791 default: 1792 llvm_unreachable("unsupported fp type"); 1793 } 1794 } 1795 1796 static const fltSemantics *getFltSemantics(MVT VT) { 1797 return getFltSemantics(VT.getSizeInBits() / 8); 1798 } 1799 1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1801 switch (OperandType) { 1802 case AMDGPU::OPERAND_REG_IMM_INT32: 1803 case AMDGPU::OPERAND_REG_IMM_FP32: 1804 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1805 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1806 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1808 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1810 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1811 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1812 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1813 case AMDGPU::OPERAND_KIMM32: 1814 return &APFloat::IEEEsingle(); 1815 case AMDGPU::OPERAND_REG_IMM_INT64: 1816 case AMDGPU::OPERAND_REG_IMM_FP64: 1817 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1818 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1820 return &APFloat::IEEEdouble(); 1821 case AMDGPU::OPERAND_REG_IMM_INT16: 1822 case AMDGPU::OPERAND_REG_IMM_FP16: 1823 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1824 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1828 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1829 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1830 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1831 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1832 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1833 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1834 case AMDGPU::OPERAND_KIMM16: 1835 return &APFloat::IEEEhalf(); 1836 default: 1837 llvm_unreachable("unsupported fp type"); 1838 } 1839 } 1840 1841 //===----------------------------------------------------------------------===// 1842 // Operand 1843 //===----------------------------------------------------------------------===// 1844 1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1846 bool Lost; 1847 1848 // Convert literal to single precision 1849 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1850 APFloat::rmNearestTiesToEven, 1851 &Lost); 1852 // We allow precision lost but not overflow or underflow 1853 if (Status != APFloat::opOK && 1854 Lost && 1855 ((Status & APFloat::opOverflow) != 0 || 1856 (Status & APFloat::opUnderflow) != 0)) { 1857 return false; 1858 } 1859 1860 return true; 1861 } 1862 1863 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1864 return isUIntN(Size, Val) || isIntN(Size, Val); 1865 } 1866 1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1868 if (VT.getScalarType() == MVT::i16) { 1869 // FP immediate values are broken. 1870 return isInlinableIntLiteral(Val); 1871 } 1872 1873 // f16/v2f16 operands work correctly for all values. 1874 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1875 } 1876 1877 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1878 1879 // This is a hack to enable named inline values like 1880 // shared_base with both 32-bit and 64-bit operands. 1881 // Note that these values are defined as 1882 // 32-bit operands only. 
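  // For example, "src_shared_base" is accepted here for both 32-bit and
  // 64-bit sources even though the register itself is defined as a 32-bit
  // operand.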
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zeros, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)?
MVT::f32 : type; 1983 1984 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1985 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1986 } 1987 1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1989 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1990 } 1991 1992 bool AMDGPUOperand::isVRegWithInputMods() const { 1993 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1994 // GFX90A allows DPP on 64-bit operands. 1995 (isRegClass(AMDGPU::VReg_64RegClassID) && 1996 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1997 } 1998 1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2000 if (AsmParser->isVI()) 2001 return isVReg32(); 2002 else if (AsmParser->isGFX9Plus()) 2003 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2004 else 2005 return false; 2006 } 2007 2008 bool AMDGPUOperand::isSDWAFP16Operand() const { 2009 return isSDWAOperand(MVT::f16); 2010 } 2011 2012 bool AMDGPUOperand::isSDWAFP32Operand() const { 2013 return isSDWAOperand(MVT::f32); 2014 } 2015 2016 bool AMDGPUOperand::isSDWAInt16Operand() const { 2017 return isSDWAOperand(MVT::i16); 2018 } 2019 2020 bool AMDGPUOperand::isSDWAInt32Operand() const { 2021 return isSDWAOperand(MVT::i32); 2022 } 2023 2024 bool AMDGPUOperand::isBoolReg() const { 2025 auto FB = AsmParser->getFeatureBits(); 2026 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2027 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2028 } 2029 2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2031 { 2032 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2033 assert(Size == 2 || Size == 4 || Size == 8); 2034 2035 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2036 2037 if (Imm.Mods.Abs) { 2038 Val &= ~FpSignMask; 2039 } 2040 if (Imm.Mods.Neg) { 2041 Val ^= FpSignMask; 2042 } 2043 2044 return Val; 2045 } 2046 2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2048 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2049 Inst.getNumOperands())) { 2050 addLiteralImmOperand(Inst, Imm.Val, 2051 ApplyModifiers & 2052 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2053 } else { 2054 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2055 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2056 setImmKindNone(); 2057 } 2058 } 2059 2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2061 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2062 auto OpNum = Inst.getNumOperands(); 2063 // Check that this operand accepts literals 2064 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2065 2066 if (ApplyModifiers) { 2067 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2068 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2069 Val = applyInputFPModifiers(Val, Size); 2070 } 2071 2072 APInt Literal(64, Val); 2073 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2074 2075 if (Imm.IsFPImm) { // We got fp literal token 2076 switch (OpTy) { 2077 case AMDGPU::OPERAND_REG_IMM_INT64: 2078 case AMDGPU::OPERAND_REG_IMM_FP64: 2079 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2080 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2082 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2083 AsmParser->hasInv2PiInlineImm())) { 2084 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2085 setImmKindConst(); 2086 return; 2087 } 2088 2089 // Non-inlineable 2090 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2091 // For fp operands we check if low 32 bits are zeros 2092 if (Literal.getLoBits(32) != 0) { 2093 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2094 "Can't encode literal as exact 64-bit floating-point operand. " 2095 "Low 32-bits will be set to zero"); 2096 } 2097 2098 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2099 setImmKindLiteral(); 2100 return; 2101 } 2102 2103 // We don't allow fp literals in 64-bit integer instructions. It is 2104 // unclear how we should encode them. This case should be checked earlier 2105 // in predicate methods (isLiteralImm()) 2106 llvm_unreachable("fp literal in 64-bit integer instruction."); 2107 2108 case AMDGPU::OPERAND_REG_IMM_INT32: 2109 case AMDGPU::OPERAND_REG_IMM_FP32: 2110 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2111 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2112 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2115 case AMDGPU::OPERAND_REG_IMM_INT16: 2116 case AMDGPU::OPERAND_REG_IMM_FP16: 2117 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2118 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2119 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2124 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2126 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2127 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2128 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2129 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2130 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2131 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2132 case AMDGPU::OPERAND_KIMM32: 2133 case AMDGPU::OPERAND_KIMM16: { 2134 bool lost; 2135 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2136 // Convert literal to single precision 2137 FPLiteral.convert(*getOpFltSemantics(OpTy), 2138 APFloat::rmNearestTiesToEven, &lost); 2139 // We allow precision lost but not overflow or underflow. This should be 2140 // checked earlier in isLiteralImm() 2141 2142 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2143 Inst.addOperand(MCOperand::createImm(ImmVal)); 2144 setImmKindLiteral(); 2145 return; 2146 } 2147 default: 2148 llvm_unreachable("invalid operand size"); 2149 } 2150 2151 return; 2152 } 2153 2154 // We got int literal token. 2155 // Only sign extend inline immediates. 
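  // For example, with a 32-bit operand an integer token such as -16 falls
  // into the inline constant range and is emitted as an inline constant
  // below, while 0x12345678 does not and is emitted as a 32-bit literal.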
2156 switch (OpTy) { 2157 case AMDGPU::OPERAND_REG_IMM_INT32: 2158 case AMDGPU::OPERAND_REG_IMM_FP32: 2159 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2163 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2164 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2166 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2167 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2168 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2169 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2170 if (isSafeTruncation(Val, 32) && 2171 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2172 AsmParser->hasInv2PiInlineImm())) { 2173 Inst.addOperand(MCOperand::createImm(Val)); 2174 setImmKindConst(); 2175 return; 2176 } 2177 2178 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2179 setImmKindLiteral(); 2180 return; 2181 2182 case AMDGPU::OPERAND_REG_IMM_INT64: 2183 case AMDGPU::OPERAND_REG_IMM_FP64: 2184 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2185 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2186 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2187 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2188 Inst.addOperand(MCOperand::createImm(Val)); 2189 setImmKindConst(); 2190 return; 2191 } 2192 2193 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2194 setImmKindLiteral(); 2195 return; 2196 2197 case AMDGPU::OPERAND_REG_IMM_INT16: 2198 case AMDGPU::OPERAND_REG_IMM_FP16: 2199 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2200 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2201 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2202 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2203 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2204 if (isSafeTruncation(Val, 16) && 2205 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2206 AsmParser->hasInv2PiInlineImm())) { 2207 Inst.addOperand(MCOperand::createImm(Val)); 2208 setImmKindConst(); 2209 return; 2210 } 2211 2212 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2213 setImmKindLiteral(); 2214 return; 2215 2216 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2217 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2218 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2219 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2220 assert(isSafeTruncation(Val, 16)); 2221 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2222 AsmParser->hasInv2PiInlineImm())); 2223 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 return; 2226 } 2227 case AMDGPU::OPERAND_KIMM32: 2228 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2229 setImmKindNone(); 2230 return; 2231 case AMDGPU::OPERAND_KIMM16: 2232 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2233 setImmKindNone(); 2234 return; 2235 default: 2236 llvm_unreachable("invalid operand size"); 2237 } 2238 } 2239 2240 template <unsigned Bitwidth> 2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2242 APInt Literal(64, Imm.Val); 2243 setImmKindNone(); 2244 2245 if (!Imm.IsFPImm) { 2246 // We got int literal token. 
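    // KIMM operands hold the raw literal bits, so an integer token is simply
    // truncated to the operand width; no inline-constant handling applies.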
2247 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2248 return; 2249 } 2250 2251 bool Lost; 2252 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2253 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2254 APFloat::rmNearestTiesToEven, &Lost); 2255 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2256 } 2257 2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2259 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2260 } 2261 2262 static bool isInlineValue(unsigned Reg) { 2263 switch (Reg) { 2264 case AMDGPU::SRC_SHARED_BASE: 2265 case AMDGPU::SRC_SHARED_LIMIT: 2266 case AMDGPU::SRC_PRIVATE_BASE: 2267 case AMDGPU::SRC_PRIVATE_LIMIT: 2268 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2269 return true; 2270 case AMDGPU::SRC_VCCZ: 2271 case AMDGPU::SRC_EXECZ: 2272 case AMDGPU::SRC_SCC: 2273 return true; 2274 case AMDGPU::SGPR_NULL: 2275 return true; 2276 default: 2277 return false; 2278 } 2279 } 2280 2281 bool AMDGPUOperand::isInlineValue() const { 2282 return isRegKind() && ::isInlineValue(getReg()); 2283 } 2284 2285 //===----------------------------------------------------------------------===// 2286 // AsmParser 2287 //===----------------------------------------------------------------------===// 2288 2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2290 if (Is == IS_VGPR) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::VGPR_32RegClassID; 2295 case 64: 2296 return AMDGPU::VReg_64RegClassID; 2297 case 96: 2298 return AMDGPU::VReg_96RegClassID; 2299 case 128: 2300 return AMDGPU::VReg_128RegClassID; 2301 case 160: 2302 return AMDGPU::VReg_160RegClassID; 2303 case 192: 2304 return AMDGPU::VReg_192RegClassID; 2305 case 224: 2306 return AMDGPU::VReg_224RegClassID; 2307 case 256: 2308 return AMDGPU::VReg_256RegClassID; 2309 case 512: 2310 return AMDGPU::VReg_512RegClassID; 2311 case 1024: 2312 return AMDGPU::VReg_1024RegClassID; 2313 } 2314 } else if (Is == IS_TTMP) { 2315 switch (RegWidth) { 2316 default: return -1; 2317 case 32: 2318 return AMDGPU::TTMP_32RegClassID; 2319 case 64: 2320 return AMDGPU::TTMP_64RegClassID; 2321 case 128: 2322 return AMDGPU::TTMP_128RegClassID; 2323 case 256: 2324 return AMDGPU::TTMP_256RegClassID; 2325 case 512: 2326 return AMDGPU::TTMP_512RegClassID; 2327 } 2328 } else if (Is == IS_SGPR) { 2329 switch (RegWidth) { 2330 default: return -1; 2331 case 32: 2332 return AMDGPU::SGPR_32RegClassID; 2333 case 64: 2334 return AMDGPU::SGPR_64RegClassID; 2335 case 96: 2336 return AMDGPU::SGPR_96RegClassID; 2337 case 128: 2338 return AMDGPU::SGPR_128RegClassID; 2339 case 160: 2340 return AMDGPU::SGPR_160RegClassID; 2341 case 192: 2342 return AMDGPU::SGPR_192RegClassID; 2343 case 224: 2344 return AMDGPU::SGPR_224RegClassID; 2345 case 256: 2346 return AMDGPU::SGPR_256RegClassID; 2347 case 512: 2348 return AMDGPU::SGPR_512RegClassID; 2349 } 2350 } else if (Is == IS_AGPR) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::AGPR_32RegClassID; 2355 case 64: 2356 return AMDGPU::AReg_64RegClassID; 2357 case 96: 2358 return AMDGPU::AReg_96RegClassID; 2359 case 128: 2360 return AMDGPU::AReg_128RegClassID; 2361 case 160: 2362 return AMDGPU::AReg_160RegClassID; 2363 case 192: 2364 return AMDGPU::AReg_192RegClassID; 2365 case 224: 2366 return AMDGPU::AReg_224RegClassID; 2367 case 256: 2368 return AMDGPU::AReg_256RegClassID; 2369 case 512: 2370 return AMDGPU::AReg_512RegClassID; 
2371 case 1024: 2372 return AMDGPU::AReg_1024RegClassID; 2373 } 2374 } 2375 return -1; 2376 } 2377 2378 static unsigned getSpecialRegForName(StringRef RegName) { 2379 return StringSwitch<unsigned>(RegName) 2380 .Case("exec", AMDGPU::EXEC) 2381 .Case("vcc", AMDGPU::VCC) 2382 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2383 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2384 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2385 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2386 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2387 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2388 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2389 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2390 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2391 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2392 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2393 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2394 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2395 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2396 .Case("m0", AMDGPU::M0) 2397 .Case("vccz", AMDGPU::SRC_VCCZ) 2398 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2399 .Case("execz", AMDGPU::SRC_EXECZ) 2400 .Case("src_execz", AMDGPU::SRC_EXECZ) 2401 .Case("scc", AMDGPU::SRC_SCC) 2402 .Case("src_scc", AMDGPU::SRC_SCC) 2403 .Case("tba", AMDGPU::TBA) 2404 .Case("tma", AMDGPU::TMA) 2405 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2406 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2407 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2408 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2409 .Case("vcc_lo", AMDGPU::VCC_LO) 2410 .Case("vcc_hi", AMDGPU::VCC_HI) 2411 .Case("exec_lo", AMDGPU::EXEC_LO) 2412 .Case("exec_hi", AMDGPU::EXEC_HI) 2413 .Case("tma_lo", AMDGPU::TMA_LO) 2414 .Case("tma_hi", AMDGPU::TMA_HI) 2415 .Case("tba_lo", AMDGPU::TBA_LO) 2416 .Case("tba_hi", AMDGPU::TBA_HI) 2417 .Case("pc", AMDGPU::PC_REG) 2418 .Case("null", AMDGPU::SGPR_NULL) 2419 .Default(AMDGPU::NoRegister); 2420 } 2421 2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2423 SMLoc &EndLoc, bool RestoreOnFailure) { 2424 auto R = parseRegister(); 2425 if (!R) return true; 2426 assert(R->isReg()); 2427 RegNo = R->getReg(); 2428 StartLoc = R->getStartLoc(); 2429 EndLoc = R->getEndLoc(); 2430 return false; 2431 } 2432 2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2434 SMLoc &EndLoc) { 2435 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2436 } 2437 2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2439 SMLoc &StartLoc, 2440 SMLoc &EndLoc) { 2441 bool Result = 2442 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2443 bool PendingErrors = getParser().hasPendingError(); 2444 getParser().clearPendingErrors(); 2445 if (PendingErrors) 2446 return MatchOperand_ParseFail; 2447 if (Result) 2448 return MatchOperand_NoMatch; 2449 return MatchOperand_Success; 2450 } 2451 2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2453 RegisterKind RegKind, unsigned Reg1, 2454 SMLoc Loc) { 2455 switch (RegKind) { 2456 case IS_SPECIAL: 2457 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2458 Reg = AMDGPU::EXEC; 2459 RegWidth = 64; 2460 return true; 2461 } 2462 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2463 Reg = AMDGPU::FLAT_SCR; 2464 RegWidth = 64; 2465 return true; 2466 } 2467 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2468 Reg = AMDGPU::XNACK_MASK; 2469 RegWidth = 64; 
2470 return true; 2471 } 2472 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2473 Reg = AMDGPU::VCC; 2474 RegWidth = 64; 2475 return true; 2476 } 2477 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2478 Reg = AMDGPU::TBA; 2479 RegWidth = 64; 2480 return true; 2481 } 2482 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2483 Reg = AMDGPU::TMA; 2484 RegWidth = 64; 2485 return true; 2486 } 2487 Error(Loc, "register does not fit in the list"); 2488 return false; 2489 case IS_VGPR: 2490 case IS_SGPR: 2491 case IS_AGPR: 2492 case IS_TTMP: 2493 if (Reg1 != Reg + RegWidth / 32) { 2494 Error(Loc, "registers in a list must have consecutive indices"); 2495 return false; 2496 } 2497 RegWidth += 32; 2498 return true; 2499 default: 2500 llvm_unreachable("unexpected register kind"); 2501 } 2502 } 2503 2504 struct RegInfo { 2505 StringLiteral Name; 2506 RegisterKind Kind; 2507 }; 2508 2509 static constexpr RegInfo RegularRegisters[] = { 2510 {{"v"}, IS_VGPR}, 2511 {{"s"}, IS_SGPR}, 2512 {{"ttmp"}, IS_TTMP}, 2513 {{"acc"}, IS_AGPR}, 2514 {{"a"}, IS_AGPR}, 2515 }; 2516 2517 static bool isRegularReg(RegisterKind Kind) { 2518 return Kind == IS_VGPR || 2519 Kind == IS_SGPR || 2520 Kind == IS_TTMP || 2521 Kind == IS_AGPR; 2522 } 2523 2524 static const RegInfo* getRegularRegInfo(StringRef Str) { 2525 for (const RegInfo &Reg : RegularRegisters) 2526 if (Str.startswith(Reg.Name)) 2527 return &Reg; 2528 return nullptr; 2529 } 2530 2531 static bool getRegNum(StringRef Str, unsigned& Num) { 2532 return !Str.getAsInteger(10, Num); 2533 } 2534 2535 bool 2536 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2537 const AsmToken &NextToken) const { 2538 2539 // A list of consecutive registers: [s0,s1,s2,s3] 2540 if (Token.is(AsmToken::LBrac)) 2541 return true; 2542 2543 if (!Token.is(AsmToken::Identifier)) 2544 return false; 2545 2546 // A single register like s0 or a range of registers like s[0:1] 2547 2548 StringRef Str = Token.getString(); 2549 const RegInfo *Reg = getRegularRegInfo(Str); 2550 if (Reg) { 2551 StringRef RegName = Reg->Name; 2552 StringRef RegSuffix = Str.substr(RegName.size()); 2553 if (!RegSuffix.empty()) { 2554 unsigned Num; 2555 // A single register with an index: rXX 2556 if (getRegNum(RegSuffix, Num)) 2557 return true; 2558 } else { 2559 // A range of registers: r[XX:YY]. 2560 if (NextToken.is(AsmToken::LBrac)) 2561 return true; 2562 } 2563 } 2564 2565 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2566 } 2567 2568 bool 2569 AMDGPUAsmParser::isRegister() 2570 { 2571 return isRegister(getToken(), peekToken()); 2572 } 2573 2574 unsigned 2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2576 unsigned RegNum, 2577 unsigned RegWidth, 2578 SMLoc Loc) { 2579 2580 assert(isRegularReg(RegKind)); 2581 2582 unsigned AlignSize = 1; 2583 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2584 // SGPR and TTMP registers must be aligned. 2585 // Max required alignment is 4 dwords. 
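    // For example, a 128-bit tuple such as s[8:11] starts at an index that is
    // a multiple of 4 and is accepted, whereas s[6:9] is rejected below with
    // "invalid register alignment".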
2586 AlignSize = std::min(RegWidth / 32, 4u); 2587 } 2588 2589 if (RegNum % AlignSize != 0) { 2590 Error(Loc, "invalid register alignment"); 2591 return AMDGPU::NoRegister; 2592 } 2593 2594 unsigned RegIdx = RegNum / AlignSize; 2595 int RCID = getRegClass(RegKind, RegWidth); 2596 if (RCID == -1) { 2597 Error(Loc, "invalid or unsupported register size"); 2598 return AMDGPU::NoRegister; 2599 } 2600 2601 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2602 const MCRegisterClass RC = TRI->getRegClass(RCID); 2603 if (RegIdx >= RC.getNumRegs()) { 2604 Error(Loc, "register index is out of range"); 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 return RC.getRegister(RegIdx); 2609 } 2610 2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2612 int64_t RegLo, RegHi; 2613 if (!skipToken(AsmToken::LBrac, "missing register index")) 2614 return false; 2615 2616 SMLoc FirstIdxLoc = getLoc(); 2617 SMLoc SecondIdxLoc; 2618 2619 if (!parseExpr(RegLo)) 2620 return false; 2621 2622 if (trySkipToken(AsmToken::Colon)) { 2623 SecondIdxLoc = getLoc(); 2624 if (!parseExpr(RegHi)) 2625 return false; 2626 } else { 2627 RegHi = RegLo; 2628 } 2629 2630 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2631 return false; 2632 2633 if (!isUInt<32>(RegLo)) { 2634 Error(FirstIdxLoc, "invalid register index"); 2635 return false; 2636 } 2637 2638 if (!isUInt<32>(RegHi)) { 2639 Error(SecondIdxLoc, "invalid register index"); 2640 return false; 2641 } 2642 2643 if (RegLo > RegHi) { 2644 Error(FirstIdxLoc, "first register index should not exceed second index"); 2645 return false; 2646 } 2647 2648 Num = static_cast<unsigned>(RegLo); 2649 RegWidth = 32 * ((RegHi - RegLo) + 1); 2650 return true; 2651 } 2652 2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2654 unsigned &RegNum, unsigned &RegWidth, 2655 SmallVectorImpl<AsmToken> &Tokens) { 2656 assert(isToken(AsmToken::Identifier)); 2657 unsigned Reg = getSpecialRegForName(getTokenStr()); 2658 if (Reg) { 2659 RegNum = 0; 2660 RegWidth = 32; 2661 RegKind = IS_SPECIAL; 2662 Tokens.push_back(getToken()); 2663 lex(); // skip register name 2664 } 2665 return Reg; 2666 } 2667 2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2669 unsigned &RegNum, unsigned &RegWidth, 2670 SmallVectorImpl<AsmToken> &Tokens) { 2671 assert(isToken(AsmToken::Identifier)); 2672 StringRef RegName = getTokenStr(); 2673 auto Loc = getLoc(); 2674 2675 const RegInfo *RI = getRegularRegInfo(RegName); 2676 if (!RI) { 2677 Error(Loc, "invalid register name"); 2678 return AMDGPU::NoRegister; 2679 } 2680 2681 Tokens.push_back(getToken()); 2682 lex(); // skip register name 2683 2684 RegKind = RI->Kind; 2685 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2686 if (!RegSuffix.empty()) { 2687 // Single 32-bit register: vXX. 2688 if (!getRegNum(RegSuffix, RegNum)) { 2689 Error(Loc, "invalid register index"); 2690 return AMDGPU::NoRegister; 2691 } 2692 RegWidth = 32; 2693 } else { 2694 // Range of registers: v[XX:YY]. ":YY" is optional. 
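    // For example, v[2:5] yields RegNum = 2 and RegWidth = 128, while v[3]
    // yields RegNum = 3 and RegWidth = 32.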
2695 if (!ParseRegRange(RegNum, RegWidth)) 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2700 } 2701 2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2703 unsigned &RegWidth, 2704 SmallVectorImpl<AsmToken> &Tokens) { 2705 unsigned Reg = AMDGPU::NoRegister; 2706 auto ListLoc = getLoc(); 2707 2708 if (!skipToken(AsmToken::LBrac, 2709 "expected a register or a list of registers")) { 2710 return AMDGPU::NoRegister; 2711 } 2712 2713 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2714 2715 auto Loc = getLoc(); 2716 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2717 return AMDGPU::NoRegister; 2718 if (RegWidth != 32) { 2719 Error(Loc, "expected a single 32-bit register"); 2720 return AMDGPU::NoRegister; 2721 } 2722 2723 for (; trySkipToken(AsmToken::Comma); ) { 2724 RegisterKind NextRegKind; 2725 unsigned NextReg, NextRegNum, NextRegWidth; 2726 Loc = getLoc(); 2727 2728 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2729 NextRegNum, NextRegWidth, 2730 Tokens)) { 2731 return AMDGPU::NoRegister; 2732 } 2733 if (NextRegWidth != 32) { 2734 Error(Loc, "expected a single 32-bit register"); 2735 return AMDGPU::NoRegister; 2736 } 2737 if (NextRegKind != RegKind) { 2738 Error(Loc, "registers in a list must be of the same kind"); 2739 return AMDGPU::NoRegister; 2740 } 2741 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2742 return AMDGPU::NoRegister; 2743 } 2744 2745 if (!skipToken(AsmToken::RBrac, 2746 "expected a comma or a closing square bracket")) { 2747 return AMDGPU::NoRegister; 2748 } 2749 2750 if (isRegularReg(RegKind)) 2751 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2752 2753 return Reg; 2754 } 2755 2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2757 unsigned &RegNum, unsigned &RegWidth, 2758 SmallVectorImpl<AsmToken> &Tokens) { 2759 auto Loc = getLoc(); 2760 Reg = AMDGPU::NoRegister; 2761 2762 if (isToken(AsmToken::Identifier)) { 2763 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2764 if (Reg == AMDGPU::NoRegister) 2765 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2766 } else { 2767 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2768 } 2769 2770 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2771 if (Reg == AMDGPU::NoRegister) { 2772 assert(Parser.hasPendingError()); 2773 return false; 2774 } 2775 2776 if (!subtargetHasRegister(*TRI, Reg)) { 2777 if (Reg == AMDGPU::SGPR_NULL) { 2778 Error(Loc, "'null' operand is not supported on this GPU"); 2779 } else { 2780 Error(Loc, "register not available on this GPU"); 2781 } 2782 return false; 2783 } 2784 2785 return true; 2786 } 2787 2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2789 unsigned &RegNum, unsigned &RegWidth, 2790 bool RestoreOnFailure /*=false*/) { 2791 Reg = AMDGPU::NoRegister; 2792 2793 SmallVector<AsmToken, 1> Tokens; 2794 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2795 if (RestoreOnFailure) { 2796 while (!Tokens.empty()) { 2797 getLexer().UnLex(Tokens.pop_back_val()); 2798 } 2799 } 2800 return true; 2801 } 2802 return false; 2803 } 2804 2805 Optional<StringRef> 2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2807 switch (RegKind) { 2808 case IS_VGPR: 2809 return StringRef(".amdgcn.next_free_vgpr"); 2810 case IS_SGPR: 2811 return StringRef(".amdgcn.next_free_sgpr"); 2812 default: 2813 return None; 2814 } 2815 } 2816 2817 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2818 auto SymbolName = getGprCountSymbolName(RegKind); 2819 assert(SymbolName && "initializing invalid register kind"); 2820 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2821 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2822 } 2823 2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2825 unsigned DwordRegIndex, 2826 unsigned RegWidth) { 2827 // Symbols are only defined for GCN targets 2828 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2829 return true; 2830 2831 auto SymbolName = getGprCountSymbolName(RegKind); 2832 if (!SymbolName) 2833 return true; 2834 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2835 2836 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2837 int64_t OldCount; 2838 2839 if (!Sym->isVariable()) 2840 return !Error(getLoc(), 2841 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2842 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2843 return !Error( 2844 getLoc(), 2845 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2846 2847 if (OldCount <= NewMax) 2848 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2849 2850 return true; 2851 } 2852 2853 std::unique_ptr<AMDGPUOperand> 2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2855 const auto &Tok = getToken(); 2856 SMLoc StartLoc = Tok.getLoc(); 2857 SMLoc EndLoc = Tok.getEndLoc(); 2858 RegisterKind RegKind; 2859 unsigned Reg, RegNum, RegWidth; 2860 2861 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2862 return nullptr; 2863 } 2864 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2865 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2866 return nullptr; 2867 } else 2868 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2869 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2870 } 2871 2872 OperandMatchResultTy 2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2874 // TODO: add syntactic sugar for 1/(2*PI) 2875 2876 assert(!isRegister()); 2877 assert(!isModifier()); 2878 2879 const auto& Tok = getToken(); 2880 const auto& NextTok = peekToken(); 2881 bool IsReal = Tok.is(AsmToken::Real); 2882 SMLoc S = getLoc(); 2883 bool Negate = false; 2884 2885 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2886 lex(); 2887 IsReal = true; 2888 Negate = true; 2889 } 2890 2891 if (IsReal) { 2892 // Floating-point expressions are not supported. 2893 // Can only allow floating-point literals with an 2894 // optional sign. 2895 2896 StringRef Num = getTokenStr(); 2897 lex(); 2898 2899 APFloat RealVal(APFloat::IEEEdouble()); 2900 auto roundMode = APFloat::rmNearestTiesToEven; 2901 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2902 return MatchOperand_ParseFail; 2903 } 2904 if (Negate) 2905 RealVal.changeSign(); 2906 2907 Operands.push_back( 2908 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2909 AMDGPUOperand::ImmTyNone, true)); 2910 2911 return MatchOperand_Success; 2912 2913 } else { 2914 int64_t IntVal; 2915 const MCExpr *Expr; 2916 SMLoc S = getLoc(); 2917 2918 if (HasSP3AbsModifier) { 2919 // This is a workaround for handling expressions 2920 // as arguments of SP3 'abs' modifier, for example: 2921 // |1.0| 2922 // |-1| 2923 // |1+x| 2924 // This syntax is not compatible with syntax of standard 2925 // MC expressions (due to the trailing '|'). 
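      // Parsing only a primary expression stops before the trailing '|',
      // which a full parseExpression() call would try to consume as a
      // binary operator.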
2926 SMLoc EndLoc; 2927 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2928 return MatchOperand_ParseFail; 2929 } else { 2930 if (Parser.parseExpression(Expr)) 2931 return MatchOperand_ParseFail; 2932 } 2933 2934 if (Expr->evaluateAsAbsolute(IntVal)) { 2935 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2936 } else { 2937 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2938 } 2939 2940 return MatchOperand_Success; 2941 } 2942 2943 return MatchOperand_NoMatch; 2944 } 2945 2946 OperandMatchResultTy 2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2948 if (!isRegister()) 2949 return MatchOperand_NoMatch; 2950 2951 if (auto R = parseRegister()) { 2952 assert(R->isReg()); 2953 Operands.push_back(std::move(R)); 2954 return MatchOperand_Success; 2955 } 2956 return MatchOperand_ParseFail; 2957 } 2958 2959 OperandMatchResultTy 2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2961 auto res = parseReg(Operands); 2962 if (res != MatchOperand_NoMatch) { 2963 return res; 2964 } else if (isModifier()) { 2965 return MatchOperand_NoMatch; 2966 } else { 2967 return parseImm(Operands, HasSP3AbsMod); 2968 } 2969 } 2970 2971 bool 2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2973 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2974 const auto &str = Token.getString(); 2975 return str == "abs" || str == "neg" || str == "sext"; 2976 } 2977 return false; 2978 } 2979 2980 bool 2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2982 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2983 } 2984 2985 bool 2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2987 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2988 } 2989 2990 bool 2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2992 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2993 } 2994 2995 // Check if this is an operand modifier or an opcode modifier 2996 // which may look like an expression but it is not. We should 2997 // avoid parsing these modifiers as expressions. Currently 2998 // recognized sequences are: 2999 // |...| 3000 // abs(...) 3001 // neg(...) 3002 // sext(...) 3003 // -reg 3004 // -|...| 3005 // -abs(...) 3006 // name:... 3007 // Note that simple opcode modifiers like 'gds' may be parsed as 3008 // expressions; this is a special case. See getExpressionAsToken. 3009 // 3010 bool 3011 AMDGPUAsmParser::isModifier() { 3012 3013 AsmToken Tok = getToken(); 3014 AsmToken NextToken[2]; 3015 peekTokens(NextToken); 3016 3017 return isOperandModifier(Tok, NextToken[0]) || 3018 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3019 isOpcodeModifierWithVal(Tok, NextToken[0]); 3020 } 3021 3022 // Check if the current token is an SP3 'neg' modifier. 3023 // Currently this modifier is allowed in the following context: 3024 // 3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3026 // 2. Before an 'abs' modifier: -abs(...) 3027 // 3. Before an SP3 'abs' modifier: -|...| 3028 // 3029 // In all other cases "-" is handled as a part 3030 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would result in different meanings
// for integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise, for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3104 } 3105 3106 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3107 return MatchOperand_ParseFail; 3108 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3109 return MatchOperand_ParseFail; 3110 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3111 return MatchOperand_ParseFail; 3112 3113 AMDGPUOperand::Modifiers Mods; 3114 Mods.Abs = Abs || SP3Abs; 3115 Mods.Neg = Neg || SP3Neg; 3116 3117 if (Mods.hasFPModifiers()) { 3118 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3119 if (Op.isExpr()) { 3120 Error(Op.getStartLoc(), "expected an absolute expression"); 3121 return MatchOperand_ParseFail; 3122 } 3123 Op.setModifiers(Mods); 3124 } 3125 return MatchOperand_Success; 3126 } 3127 3128 OperandMatchResultTy 3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3130 bool AllowImm) { 3131 bool Sext = trySkipId("sext"); 3132 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3133 return MatchOperand_ParseFail; 3134 3135 OperandMatchResultTy Res; 3136 if (AllowImm) { 3137 Res = parseRegOrImm(Operands); 3138 } else { 3139 Res = parseReg(Operands); 3140 } 3141 if (Res != MatchOperand_Success) { 3142 return Sext? MatchOperand_ParseFail : Res; 3143 } 3144 3145 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3146 return MatchOperand_ParseFail; 3147 3148 AMDGPUOperand::Modifiers Mods; 3149 Mods.Sext = Sext; 3150 3151 if (Mods.hasIntModifiers()) { 3152 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3153 if (Op.isExpr()) { 3154 Error(Op.getStartLoc(), "expected an absolute expression"); 3155 return MatchOperand_ParseFail; 3156 } 3157 Op.setModifiers(Mods); 3158 } 3159 3160 return MatchOperand_Success; 3161 } 3162 3163 OperandMatchResultTy 3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3165 return parseRegOrImmWithFPInputMods(Operands, false); 3166 } 3167 3168 OperandMatchResultTy 3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3170 return parseRegOrImmWithIntInputMods(Operands, false); 3171 } 3172 3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3174 auto Loc = getLoc(); 3175 if (trySkipId("off")) { 3176 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3177 AMDGPUOperand::ImmTyOff, false)); 3178 return MatchOperand_Success; 3179 } 3180 3181 if (!isRegister()) 3182 return MatchOperand_NoMatch; 3183 3184 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3185 if (Reg) { 3186 Operands.push_back(std::move(Reg)); 3187 return MatchOperand_Success; 3188 } 3189 3190 return MatchOperand_ParseFail; 3191 3192 } 3193 3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3195 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3196 3197 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3198 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3199 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3200 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3201 return Match_InvalidOperand; 3202 3203 if ((TSFlags & SIInstrFlags::VOP3) && 3204 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3205 getForcedEncodingSize() != 64) 3206 return Match_PreferE32; 3207 3208 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3209 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3210 // v_mac_f32/16 allow only dst_sel == DWORD; 3211 auto OpNum = 3212 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3213 const auto &Op = Inst.getOperand(OpNum); 3214 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3215 return Match_InvalidOperand; 3216 } 3217 } 3218 3219 return Match_Success; 3220 } 3221 3222 static ArrayRef<unsigned> getAllVariants() { 3223 static const unsigned Variants[] = { 3224 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3225 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3226 }; 3227 3228 return makeArrayRef(Variants); 3229 } 3230 3231 // What asm variants we should check 3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3233 if (getForcedEncodingSize() == 32) { 3234 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3235 return makeArrayRef(Variants); 3236 } 3237 3238 if (isForcedVOP3()) { 3239 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3240 return makeArrayRef(Variants); 3241 } 3242 3243 if (isForcedSDWA()) { 3244 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3245 AMDGPUAsmVariants::SDWA9}; 3246 return makeArrayRef(Variants); 3247 } 3248 3249 if (isForcedDPP()) { 3250 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3251 return makeArrayRef(Variants); 3252 } 3253 3254 return getAllVariants(); 3255 } 3256 3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3258 if (getForcedEncodingSize() == 32) 3259 return "e32"; 3260 3261 if (isForcedVOP3()) 3262 return "e64"; 3263 3264 if (isForcedSDWA()) 3265 return "sdwa"; 3266 3267 if (isForcedDPP()) 3268 return "dpp"; 3269 3270 return ""; 3271 } 3272 3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 const unsigned Num = Desc.getNumImplicitUses(); 3276 for (unsigned i = 0; i < Num; ++i) { 3277 unsigned Reg = Desc.ImplicitUses[i]; 3278 switch (Reg) { 3279 case AMDGPU::FLAT_SCR: 3280 case AMDGPU::VCC: 3281 case AMDGPU::VCC_LO: 3282 case AMDGPU::VCC_HI: 3283 case AMDGPU::M0: 3284 return Reg; 3285 default: 3286 break; 3287 } 3288 } 3289 return AMDGPU::NoRegister; 3290 } 3291 3292 // NB: This code is correct only when used to check constant 3293 // bus limitations because GFX7 support no f16 inline constants. 3294 // Note that there are no cases when a GFX7 opcode violates 3295 // constant bus limitations due to the use of an f16 constant. 
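// The check below dispatches purely on the expected operand size (8, 4 or
// 2 bytes); 2-byte operands are further distinguished by operand type to
// select the proper 16-bit inline-constant predicate.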
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3297 unsigned OpIdx) const { 3298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3299 3300 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3301 return false; 3302 } 3303 3304 const MCOperand &MO = Inst.getOperand(OpIdx); 3305 3306 int64_t Val = MO.getImm(); 3307 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3308 3309 switch (OpSize) { // expected operand size 3310 case 8: 3311 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3312 case 4: 3313 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3314 case 2: { 3315 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3316 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3317 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3318 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3319 return AMDGPU::isInlinableIntLiteral(Val); 3320 3321 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3322 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3323 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3324 return AMDGPU::isInlinableIntLiteralV216(Val); 3325 3326 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3327 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3328 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3329 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3330 3331 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3332 } 3333 default: 3334 llvm_unreachable("invalid operand size"); 3335 } 3336 } 3337 3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3339 if (!isGFX10Plus()) 3340 return 1; 3341 3342 switch (Opcode) { 3343 // 64-bit shift instructions can use only one scalar value input 3344 case AMDGPU::V_LSHLREV_B64_e64: 3345 case AMDGPU::V_LSHLREV_B64_gfx10: 3346 case AMDGPU::V_LSHRREV_B64_e64: 3347 case AMDGPU::V_LSHRREV_B64_gfx10: 3348 case AMDGPU::V_ASHRREV_I64_e64: 3349 case AMDGPU::V_ASHRREV_I64_gfx10: 3350 case AMDGPU::V_LSHL_B64_e64: 3351 case AMDGPU::V_LSHR_B64_e64: 3352 case AMDGPU::V_ASHR_I64_e64: 3353 return 1; 3354 default: 3355 return 2; 3356 } 3357 } 3358 3359 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3360 const MCOperand &MO = Inst.getOperand(OpIdx); 3361 if (MO.isImm()) { 3362 return !isInlineConstant(Inst, OpIdx); 3363 } else if (MO.isReg()) { 3364 auto Reg = MO.getReg(); 3365 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3366 auto PReg = mc2PseudoReg(Reg); 3367 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3368 } else { 3369 return true; 3370 } 3371 } 3372 3373 bool 3374 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3375 const OperandVector &Operands) { 3376 const unsigned Opcode = Inst.getOpcode(); 3377 const MCInstrDesc &Desc = MII.get(Opcode); 3378 unsigned LastSGPR = AMDGPU::NoRegister; 3379 unsigned ConstantBusUseCount = 0; 3380 unsigned NumLiterals = 0; 3381 unsigned LiteralSize; 3382 3383 if (Desc.TSFlags & 3384 (SIInstrFlags::VOPC | 3385 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3386 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3387 SIInstrFlags::SDWA)) { 3388 // Check special imm operands (used by madmk, etc) 3389 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3390 ++NumLiterals; 3391 LiteralSize = 4; 3392 } 3393 3394 SmallDenseSet<unsigned> SGPRsUsed; 3395 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3396 if (SGPRUsed != AMDGPU::NoRegister) { 3397 SGPRsUsed.insert(SGPRUsed); 3398 ++ConstantBusUseCount; 3399 } 3400 3401 
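    // Now count the explicit source operands: every new SGPR read and any
    // literal occupies the constant bus; the per-opcode limit comes from
    // getConstantBusLimit().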
    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step;
          // see validateVOPLiteral.
          // The same literal may be used by more than one operand.
          // If all of these operands have the same size,
          // the literal counts as one scalar value;
          // otherwise it counts as two scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3459 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3460 return false; 3461 } 3462 3463 bool 3464 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3465 const OperandVector &Operands) { 3466 const unsigned Opcode = Inst.getOpcode(); 3467 const MCInstrDesc &Desc = MII.get(Opcode); 3468 3469 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3470 if (DstIdx == -1 || 3471 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3472 return true; 3473 } 3474 3475 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3476 3477 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3478 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3479 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3480 3481 assert(DstIdx != -1); 3482 const MCOperand &Dst = Inst.getOperand(DstIdx); 3483 assert(Dst.isReg()); 3484 3485 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3486 3487 for (int SrcIdx : SrcIndices) { 3488 if (SrcIdx == -1) break; 3489 const MCOperand &Src = Inst.getOperand(SrcIdx); 3490 if (Src.isReg()) { 3491 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3492 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3493 Error(getRegLoc(SrcReg, Operands), 3494 "destination must be different than all sources"); 3495 return false; 3496 } 3497 } 3498 } 3499 3500 return true; 3501 } 3502 3503 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3504 3505 const unsigned Opc = Inst.getOpcode(); 3506 const MCInstrDesc &Desc = MII.get(Opc); 3507 3508 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3509 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3510 assert(ClampIdx != -1); 3511 return Inst.getOperand(ClampIdx).getImm() == 0; 3512 } 3513 3514 return true; 3515 } 3516 3517 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3518 3519 const unsigned Opc = Inst.getOpcode(); 3520 const MCInstrDesc &Desc = MII.get(Opc); 3521 3522 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3523 return None; 3524 3525 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3526 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3527 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3528 3529 assert(VDataIdx != -1); 3530 3531 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3532 return None; 3533 3534 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3535 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3536 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3537 if (DMask == 0) 3538 DMask = 1; 3539 3540 bool isPackedD16 = false; 3541 unsigned DataSize = 3542 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3543 if (hasPackedD16()) { 3544 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3545 isPackedD16 = D16Idx >= 0; 3546 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3547 DataSize = (DataSize + 1) / 2; 3548 } 3549 3550 if ((VDataSize / 4) == DataSize + TFESize) 3551 return None; 3552 3553 return StringRef(isPackedD16 3554 ? 
"image data size does not match dmask, d16 and tfe" 3555 : "image data size does not match dmask and tfe"); 3556 } 3557 3558 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3559 const unsigned Opc = Inst.getOpcode(); 3560 const MCInstrDesc &Desc = MII.get(Opc); 3561 3562 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3563 return true; 3564 3565 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3566 3567 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3568 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3569 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3570 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3571 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3572 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3573 3574 assert(VAddr0Idx != -1); 3575 assert(SrsrcIdx != -1); 3576 assert(SrsrcIdx > VAddr0Idx); 3577 3578 if (DimIdx == -1) 3579 return true; // intersect_ray 3580 3581 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3582 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3583 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3584 unsigned ActualAddrSize = 3585 IsNSA ? SrsrcIdx - VAddr0Idx 3586 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3587 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3588 3589 unsigned ExpectedAddrSize = 3590 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3591 3592 if (!IsNSA) { 3593 if (ExpectedAddrSize > 8) 3594 ExpectedAddrSize = 16; 3595 3596 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3597 // This provides backward compatibility for assembly created 3598 // before 160b/192b/224b types were directly supported. 3599 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3600 return true; 3601 } 3602 3603 return ActualAddrSize == ExpectedAddrSize; 3604 } 3605 3606 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3607 3608 const unsigned Opc = Inst.getOpcode(); 3609 const MCInstrDesc &Desc = MII.get(Opc); 3610 3611 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3612 return true; 3613 if (!Desc.mayLoad() || !Desc.mayStore()) 3614 return true; // Not atomic 3615 3616 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3617 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3618 3619 // This is an incomplete check because image_atomic_cmpswap 3620 // may only use 0x3 and 0xf while other atomic operations 3621 // may use 0x1 and 0x3. However these limitations are 3622 // verified when we check that dmask matches dst size. 3623 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3624 } 3625 3626 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3627 3628 const unsigned Opc = Inst.getOpcode(); 3629 const MCInstrDesc &Desc = MII.get(Opc); 3630 3631 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3632 return true; 3633 3634 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3635 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3636 3637 // GATHER4 instructions use dmask in a different fashion compared to 3638 // other MIMG instructions. The only useful DMASK values are 3639 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3640 // (red,red,red,red) etc.) The ISA document doesn't mention 3641 // this. 
3642 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3643 } 3644 3645 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3646 const unsigned Opc = Inst.getOpcode(); 3647 const MCInstrDesc &Desc = MII.get(Opc); 3648 3649 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3650 return true; 3651 3652 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3653 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3654 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3655 3656 if (!BaseOpcode->MSAA) 3657 return true; 3658 3659 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3660 assert(DimIdx != -1); 3661 3662 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3663 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3664 3665 return DimInfo->MSAA; 3666 } 3667 3668 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3669 { 3670 switch (Opcode) { 3671 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3672 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3673 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3674 return true; 3675 default: 3676 return false; 3677 } 3678 } 3679 3680 // movrels* opcodes should only allow VGPRS as src0. 3681 // This is specified in .td description for vop1/vop3, 3682 // but sdwa is handled differently. See isSDWAOperand. 3683 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3684 const OperandVector &Operands) { 3685 3686 const unsigned Opc = Inst.getOpcode(); 3687 const MCInstrDesc &Desc = MII.get(Opc); 3688 3689 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3690 return true; 3691 3692 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3693 assert(Src0Idx != -1); 3694 3695 SMLoc ErrLoc; 3696 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3697 if (Src0.isReg()) { 3698 auto Reg = mc2PseudoReg(Src0.getReg()); 3699 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3700 if (!isSGPR(Reg, TRI)) 3701 return true; 3702 ErrLoc = getRegLoc(Reg, Operands); 3703 } else { 3704 ErrLoc = getConstLoc(Operands); 3705 } 3706 3707 Error(ErrLoc, "source operand must be a VGPR"); 3708 return false; 3709 } 3710 3711 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3712 const OperandVector &Operands) { 3713 3714 const unsigned Opc = Inst.getOpcode(); 3715 3716 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3717 return true; 3718 3719 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3720 assert(Src0Idx != -1); 3721 3722 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3723 if (!Src0.isReg()) 3724 return true; 3725 3726 auto Reg = mc2PseudoReg(Src0.getReg()); 3727 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3728 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3729 Error(getRegLoc(Reg, Operands), 3730 "source operand must be either a VGPR or an inline constant"); 3731 return false; 3732 } 3733 3734 return true; 3735 } 3736 3737 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3738 const OperandVector &Operands) { 3739 const unsigned Opc = Inst.getOpcode(); 3740 const MCInstrDesc &Desc = MII.get(Opc); 3741 3742 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3743 return true; 3744 3745 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3746 if (Src2Idx == -1) 3747 return true; 3748 3749 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3750 if (!Src2.isReg()) 3751 return true; 3752 3753 MCRegister Src2Reg = Src2.getReg(); 3754 MCRegister DstReg = Inst.getOperand(0).getReg(); 3755 if (Src2Reg == DstReg) 3756 return 
true; 3757 3758 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3759 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3760 return true; 3761 3762 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3763 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3764 "source 2 operand must not partially overlap with dst"); 3765 return false; 3766 } 3767 3768 return true; 3769 } 3770 3771 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3772 switch (Inst.getOpcode()) { 3773 default: 3774 return true; 3775 case V_DIV_SCALE_F32_gfx6_gfx7: 3776 case V_DIV_SCALE_F32_vi: 3777 case V_DIV_SCALE_F32_gfx10: 3778 case V_DIV_SCALE_F64_gfx6_gfx7: 3779 case V_DIV_SCALE_F64_vi: 3780 case V_DIV_SCALE_F64_gfx10: 3781 break; 3782 } 3783 3784 // TODO: Check that src0 = src1 or src2. 3785 3786 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3787 AMDGPU::OpName::src1_modifiers, 3788 AMDGPU::OpName::src2_modifiers}) { 3789 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3790 .getImm() & 3791 SISrcMods::ABS) { 3792 return false; 3793 } 3794 } 3795 3796 return true; 3797 } 3798 3799 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3800 3801 const unsigned Opc = Inst.getOpcode(); 3802 const MCInstrDesc &Desc = MII.get(Opc); 3803 3804 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3805 return true; 3806 3807 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3808 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3809 if (isCI() || isSI()) 3810 return false; 3811 } 3812 3813 return true; 3814 } 3815 3816 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3817 const unsigned Opc = Inst.getOpcode(); 3818 const MCInstrDesc &Desc = MII.get(Opc); 3819 3820 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3821 return true; 3822 3823 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3824 if (DimIdx < 0) 3825 return true; 3826 3827 long Imm = Inst.getOperand(DimIdx).getImm(); 3828 if (Imm < 0 || Imm >= 8) 3829 return false; 3830 3831 return true; 3832 } 3833 3834 static bool IsRevOpcode(const unsigned Opcode) 3835 { 3836 switch (Opcode) { 3837 case AMDGPU::V_SUBREV_F32_e32: 3838 case AMDGPU::V_SUBREV_F32_e64: 3839 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3840 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3841 case AMDGPU::V_SUBREV_F32_e32_vi: 3842 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3843 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3844 case AMDGPU::V_SUBREV_F32_e64_vi: 3845 3846 case AMDGPU::V_SUBREV_CO_U32_e32: 3847 case AMDGPU::V_SUBREV_CO_U32_e64: 3848 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3849 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3850 3851 case AMDGPU::V_SUBBREV_U32_e32: 3852 case AMDGPU::V_SUBBREV_U32_e64: 3853 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3854 case AMDGPU::V_SUBBREV_U32_e32_vi: 3855 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3856 case AMDGPU::V_SUBBREV_U32_e64_vi: 3857 3858 case AMDGPU::V_SUBREV_U32_e32: 3859 case AMDGPU::V_SUBREV_U32_e64: 3860 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3861 case AMDGPU::V_SUBREV_U32_e32_vi: 3862 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3863 case AMDGPU::V_SUBREV_U32_e64_vi: 3864 3865 case AMDGPU::V_SUBREV_F16_e32: 3866 case AMDGPU::V_SUBREV_F16_e64: 3867 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3868 case AMDGPU::V_SUBREV_F16_e32_vi: 3869 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3870 case AMDGPU::V_SUBREV_F16_e64_vi: 3871 3872 case AMDGPU::V_SUBREV_U16_e32: 3873 case AMDGPU::V_SUBREV_U16_e64: 3874 case AMDGPU::V_SUBREV_U16_e32_vi: 3875 case AMDGPU::V_SUBREV_U16_e64_vi: 3876 3877 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3878 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3879 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3880 3881 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3882 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3883 3884 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3885 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3886 3887 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3888 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3889 3890 case AMDGPU::V_LSHRREV_B32_e32: 3891 case AMDGPU::V_LSHRREV_B32_e64: 3892 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3893 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3894 case AMDGPU::V_LSHRREV_B32_e32_vi: 3895 case AMDGPU::V_LSHRREV_B32_e64_vi: 3896 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3897 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3898 3899 case AMDGPU::V_ASHRREV_I32_e32: 3900 case AMDGPU::V_ASHRREV_I32_e64: 3901 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3902 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3903 case AMDGPU::V_ASHRREV_I32_e32_vi: 3904 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3905 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3906 case AMDGPU::V_ASHRREV_I32_e64_vi: 3907 3908 case AMDGPU::V_LSHLREV_B32_e32: 3909 case AMDGPU::V_LSHLREV_B32_e64: 3910 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3911 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3912 case AMDGPU::V_LSHLREV_B32_e32_vi: 3913 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3914 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3915 case AMDGPU::V_LSHLREV_B32_e64_vi: 3916 3917 case AMDGPU::V_LSHLREV_B16_e32: 3918 case AMDGPU::V_LSHLREV_B16_e64: 3919 case AMDGPU::V_LSHLREV_B16_e32_vi: 3920 case AMDGPU::V_LSHLREV_B16_e64_vi: 3921 case AMDGPU::V_LSHLREV_B16_gfx10: 3922 3923 case AMDGPU::V_LSHRREV_B16_e32: 3924 case AMDGPU::V_LSHRREV_B16_e64: 3925 case AMDGPU::V_LSHRREV_B16_e32_vi: 3926 case AMDGPU::V_LSHRREV_B16_e64_vi: 3927 case AMDGPU::V_LSHRREV_B16_gfx10: 3928 3929 case AMDGPU::V_ASHRREV_I16_e32: 3930 case AMDGPU::V_ASHRREV_I16_e64: 3931 case AMDGPU::V_ASHRREV_I16_e32_vi: 3932 case AMDGPU::V_ASHRREV_I16_e64_vi: 3933 case AMDGPU::V_ASHRREV_I16_gfx10: 3934 3935 case AMDGPU::V_LSHLREV_B64_e64: 3936 case AMDGPU::V_LSHLREV_B64_gfx10: 3937 case AMDGPU::V_LSHLREV_B64_vi: 3938 3939 case AMDGPU::V_LSHRREV_B64_e64: 3940 case AMDGPU::V_LSHRREV_B64_gfx10: 3941 case AMDGPU::V_LSHRREV_B64_vi: 3942 3943 case AMDGPU::V_ASHRREV_I64_e64: 3944 case AMDGPU::V_ASHRREV_I64_gfx10: 3945 case AMDGPU::V_ASHRREV_I64_vi: 3946 3947 case AMDGPU::V_PK_LSHLREV_B16: 3948 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3949 case AMDGPU::V_PK_LSHLREV_B16_vi: 3950 3951 case AMDGPU::V_PK_LSHRREV_B16: 3952 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3953 case AMDGPU::V_PK_LSHRREV_B16_vi: 3954 case AMDGPU::V_PK_ASHRREV_I16: 3955 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3956 case AMDGPU::V_PK_ASHRREV_I16_vi: 3957 return true; 3958 default: 3959 return false; 3960 } 3961 } 3962 3963 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3964 3965 using namespace SIInstrFlags; 3966 const unsigned Opcode = Inst.getOpcode(); 3967 const MCInstrDesc &Desc = MII.get(Opcode); 3968 3969 // lds_direct register is defined so that it can be used 3970 // with 9-bit operands only. Ignore encodings which do not accept these. 
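// Illustrative sketches of what the checks below diagnose (assembly syntax is a sketch, not exhaustive):
//   v_add_f32 v0, lds_direct, v1     // passes this check (src0, not a rev/SDWA encoding)
//   v_add_f32 v0, v1, lds_direct     // error: lds_direct may be used as src0 only
//   v_subrev_f32 v0, lds_direct, v1  // error: lds_direct cannot be used with this instruction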
3971 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3972 if ((Desc.TSFlags & Enc) == 0) 3973 return None; 3974 3975 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3976 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3977 if (SrcIdx == -1) 3978 break; 3979 const auto &Src = Inst.getOperand(SrcIdx); 3980 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3981 3982 if (isGFX90A() || isGFX11Plus()) 3983 return StringRef("lds_direct is not supported on this GPU"); 3984 3985 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3986 return StringRef("lds_direct cannot be used with this instruction"); 3987 3988 if (SrcName != OpName::src0) 3989 return StringRef("lds_direct may be used as src0 only"); 3990 } 3991 } 3992 3993 return None; 3994 } 3995 3996 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3997 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3998 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3999 if (Op.isFlatOffset()) 4000 return Op.getStartLoc(); 4001 } 4002 return getLoc(); 4003 } 4004 4005 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4006 const OperandVector &Operands) { 4007 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4008 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4009 return true; 4010 4011 auto Opcode = Inst.getOpcode(); 4012 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4013 assert(OpNum != -1); 4014 4015 const auto &Op = Inst.getOperand(OpNum); 4016 if (!hasFlatOffsets() && Op.getImm() != 0) { 4017 Error(getFlatOffsetLoc(Operands), 4018 "flat offset modifier is not supported on this GPU"); 4019 return false; 4020 } 4021 4022 // For FLAT segment the offset must be positive; 4023 // MSB is ignored and forced to zero. 4024 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4025 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4026 if (!isIntN(OffsetSize, Op.getImm())) { 4027 Error(getFlatOffsetLoc(Operands), 4028 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4029 return false; 4030 } 4031 } else { 4032 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4033 if (!isUIntN(OffsetSize, Op.getImm())) { 4034 Error(getFlatOffsetLoc(Operands), 4035 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4036 return false; 4037 } 4038 } 4039 4040 return true; 4041 } 4042 4043 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4044 // Start with second operand because SMEM Offset cannot be dst or src0. 
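// For example (illustrative), in "s_load_dword s5, s[2:3], 0x40" Operands[0] is the mnemonic
// token and Operands[1] the destination, so the scan below starts at index 2.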
4045 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4046 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4047 if (Op.isSMEMOffset()) 4048 return Op.getStartLoc(); 4049 } 4050 return getLoc(); 4051 } 4052 4053 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4054 const OperandVector &Operands) { 4055 if (isCI() || isSI()) 4056 return true; 4057 4058 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4059 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4060 return true; 4061 4062 auto Opcode = Inst.getOpcode(); 4063 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4064 if (OpNum == -1) 4065 return true; 4066 4067 const auto &Op = Inst.getOperand(OpNum); 4068 if (!Op.isImm()) 4069 return true; 4070 4071 uint64_t Offset = Op.getImm(); 4072 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4073 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4074 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4075 return true; 4076 4077 Error(getSMEMOffsetLoc(Operands), 4078 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4079 "expected a 21-bit signed offset"); 4080 4081 return false; 4082 } 4083 4084 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4085 unsigned Opcode = Inst.getOpcode(); 4086 const MCInstrDesc &Desc = MII.get(Opcode); 4087 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4088 return true; 4089 4090 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4091 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4092 4093 const int OpIndices[] = { Src0Idx, Src1Idx }; 4094 4095 unsigned NumExprs = 0; 4096 unsigned NumLiterals = 0; 4097 uint32_t LiteralValue; 4098 4099 for (int OpIdx : OpIndices) { 4100 if (OpIdx == -1) break; 4101 4102 const MCOperand &MO = Inst.getOperand(OpIdx); 4103 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4104 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4105 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4106 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4107 if (NumLiterals == 0 || LiteralValue != Value) { 4108 LiteralValue = Value; 4109 ++NumLiterals; 4110 } 4111 } else if (MO.isExpr()) { 4112 ++NumExprs; 4113 } 4114 } 4115 } 4116 4117 return NumLiterals + NumExprs <= 1; 4118 } 4119 4120 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4121 const unsigned Opc = Inst.getOpcode(); 4122 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4123 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4124 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4125 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4126 4127 if (OpSel & ~3) 4128 return false; 4129 } 4130 4131 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4132 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4133 if (OpSelIdx != -1) { 4134 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4135 return false; 4136 } 4137 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4138 if (OpSelHiIdx != -1) { 4139 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4140 return false; 4141 } 4142 } 4143 4144 return true; 4145 } 4146 4147 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4148 const OperandVector &Operands) { 4149 const unsigned Opc = Inst.getOpcode(); 4150 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4151 if (DppCtrlIdx < 0) 4152 return true; 4153 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4154 4155 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4156 // DPP64 is supported for row_newbcast only. 4157 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4158 if (Src0Idx >= 0 && 4159 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4160 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4161 Error(S, "64 bit dpp only supports row_newbcast"); 4162 return false; 4163 } 4164 } 4165 4166 return true; 4167 } 4168 4169 // Check if VCC register matches wavefront size 4170 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4171 auto FB = getFeatureBits(); 4172 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4173 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4174 } 4175 4176 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4177 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4178 const OperandVector &Operands) { 4179 unsigned Opcode = Inst.getOpcode(); 4180 const MCInstrDesc &Desc = MII.get(Opcode); 4181 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4182 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4183 ImmIdx == -1) 4184 return true; 4185 4186 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4187 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4188 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4189 4190 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4191 4192 unsigned NumExprs = 0; 4193 unsigned NumLiterals = 0; 4194 uint32_t LiteralValue; 4195 4196 for (int OpIdx : OpIndices) { 4197 if (OpIdx == -1) 4198 continue; 4199 4200 const MCOperand &MO = Inst.getOperand(OpIdx); 4201 if (!MO.isImm() && !MO.isExpr()) 4202 continue; 4203 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4204 continue; 4205 4206 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4207 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4208 Error(getConstLoc(Operands), 4209 "inline constants are not allowed for this operand"); 4210 return false; 4211 } 4212 4213 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4214 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4215 if (NumLiterals == 0 || LiteralValue != Value) { 4216 LiteralValue = Value; 4217 ++NumLiterals; 4218 } 4219 } else if (MO.isExpr()) { 4220 ++NumExprs; 4221 } 4222 } 4223 NumLiterals += NumExprs; 4224 4225 if (!NumLiterals) 4226 return true; 4227 4228 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4229 Error(getLitLoc(Operands), "literal operands are not supported"); 4230 return false; 4231 } 4232 4233 if (NumLiterals > 1) { 4234 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4235 return false; 4236 } 4237 4238 return true; 4239 } 4240 4241 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4242 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4243 const MCRegisterInfo *MRI) { 4244 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4245 if (OpIdx < 0) 4246 return -1; 4247 4248 const MCOperand &Op = Inst.getOperand(OpIdx); 4249 if (!Op.isReg()) 4250 return -1; 4251 4252 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4253 auto Reg = Sub ? Sub : Op.getReg(); 4254 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4255 return AGPR32.contains(Reg) ? 
1 : 0; 4256 } 4257 4258 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4259 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4260 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4261 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4262 SIInstrFlags::DS)) == 0) 4263 return true; 4264 4265 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4266 : AMDGPU::OpName::vdata; 4267 4268 const MCRegisterInfo *MRI = getMRI(); 4269 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4270 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4271 4272 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4273 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4274 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4275 return false; 4276 } 4277 4278 auto FB = getFeatureBits(); 4279 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4280 if (DataAreg < 0 || DstAreg < 0) 4281 return true; 4282 return DstAreg == DataAreg; 4283 } 4284 4285 return DstAreg < 1 && DataAreg < 1; 4286 } 4287 4288 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4289 auto FB = getFeatureBits(); 4290 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4291 return true; 4292 4293 const MCRegisterInfo *MRI = getMRI(); 4294 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4295 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4296 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4297 const MCOperand &Op = Inst.getOperand(I); 4298 if (!Op.isReg()) 4299 continue; 4300 4301 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4302 if (!Sub) 4303 continue; 4304 4305 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4306 return false; 4307 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4308 return false; 4309 } 4310 4311 return true; 4312 } 4313 4314 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4315 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4316 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4317 if (Op.isBLGP()) 4318 return Op.getStartLoc(); 4319 } 4320 return SMLoc(); 4321 } 4322 4323 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4324 const OperandVector &Operands) { 4325 unsigned Opc = Inst.getOpcode(); 4326 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4327 if (BlgpIdx == -1) 4328 return true; 4329 SMLoc BLGPLoc = getBLGPLoc(Operands); 4330 if (!BLGPLoc.isValid()) 4331 return true; 4332 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4333 auto FB = getFeatureBits(); 4334 bool UsesNeg = false; 4335 if (FB[AMDGPU::FeatureGFX940Insts]) { 4336 switch (Opc) { 4337 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4338 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4339 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4340 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4341 UsesNeg = true; 4342 } 4343 } 4344 4345 if (IsNeg == UsesNeg) 4346 return true; 4347 4348 Error(BLGPLoc, 4349 UsesNeg ? "invalid modifier: blgp is not supported" 4350 : "invalid modifier: neg is not supported"); 4351 4352 return false; 4353 } 4354 4355 // gfx90a has an undocumented limitation: 4356 // DS_GWS opcodes must use even aligned registers. 
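// For example (illustrative syntax), "ds_gws_init v1 gds" assembled for gfx90a is rejected
// below with "vgpr must be even aligned", while an even data register such as v2 is accepted.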
4357 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4358 const OperandVector &Operands) { 4359 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4360 return true; 4361 4362 int Opc = Inst.getOpcode(); 4363 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4364 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4365 return true; 4366 4367 const MCRegisterInfo *MRI = getMRI(); 4368 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4369 int Data0Pos = 4370 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4371 assert(Data0Pos != -1); 4372 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4373 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4374 if (RegIdx & 1) { 4375 SMLoc RegLoc = getRegLoc(Reg, Operands); 4376 Error(RegLoc, "vgpr must be even aligned"); 4377 return false; 4378 } 4379 4380 return true; 4381 } 4382 4383 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4384 const OperandVector &Operands, 4385 const SMLoc &IDLoc) { 4386 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4387 AMDGPU::OpName::cpol); 4388 if (CPolPos == -1) 4389 return true; 4390 4391 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4392 4393 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4394 if (TSFlags & SIInstrFlags::SMRD) { 4395 if (CPol && (isSI() || isCI())) { 4396 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4397 Error(S, "cache policy is not supported for SMRD instructions"); 4398 return false; 4399 } 4400 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4401 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4402 return false; 4403 } 4404 } 4405 4406 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4407 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4408 StringRef CStr(S.getPointer()); 4409 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4410 Error(S, "scc is not supported on this GPU"); 4411 return false; 4412 } 4413 4414 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4415 return true; 4416 4417 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4418 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4419 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4420 : "instruction must use glc"); 4421 return false; 4422 } 4423 } else { 4424 if (CPol & CPol::GLC) { 4425 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4426 StringRef CStr(S.getPointer()); 4427 S = SMLoc::getFromPointer( 4428 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4429 Error(S, isGFX940() ? "instruction must not use sc0" 4430 : "instruction must not use glc"); 4431 return false; 4432 } 4433 } 4434 4435 return true; 4436 } 4437 4438 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4439 const OperandVector &Operands, 4440 const SMLoc &IDLoc) { 4441 if (isGFX940()) 4442 return true; 4443 4444 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4445 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4446 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4447 return true; 4448 // This is FLAT LDS DMA. 4449 4450 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4451 StringRef CStr(S.getPointer()); 4452 if (!CStr.startswith("lds")) { 4453 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4454 // The LDS version should have an 'lds' modifier, but it follows optional 4455 // operands, so its absence is ignored by the matcher.
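// Note (summary, not an additional check): the matcher may still have selected the
// LDS-DMA opcode even though no "lds" token was written, which is why the source text
// at the expected modifier location is inspected above before rejecting the instruction.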
4456 Error(IDLoc, "invalid operands for instruction"); 4457 return false; 4458 } 4459 4460 return true; 4461 } 4462 4463 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4464 const SMLoc &IDLoc, 4465 const OperandVector &Operands) { 4466 if (auto ErrMsg = validateLdsDirect(Inst)) { 4467 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4468 return false; 4469 } 4470 if (!validateSOPLiteral(Inst)) { 4471 Error(getLitLoc(Operands), 4472 "only one literal operand is allowed"); 4473 return false; 4474 } 4475 if (!validateVOPLiteral(Inst, Operands)) { 4476 return false; 4477 } 4478 if (!validateConstantBusLimitations(Inst, Operands)) { 4479 return false; 4480 } 4481 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4482 return false; 4483 } 4484 if (!validateIntClampSupported(Inst)) { 4485 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4486 "integer clamping is not supported on this GPU"); 4487 return false; 4488 } 4489 if (!validateOpSel(Inst)) { 4490 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4491 "invalid op_sel operand"); 4492 return false; 4493 } 4494 if (!validateDPP(Inst, Operands)) { 4495 return false; 4496 } 4497 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4498 if (!validateMIMGD16(Inst)) { 4499 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4500 "d16 modifier is not supported on this GPU"); 4501 return false; 4502 } 4503 if (!validateMIMGDim(Inst)) { 4504 Error(IDLoc, "dim modifier is required on this GPU"); 4505 return false; 4506 } 4507 if (!validateMIMGMSAA(Inst)) { 4508 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4509 "invalid dim; must be MSAA type"); 4510 return false; 4511 } 4512 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4513 Error(IDLoc, *ErrMsg); 4514 return false; 4515 } 4516 if (!validateMIMGAddrSize(Inst)) { 4517 Error(IDLoc, 4518 "image address size does not match dim and a16"); 4519 return false; 4520 } 4521 if (!validateMIMGAtomicDMask(Inst)) { 4522 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4523 "invalid atomic image dmask"); 4524 return false; 4525 } 4526 if (!validateMIMGGatherDMask(Inst)) { 4527 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4528 "invalid image_gather dmask: only one bit must be set"); 4529 return false; 4530 } 4531 if (!validateMovrels(Inst, Operands)) { 4532 return false; 4533 } 4534 if (!validateFlatOffset(Inst, Operands)) { 4535 return false; 4536 } 4537 if (!validateSMEMOffset(Inst, Operands)) { 4538 return false; 4539 } 4540 if (!validateMAIAccWrite(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateMFMA(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4547 return false; 4548 } 4549 4550 if (!validateAGPRLdSt(Inst)) { 4551 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4552 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4553 : "invalid register class: agpr loads and stores not supported on this GPU" 4554 ); 4555 return false; 4556 } 4557 if (!validateVGPRAlign(Inst)) { 4558 Error(IDLoc, 4559 "invalid register class: vgpr tuples must be 64 bit aligned"); 4560 return false; 4561 } 4562 if (!validateGWS(Inst, Operands)) { 4563 return false; 4564 } 4565 4566 if (!validateBLGP(Inst, Operands)) { 4567 return false; 4568 } 4569 4570 if (!validateDivScale(Inst)) { 4571 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4572 return false; 4573 } 4574 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4575 return false; 4576 } 4577 4578 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4579 return false; 4580 } 4581 4582 return true; 4583 } 4584 4585 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4586 const FeatureBitset &FBS, 4587 unsigned VariantID = 0); 4588 4589 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4590 const FeatureBitset &AvailableFeatures, 4591 unsigned VariantID); 4592 4593 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4594 const FeatureBitset &FBS) { 4595 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4596 } 4597 4598 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4599 const FeatureBitset &FBS, 4600 ArrayRef<unsigned> Variants) { 4601 for (auto Variant : Variants) { 4602 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4603 return true; 4604 } 4605 4606 return false; 4607 } 4608 4609 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4610 const SMLoc &IDLoc) { 4611 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4612 4613 // Check if requested instruction variant is supported. 4614 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4615 return false; 4616 4617 // This instruction is not supported. 4618 // Clear any other pending errors because they are no longer relevant. 4619 getParser().clearPendingErrors(); 4620 4621 // Requested instruction variant is not supported. 4622 // Check if any other variants are supported. 4623 StringRef VariantName = getMatchedVariantName(); 4624 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4625 return Error(IDLoc, 4626 Twine(VariantName, 4627 " variant of this instruction is not supported")); 4628 } 4629 4630 // Finally check if this instruction is supported on any other GPU. 4631 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4632 return Error(IDLoc, "instruction not supported on this GPU"); 4633 } 4634 4635 // Instruction not supported on any GPU. Probably a typo. 4636 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4637 return Error(IDLoc, "invalid instruction" + Suggestion); 4638 } 4639 4640 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4641 OperandVector &Operands, 4642 MCStreamer &Out, 4643 uint64_t &ErrorInfo, 4644 bool MatchingInlineAsm) { 4645 MCInst Inst; 4646 unsigned Result = Match_Success; 4647 for (auto Variant : getMatchedVariants()) { 4648 uint64_t EI; 4649 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4650 Variant); 4651 // We order match statuses from least to most specific. 
We use most specific 4652 // status as resulting 4653 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4654 if ((R == Match_Success) || 4655 (R == Match_PreferE32) || 4656 (R == Match_MissingFeature && Result != Match_PreferE32) || 4657 (R == Match_InvalidOperand && Result != Match_MissingFeature 4658 && Result != Match_PreferE32) || 4659 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4660 && Result != Match_MissingFeature 4661 && Result != Match_PreferE32)) { 4662 Result = R; 4663 ErrorInfo = EI; 4664 } 4665 if (R == Match_Success) 4666 break; 4667 } 4668 4669 if (Result == Match_Success) { 4670 if (!validateInstruction(Inst, IDLoc, Operands)) { 4671 return true; 4672 } 4673 Inst.setLoc(IDLoc); 4674 Out.emitInstruction(Inst, getSTI()); 4675 return false; 4676 } 4677 4678 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4679 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4680 return true; 4681 } 4682 4683 switch (Result) { 4684 default: break; 4685 case Match_MissingFeature: 4686 // It has been verified that the specified instruction 4687 // mnemonic is valid. A match was found but it requires 4688 // features which are not supported on this GPU. 4689 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4690 4691 case Match_InvalidOperand: { 4692 SMLoc ErrorLoc = IDLoc; 4693 if (ErrorInfo != ~0ULL) { 4694 if (ErrorInfo >= Operands.size()) { 4695 return Error(IDLoc, "too few operands for instruction"); 4696 } 4697 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4698 if (ErrorLoc == SMLoc()) 4699 ErrorLoc = IDLoc; 4700 } 4701 return Error(ErrorLoc, "invalid operand for instruction"); 4702 } 4703 4704 case Match_PreferE32: 4705 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4706 "should be encoded as e32"); 4707 case Match_MnemonicFail: 4708 llvm_unreachable("Invalid instructions should have been handled already"); 4709 } 4710 llvm_unreachable("Implement any new match types added!"); 4711 } 4712 4713 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4714 int64_t Tmp = -1; 4715 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4716 return true; 4717 } 4718 if (getParser().parseAbsoluteExpression(Tmp)) { 4719 return true; 4720 } 4721 Ret = static_cast<uint32_t>(Tmp); 4722 return false; 4723 } 4724 4725 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4726 uint32_t &Minor) { 4727 if (ParseAsAbsoluteExpression(Major)) 4728 return TokError("invalid major version"); 4729 4730 if (!trySkipToken(AsmToken::Comma)) 4731 return TokError("minor version number required, comma expected"); 4732 4733 if (ParseAsAbsoluteExpression(Minor)) 4734 return TokError("invalid minor version"); 4735 4736 return false; 4737 } 4738 4739 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4740 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4741 return TokError("directive only supported for amdgcn architecture"); 4742 4743 std::string TargetIDDirective; 4744 SMLoc TargetStart = getTok().getLoc(); 4745 if (getParser().parseEscapedString(TargetIDDirective)) 4746 return true; 4747 4748 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4749 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4750 return getParser().Error(TargetRange.Start, 4751 (Twine(".amdgcn_target directive's target id ") + 4752 Twine(TargetIDDirective) + 4753 Twine(" does not match the specified target id ") + 4754 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4755 4756 return false; 4757 } 4758 4759 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4760 return Error(Range.Start, "value out of range", Range); 4761 } 4762 4763 bool AMDGPUAsmParser::calculateGPRBlocks( 4764 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4765 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4766 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4767 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4768 // TODO(scott.linder): These calculations are duplicated from 4769 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4770 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4771 4772 unsigned NumVGPRs = NextFreeVGPR; 4773 unsigned NumSGPRs = NextFreeSGPR; 4774 4775 if (Version.Major >= 10) 4776 NumSGPRs = 0; 4777 else { 4778 unsigned MaxAddressableNumSGPRs = 4779 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4780 4781 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4782 NumSGPRs > MaxAddressableNumSGPRs) 4783 return OutOfRangeError(SGPRRange); 4784 4785 NumSGPRs += 4786 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4787 4788 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4789 NumSGPRs > MaxAddressableNumSGPRs) 4790 return OutOfRangeError(SGPRRange); 4791 4792 if (Features.test(FeatureSGPRInitBug)) 4793 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4794 } 4795 4796 VGPRBlocks = 4797 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4798 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4799 4800 return false; 4801 } 4802 4803 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4804 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4805 return TokError("directive only supported for amdgcn architecture"); 4806 4807 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4808 return TokError("directive only supported for amdhsa OS"); 4809 4810 StringRef KernelName; 4811 if (getParser().parseIdentifier(KernelName)) 4812 return true; 4813 4814 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4815 4816 StringSet<> Seen; 4817 4818 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4819 4820 SMRange VGPRRange; 4821 uint64_t NextFreeVGPR = 0; 4822 uint64_t AccumOffset = 0; 4823 uint64_t SharedVGPRCount = 0; 4824 SMRange SGPRRange; 4825 uint64_t NextFreeSGPR = 0; 4826 4827 // Count the number of user SGPRs implied from the enabled feature bits. 4828 unsigned ImpliedUserSGPRCount = 0; 4829 4830 // Track if the asm explicitly contains the directive for the user SGPR 4831 // count. 
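// A minimal kernel descriptor block accepted by this parser looks like (illustrative values):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel
// Each recognized .amdhsa_* entry below range-checks its value and sets the corresponding
// kernel descriptor field. (ExplicitUserSGPRCount records an explicit .amdhsa_user_sgpr_count
// when one is given.)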
4832 Optional<unsigned> ExplicitUserSGPRCount; 4833 bool ReserveVCC = true; 4834 bool ReserveFlatScr = true; 4835 Optional<bool> EnableWavefrontSize32; 4836 4837 while (true) { 4838 while (trySkipToken(AsmToken::EndOfStatement)); 4839 4840 StringRef ID; 4841 SMRange IDRange = getTok().getLocRange(); 4842 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4843 return true; 4844 4845 if (ID == ".end_amdhsa_kernel") 4846 break; 4847 4848 if (Seen.find(ID) != Seen.end()) 4849 return TokError(".amdhsa_ directives cannot be repeated"); 4850 Seen.insert(ID); 4851 4852 SMLoc ValStart = getLoc(); 4853 int64_t IVal; 4854 if (getParser().parseAbsoluteExpression(IVal)) 4855 return true; 4856 SMLoc ValEnd = getLoc(); 4857 SMRange ValRange = SMRange(ValStart, ValEnd); 4858 4859 if (IVal < 0) 4860 return OutOfRangeError(ValRange); 4861 4862 uint64_t Val = IVal; 4863 4864 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4865 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4866 return OutOfRangeError(RANGE); \ 4867 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4868 4869 if (ID == ".amdhsa_group_segment_fixed_size") { 4870 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4871 return OutOfRangeError(ValRange); 4872 KD.group_segment_fixed_size = Val; 4873 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4874 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4875 return OutOfRangeError(ValRange); 4876 KD.private_segment_fixed_size = Val; 4877 } else if (ID == ".amdhsa_kernarg_size") { 4878 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4879 return OutOfRangeError(ValRange); 4880 KD.kernarg_size = Val; 4881 } else if (ID == ".amdhsa_user_sgpr_count") { 4882 ExplicitUserSGPRCount = Val; 4883 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4884 if (hasArchitectedFlatScratch()) 4885 return Error(IDRange.Start, 4886 "directive is not supported with architected flat scratch", 4887 IDRange); 4888 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4889 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4890 Val, ValRange); 4891 if (Val) 4892 ImpliedUserSGPRCount += 4; 4893 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4894 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4895 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4896 ValRange); 4897 if (Val) 4898 ImpliedUserSGPRCount += 2; 4899 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4900 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4901 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4902 ValRange); 4903 if (Val) 4904 ImpliedUserSGPRCount += 2; 4905 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4906 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4907 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4908 Val, ValRange); 4909 if (Val) 4910 ImpliedUserSGPRCount += 2; 4911 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4912 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4913 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4914 ValRange); 4915 if (Val) 4916 ImpliedUserSGPRCount += 2; 4917 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4918 if (hasArchitectedFlatScratch()) 4919 return Error(IDRange.Start, 4920 "directive is not supported with architected flat scratch", 4921 IDRange); 4922 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4923 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4924 ValRange); 4925 if (Val) 4926 ImpliedUserSGPRCount += 2; 4927 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4928 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4929 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4930 Val, ValRange); 4931 if (Val) 4932 ImpliedUserSGPRCount += 1; 4933 } else if (ID == ".amdhsa_wavefront_size32") { 4934 if (IVersion.Major < 10) 4935 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4936 EnableWavefrontSize32 = Val; 4937 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4938 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4939 Val, ValRange); 4940 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4941 if (hasArchitectedFlatScratch()) 4942 return Error(IDRange.Start, 4943 "directive is not supported with architected flat scratch", 4944 IDRange); 4945 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4946 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4947 } else if (ID == ".amdhsa_enable_private_segment") { 4948 if (!hasArchitectedFlatScratch()) 4949 return Error( 4950 IDRange.Start, 4951 "directive is not supported without architected flat scratch", 4952 IDRange); 4953 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4954 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4955 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4957 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4958 ValRange); 4959 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4960 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4961 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4962 ValRange); 4963 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4964 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4965 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4966 ValRange); 4967 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4968 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4969 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4970 ValRange); 4971 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4972 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4973 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4974 ValRange); 4975 } else if (ID == ".amdhsa_next_free_vgpr") { 4976 VGPRRange = ValRange; 4977 NextFreeVGPR = Val; 4978 } else if (ID == ".amdhsa_next_free_sgpr") { 4979 SGPRRange = ValRange; 4980 NextFreeSGPR = Val; 4981 } else if (ID == ".amdhsa_accum_offset") { 4982 if (!isGFX90A()) 4983 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4984 AccumOffset = Val; 4985 } else if (ID == ".amdhsa_reserve_vcc") { 4986 if (!isUInt<1>(Val)) 4987 return OutOfRangeError(ValRange); 4988 ReserveVCC = Val; 4989 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4990 if (IVersion.Major < 7) 4991 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4992 if (hasArchitectedFlatScratch()) 4993 return Error(IDRange.Start, 4994 "directive is not supported with architected flat scratch", 4995 IDRange); 4996 if (!isUInt<1>(Val)) 4997 return OutOfRangeError(ValRange); 4998 ReserveFlatScr = Val; 4999 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5000 if (IVersion.Major < 8) 5001 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5002 if (!isUInt<1>(Val)) 5003 return OutOfRangeError(ValRange); 5004 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5005 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5006 IDRange); 5007 } else if (ID == ".amdhsa_float_round_mode_32") { 5008 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5009 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5010 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5012 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5013 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5015 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5016 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5018 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5019 ValRange); 5020 } else if (ID == ".amdhsa_dx10_clamp") { 5021 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5022 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5023 } else if (ID == ".amdhsa_ieee_mode") { 5024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5025 Val, ValRange); 5026 } else if (ID == ".amdhsa_fp16_overflow") { 5027 if (IVersion.Major < 9) 5028 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5029 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5030 ValRange); 5031 } else if (ID == ".amdhsa_tg_split") { 5032 if (!isGFX90A()) 5033 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5035 ValRange); 5036 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5037 if (IVersion.Major < 10) 5038 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5040 ValRange); 5041 } else if (ID == ".amdhsa_memory_ordered") { 5042 if (IVersion.Major < 10) 5043 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5044 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5045 ValRange); 5046 } else if (ID == ".amdhsa_forward_progress") { 5047 if (IVersion.Major < 10) 5048 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5049 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5050 ValRange); 5051 } else if (ID == ".amdhsa_shared_vgpr_count") { 5052 if (IVersion.Major < 10) 5053 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5054 SharedVGPRCount = Val; 5055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5056 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5057 ValRange); 5058 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5059 PARSE_BITS_ENTRY( 5060 KD.compute_pgm_rsrc2, 5061 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5062 ValRange); 5063 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5064 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5065 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5066 Val, ValRange); 5067 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5068 PARSE_BITS_ENTRY( 5069 KD.compute_pgm_rsrc2, 5070 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5071 ValRange); 5072 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5073 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5074 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5075 Val, ValRange); 5076 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5077 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5078 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5079 Val, ValRange); 5080 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5082 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5083 Val, ValRange); 5084 } else if (ID == ".amdhsa_exception_int_div_zero") { 5085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5086 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5087 Val, ValRange); 5088 } else { 5089 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5090 } 5091 5092 #undef PARSE_BITS_ENTRY 5093 } 5094 5095 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5096 return TokError(".amdhsa_next_free_vgpr directive is required"); 5097 5098 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5099 return TokError(".amdhsa_next_free_sgpr directive is required"); 5100 5101 unsigned VGPRBlocks; 5102 unsigned SGPRBlocks; 5103 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5104 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5105 EnableWavefrontSize32, NextFreeVGPR, 5106 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5107 SGPRBlocks)) 5108 return true; 5109 5110 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5111 VGPRBlocks)) 5112 return OutOfRangeError(VGPRRange); 5113 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5114 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5115 5116 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5117 SGPRBlocks)) 5118 return OutOfRangeError(SGPRRange); 5119 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5120 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5121 SGPRBlocks); 5122 5123 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5124 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5125 "enabled user SGPRs"); 5126 5127 unsigned UserSGPRCount = 5128 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5129 5130 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5131 return TokError("too many user SGPRs enabled"); 5132 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5133 UserSGPRCount); 5134 5135 if (isGFX90A()) { 5136 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5137 return TokError(".amdhsa_accum_offset directive is required"); 5138 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5139 return TokError("accum_offset should be in range [4..256] in " 5140 "increments of 4"); 5141 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5142 return TokError("accum_offset exceeds total VGPR allocation"); 5143 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5144 (AccumOffset / 4 - 1)); 5145 } 5146 5147 if (IVersion.Major == 10) { 5148 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5149 if (SharedVGPRCount && EnableWavefrontSize32) { 5150 return TokError("shared_vgpr_count directive not valid on " 5151 "wavefront size 32"); 5152 } 5153 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5154 return TokError("shared_vgpr_count*2 + " 5155 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5156 "exceed 63"); 5157 } 5158 } 5159 5160 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5161 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5162 ReserveFlatScr); 5163 return false; 5164 } 5165 5166 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5167 uint32_t Major; 5168 uint32_t Minor; 5169 5170 if (ParseDirectiveMajorMinor(Major, Minor)) 5171 return true; 5172 5173 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5174 return false; 5175 } 5176 5177 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5178 uint32_t Major; 5179 uint32_t Minor; 5180 uint32_t Stepping; 5181 StringRef VendorName; 5182 StringRef ArchName; 5183 5184 // If this directive has no
arguments, then use the ISA version for the 5185 // targeted GPU. 5186 if (isToken(AsmToken::EndOfStatement)) { 5187 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5188 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5189 ISA.Stepping, 5190 "AMD", "AMDGPU"); 5191 return false; 5192 } 5193 5194 if (ParseDirectiveMajorMinor(Major, Minor)) 5195 return true; 5196 5197 if (!trySkipToken(AsmToken::Comma)) 5198 return TokError("stepping version number required, comma expected"); 5199 5200 if (ParseAsAbsoluteExpression(Stepping)) 5201 return TokError("invalid stepping version"); 5202 5203 if (!trySkipToken(AsmToken::Comma)) 5204 return TokError("vendor name required, comma expected"); 5205 5206 if (!parseString(VendorName, "invalid vendor name")) 5207 return true; 5208 5209 if (!trySkipToken(AsmToken::Comma)) 5210 return TokError("arch name required, comma expected"); 5211 5212 if (!parseString(ArchName, "invalid arch name")) 5213 return true; 5214 5215 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5216 VendorName, ArchName); 5217 return false; 5218 } 5219 5220 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5221 amd_kernel_code_t &Header) { 5222 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5223 // assembly for backwards compatibility. 5224 if (ID == "max_scratch_backing_memory_byte_size") { 5225 Parser.eatToEndOfStatement(); 5226 return false; 5227 } 5228 5229 SmallString<40> ErrStr; 5230 raw_svector_ostream Err(ErrStr); 5231 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5232 return TokError(Err.str()); 5233 } 5234 Lex(); 5235 5236 if (ID == "enable_wavefront_size32") { 5237 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5238 if (!isGFX10Plus()) 5239 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5240 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5241 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5242 } else { 5243 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5244 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5245 } 5246 } 5247 5248 if (ID == "wavefront_size") { 5249 if (Header.wavefront_size == 5) { 5250 if (!isGFX10Plus()) 5251 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5252 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5253 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5254 } else if (Header.wavefront_size == 6) { 5255 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5256 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5257 } 5258 } 5259 5260 if (ID == "enable_wgp_mode") { 5261 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5262 !isGFX10Plus()) 5263 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5264 } 5265 5266 if (ID == "enable_mem_ordered") { 5267 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5268 !isGFX10Plus()) 5269 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5270 } 5271 5272 if (ID == "enable_fwd_progress") { 5273 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5274 !isGFX10Plus()) 5275 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5276 } 5277 5278 return false; 5279 } 5280 5281 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5282 amd_kernel_code_t Header; 5283 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5284 5285 while (true) { 
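// The body of .amd_kernel_code_t is a sequence of "name = value" entries, e.g. (illustrative):
//   wavefront_size = 6        // log2 encoding; 6 requires +WavefrontSize64 (see checks above)
//   enable_wgp_mode = 0
// terminated by .end_amd_kernel_code_t.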
5286 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5287 // will set the current token to EndOfStatement. 5288 while(trySkipToken(AsmToken::EndOfStatement)); 5289 5290 StringRef ID; 5291 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5292 return true; 5293 5294 if (ID == ".end_amd_kernel_code_t") 5295 break; 5296 5297 if (ParseAMDKernelCodeTValue(ID, Header)) 5298 return true; 5299 } 5300 5301 getTargetStreamer().EmitAMDKernelCodeT(Header); 5302 5303 return false; 5304 } 5305 5306 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5307 StringRef KernelName; 5308 if (!parseId(KernelName, "expected symbol name")) 5309 return true; 5310 5311 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5312 ELF::STT_AMDGPU_HSA_KERNEL); 5313 5314 KernelScope.initialize(getContext()); 5315 return false; 5316 } 5317 5318 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5319 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5320 return Error(getLoc(), 5321 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5322 "architectures"); 5323 } 5324 5325 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5326 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5327 return Error(getParser().getTok().getLoc(), "target id must match options"); 5328 5329 getTargetStreamer().EmitISAVersion(); 5330 Lex(); 5331 5332 return false; 5333 } 5334 5335 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5336 const char *AssemblerDirectiveBegin; 5337 const char *AssemblerDirectiveEnd; 5338 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5339 isHsaAbiVersion3AndAbove(&getSTI()) 5340 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5341 HSAMD::V3::AssemblerDirectiveEnd) 5342 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5343 HSAMD::AssemblerDirectiveEnd); 5344 5345 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5346 return Error(getLoc(), 5347 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5348 "not available on non-amdhsa OSes")).str()); 5349 } 5350 5351 std::string HSAMetadataString; 5352 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5353 HSAMetadataString)) 5354 return true; 5355 5356 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5357 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5358 return Error(getLoc(), "invalid HSA metadata"); 5359 } else { 5360 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5361 return Error(getLoc(), "invalid HSA metadata"); 5362 } 5363 5364 return false; 5365 } 5366 5367 /// Common code to parse out a block of text (typically YAML) between start and 5368 /// end directives. 
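/// Used by the HSA metadata and PAL metadata directives: everything between
/// the begin and end directives is collected into \p CollectString, with
/// statements joined by the assembler's separator string.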
5369 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5370 const char *AssemblerDirectiveEnd, 5371 std::string &CollectString) { 5372 5373 raw_string_ostream CollectStream(CollectString); 5374 5375 getLexer().setSkipSpace(false); 5376 5377 bool FoundEnd = false; 5378 while (!isToken(AsmToken::Eof)) { 5379 while (isToken(AsmToken::Space)) { 5380 CollectStream << getTokenStr(); 5381 Lex(); 5382 } 5383 5384 if (trySkipId(AssemblerDirectiveEnd)) { 5385 FoundEnd = true; 5386 break; 5387 } 5388 5389 CollectStream << Parser.parseStringToEndOfStatement() 5390 << getContext().getAsmInfo()->getSeparatorString(); 5391 5392 Parser.eatToEndOfStatement(); 5393 } 5394 5395 getLexer().setSkipSpace(true); 5396 5397 if (isToken(AsmToken::Eof) && !FoundEnd) { 5398 return TokError(Twine("expected directive ") + 5399 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5400 } 5401 5402 CollectStream.flush(); 5403 return false; 5404 } 5405 5406 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5407 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5408 std::string String; 5409 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5410 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5411 return true; 5412 5413 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5414 if (!PALMetadata->setFromString(String)) 5415 return Error(getLoc(), "invalid PAL metadata"); 5416 return false; 5417 } 5418 5419 /// Parse the assembler directive for old linear-format PAL metadata. 5420 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5421 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5422 return Error(getLoc(), 5423 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5424 "not available on non-amdpal OSes")).str()); 5425 } 5426 5427 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5428 PALMetadata->setLegacy(); 5429 for (;;) { 5430 uint32_t Key, Value; 5431 if (ParseAsAbsoluteExpression(Key)) { 5432 return TokError(Twine("invalid value in ") + 5433 Twine(PALMD::AssemblerDirective)); 5434 } 5435 if (!trySkipToken(AsmToken::Comma)) { 5436 return TokError(Twine("expected an even number of values in ") + 5437 Twine(PALMD::AssemblerDirective)); 5438 } 5439 if (ParseAsAbsoluteExpression(Value)) { 5440 return TokError(Twine("invalid value in ") + 5441 Twine(PALMD::AssemblerDirective)); 5442 } 5443 PALMetadata->setRegister(Key, Value); 5444 if (!trySkipToken(AsmToken::Comma)) 5445 break; 5446 } 5447 return false; 5448 } 5449 5450 /// ParseDirectiveAMDGPULDS 5451 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5452 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5453 if (getParser().checkForValidSection()) 5454 return true; 5455 5456 StringRef Name; 5457 SMLoc NameLoc = getLoc(); 5458 if (getParser().parseIdentifier(Name)) 5459 return TokError("expected identifier in directive"); 5460 5461 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5462 if (parseToken(AsmToken::Comma, "expected ','")) 5463 return true; 5464 5465 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5466 5467 int64_t Size; 5468 SMLoc SizeLoc = getLoc(); 5469 if (getParser().parseAbsoluteExpression(Size)) 5470 return true; 5471 if (Size < 0) 5472 return Error(SizeLoc, "size must be non-negative"); 5473 if (Size > LocalMemorySize) 5474 return Error(SizeLoc, "size is too large"); 5475 5476 int64_t Alignment = 4; 5477 if (trySkipToken(AsmToken::Comma)) { 5478 SMLoc AlignLoc = getLoc(); 5479 if 
(getParser().parseAbsoluteExpression(Alignment)) 5480 return true; 5481 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5482 return Error(AlignLoc, "alignment must be a power of two"); 5483 5484 // Alignment larger than the size of LDS is possible in theory, as long 5485 // as the linker manages to place the symbol at address 0, but we do want 5486 // to make sure the alignment fits nicely into a 32-bit integer. 5487 if (Alignment >= 1u << 31) 5488 return Error(AlignLoc, "alignment is too large"); 5489 } 5490 5491 if (parseToken(AsmToken::EndOfStatement, 5492 "unexpected token in '.amdgpu_lds' directive")) 5493 return true; 5494 5495 Symbol->redefineIfPossible(); 5496 if (!Symbol->isUndefined()) 5497 return Error(NameLoc, "invalid symbol redefinition"); 5498 5499 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5500 return false; 5501 } 5502 5503 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5504 StringRef IDVal = DirectiveID.getString(); 5505 5506 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5507 if (IDVal == ".amdhsa_kernel") 5508 return ParseDirectiveAMDHSAKernel(); 5509 5510 // TODO: Restructure/combine with PAL metadata directive. 5511 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5512 return ParseDirectiveHSAMetadata(); 5513 } else { 5514 if (IDVal == ".hsa_code_object_version") 5515 return ParseDirectiveHSACodeObjectVersion(); 5516 5517 if (IDVal == ".hsa_code_object_isa") 5518 return ParseDirectiveHSACodeObjectISA(); 5519 5520 if (IDVal == ".amd_kernel_code_t") 5521 return ParseDirectiveAMDKernelCodeT(); 5522 5523 if (IDVal == ".amdgpu_hsa_kernel") 5524 return ParseDirectiveAMDGPUHsaKernel(); 5525 5526 if (IDVal == ".amd_amdgpu_isa") 5527 return ParseDirectiveISAVersion(); 5528 5529 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5530 return ParseDirectiveHSAMetadata(); 5531 } 5532 5533 if (IDVal == ".amdgcn_target") 5534 return ParseDirectiveAMDGCNTarget(); 5535 5536 if (IDVal == ".amdgpu_lds") 5537 return ParseDirectiveAMDGPULDS(); 5538 5539 if (IDVal == PALMD::AssemblerDirectiveBegin) 5540 return ParseDirectivePALMetadataBegin(); 5541 5542 if (IDVal == PALMD::AssemblerDirective) 5543 return ParseDirectivePALMetadata(); 5544 5545 return true; 5546 } 5547 5548 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5549 unsigned RegNo) { 5550 5551 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5552 return isGFX9Plus(); 5553 5554 // GFX10 has 2 more SGPRs 104 and 105. 5555 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5556 return hasSGPR104_SGPR105(); 5557 5558 switch (RegNo) { 5559 case AMDGPU::SRC_SHARED_BASE: 5560 case AMDGPU::SRC_SHARED_LIMIT: 5561 case AMDGPU::SRC_PRIVATE_BASE: 5562 case AMDGPU::SRC_PRIVATE_LIMIT: 5563 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5564 return isGFX9Plus(); 5565 case AMDGPU::TBA: 5566 case AMDGPU::TBA_LO: 5567 case AMDGPU::TBA_HI: 5568 case AMDGPU::TMA: 5569 case AMDGPU::TMA_LO: 5570 case AMDGPU::TMA_HI: 5571 return !isGFX9Plus(); 5572 case AMDGPU::XNACK_MASK: 5573 case AMDGPU::XNACK_MASK_LO: 5574 case AMDGPU::XNACK_MASK_HI: 5575 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5576 case AMDGPU::SGPR_NULL: 5577 return isGFX10Plus(); 5578 default: 5579 break; 5580 } 5581 5582 if (isCI()) 5583 return true; 5584 5585 if (isSI() || isGFX10Plus()) { 5586 // No flat_scr on SI. 5587 // On GFX10 flat scratch is not a valid register operand and can only be 5588 // accessed with s_setreg/s_getreg.
5589 switch (RegNo) { 5590 case AMDGPU::FLAT_SCR: 5591 case AMDGPU::FLAT_SCR_LO: 5592 case AMDGPU::FLAT_SCR_HI: 5593 return false; 5594 default: 5595 return true; 5596 } 5597 } 5598 5599 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5600 // SI/CI have. 5601 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5602 return hasSGPR102_SGPR103(); 5603 5604 return true; 5605 } 5606 5607 OperandMatchResultTy 5608 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5609 OperandMode Mode) { 5610 // Try to parse with a custom parser 5611 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5612 5613 // If we successfully parsed the operand or if there was an error parsing, 5614 // we are done. 5615 // 5616 // If we are parsing after we reach EndOfStatement then this means we 5617 // are appending default values to the Operands list. This is only done 5618 // by a custom parser, so we shouldn't continue on to the generic parsing. 5619 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5620 isToken(AsmToken::EndOfStatement)) 5621 return ResTy; 5622 5623 SMLoc RBraceLoc; 5624 SMLoc LBraceLoc = getLoc(); 5625 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5626 unsigned Prefix = Operands.size(); 5627 5628 for (;;) { 5629 auto Loc = getLoc(); 5630 ResTy = parseReg(Operands); 5631 if (ResTy == MatchOperand_NoMatch) 5632 Error(Loc, "expected a register"); 5633 if (ResTy != MatchOperand_Success) 5634 return MatchOperand_ParseFail; 5635 5636 RBraceLoc = getLoc(); 5637 if (trySkipToken(AsmToken::RBrac)) 5638 break; 5639 5640 if (!skipToken(AsmToken::Comma, 5641 "expected a comma or a closing square bracket")) { 5642 return MatchOperand_ParseFail; 5643 } 5644 } 5645 5646 if (Operands.size() - Prefix > 1) { 5647 Operands.insert(Operands.begin() + Prefix, 5648 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5649 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5650 } 5651 5652 return MatchOperand_Success; 5653 } 5654 5655 return parseRegOrImm(Operands); 5656 } 5657 5658 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5659 // Clear any forced encodings from the previous instruction. 5660 setForcedEncodingSize(0); 5661 setForcedDPP(false); 5662 setForcedSDWA(false); 5663 5664 if (Name.endswith("_e64")) { 5665 setForcedEncodingSize(64); 5666 return Name.substr(0, Name.size() - 4); 5667 } else if (Name.endswith("_e32")) { 5668 setForcedEncodingSize(32); 5669 return Name.substr(0, Name.size() - 4); 5670 } else if (Name.endswith("_dpp")) { 5671 setForcedDPP(true); 5672 return Name.substr(0, Name.size() - 4); 5673 } else if (Name.endswith("_sdwa")) { 5674 setForcedSDWA(true); 5675 return Name.substr(0, Name.size() - 5); 5676 } 5677 return Name; 5678 } 5679 5680 static void applyMnemonicAliases(StringRef &Mnemonic, 5681 const FeatureBitset &Features, 5682 unsigned VariantID); 5683 5684 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5685 StringRef Name, 5686 SMLoc NameLoc, OperandVector &Operands) { 5687 // Add the instruction mnemonic 5688 Name = parseMnemonicSuffix(Name); 5689 5690 // If the target architecture uses MnemonicAlias, call it here to parse 5691 // operands correctly.
5692 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5693 5694 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5695 5696 bool IsMIMG = Name.startswith("image_"); 5697 5698 while (!trySkipToken(AsmToken::EndOfStatement)) { 5699 OperandMode Mode = OperandMode_Default; 5700 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5701 Mode = OperandMode_NSA; 5702 CPolSeen = 0; 5703 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5704 5705 if (Res != MatchOperand_Success) { 5706 checkUnsupportedInstruction(Name, NameLoc); 5707 if (!Parser.hasPendingError()) { 5708 // FIXME: use real operand location rather than the current location. 5709 StringRef Msg = 5710 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5711 "not a valid operand."; 5712 Error(getLoc(), Msg); 5713 } 5714 while (!trySkipToken(AsmToken::EndOfStatement)) { 5715 lex(); 5716 } 5717 return true; 5718 } 5719 5720 // Eat the comma or space if there is one. 5721 trySkipToken(AsmToken::Comma); 5722 } 5723 5724 return false; 5725 } 5726 5727 //===----------------------------------------------------------------------===// 5728 // Utility functions 5729 //===----------------------------------------------------------------------===// 5730 5731 OperandMatchResultTy 5732 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5733 5734 if (!trySkipId(Prefix, AsmToken::Colon)) 5735 return MatchOperand_NoMatch; 5736 5737 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5738 } 5739 5740 OperandMatchResultTy 5741 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5742 AMDGPUOperand::ImmTy ImmTy, 5743 bool (*ConvertResult)(int64_t&)) { 5744 SMLoc S = getLoc(); 5745 int64_t Value = 0; 5746 5747 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5748 if (Res != MatchOperand_Success) 5749 return Res; 5750 5751 if (ConvertResult && !ConvertResult(Value)) { 5752 Error(S, "invalid " + StringRef(Prefix) + " value."); 5753 } 5754 5755 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5756 return MatchOperand_Success; 5757 } 5758 5759 OperandMatchResultTy 5760 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5761 OperandVector &Operands, 5762 AMDGPUOperand::ImmTy ImmTy, 5763 bool (*ConvertResult)(int64_t&)) { 5764 SMLoc S = getLoc(); 5765 if (!trySkipId(Prefix, AsmToken::Colon)) 5766 return MatchOperand_NoMatch; 5767 5768 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5769 return MatchOperand_ParseFail; 5770 5771 unsigned Val = 0; 5772 const unsigned MaxSize = 4; 5773 5774 // FIXME: How to verify the number of elements matches the number of src 5775 // operands? 
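  // Each array element must be 0 or 1; element I is packed into bit I of the
  // result, so an operand written as, e.g., op_sel:[0,1,1,0] yields 0b0110.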
5776 for (int I = 0; ; ++I) { 5777 int64_t Op; 5778 SMLoc Loc = getLoc(); 5779 if (!parseExpr(Op)) 5780 return MatchOperand_ParseFail; 5781 5782 if (Op != 0 && Op != 1) { 5783 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5784 return MatchOperand_ParseFail; 5785 } 5786 5787 Val |= (Op << I); 5788 5789 if (trySkipToken(AsmToken::RBrac)) 5790 break; 5791 5792 if (I + 1 == MaxSize) { 5793 Error(getLoc(), "expected a closing square bracket"); 5794 return MatchOperand_ParseFail; 5795 } 5796 5797 if (!skipToken(AsmToken::Comma, "expected a comma")) 5798 return MatchOperand_ParseFail; 5799 } 5800 5801 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5802 return MatchOperand_Success; 5803 } 5804 5805 OperandMatchResultTy 5806 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5807 AMDGPUOperand::ImmTy ImmTy) { 5808 int64_t Bit; 5809 SMLoc S = getLoc(); 5810 5811 if (trySkipId(Name)) { 5812 Bit = 1; 5813 } else if (trySkipId("no", Name)) { 5814 Bit = 0; 5815 } else { 5816 return MatchOperand_NoMatch; 5817 } 5818 5819 if (Name == "r128" && !hasMIMG_R128()) { 5820 Error(S, "r128 modifier is not supported on this GPU"); 5821 return MatchOperand_ParseFail; 5822 } 5823 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5824 Error(S, "a16 modifier is not supported on this GPU"); 5825 return MatchOperand_ParseFail; 5826 } 5827 5828 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5829 ImmTy = AMDGPUOperand::ImmTyR128A16; 5830 5831 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5832 return MatchOperand_Success; 5833 } 5834 5835 OperandMatchResultTy 5836 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5837 unsigned CPolOn = 0; 5838 unsigned CPolOff = 0; 5839 SMLoc S = getLoc(); 5840 5841 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5842 if (isGFX940() && !Mnemo.startswith("s_")) { 5843 if (trySkipId("sc0")) 5844 CPolOn = AMDGPU::CPol::SC0; 5845 else if (trySkipId("nosc0")) 5846 CPolOff = AMDGPU::CPol::SC0; 5847 else if (trySkipId("nt")) 5848 CPolOn = AMDGPU::CPol::NT; 5849 else if (trySkipId("nont")) 5850 CPolOff = AMDGPU::CPol::NT; 5851 else if (trySkipId("sc1")) 5852 CPolOn = AMDGPU::CPol::SC1; 5853 else if (trySkipId("nosc1")) 5854 CPolOff = AMDGPU::CPol::SC1; 5855 else 5856 return MatchOperand_NoMatch; 5857 } 5858 else if (trySkipId("glc")) 5859 CPolOn = AMDGPU::CPol::GLC; 5860 else if (trySkipId("noglc")) 5861 CPolOff = AMDGPU::CPol::GLC; 5862 else if (trySkipId("slc")) 5863 CPolOn = AMDGPU::CPol::SLC; 5864 else if (trySkipId("noslc")) 5865 CPolOff = AMDGPU::CPol::SLC; 5866 else if (trySkipId("dlc")) 5867 CPolOn = AMDGPU::CPol::DLC; 5868 else if (trySkipId("nodlc")) 5869 CPolOff = AMDGPU::CPol::DLC; 5870 else if (trySkipId("scc")) 5871 CPolOn = AMDGPU::CPol::SCC; 5872 else if (trySkipId("noscc")) 5873 CPolOff = AMDGPU::CPol::SCC; 5874 else 5875 return MatchOperand_NoMatch; 5876 5877 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5878 Error(S, "dlc modifier is not supported on this GPU"); 5879 return MatchOperand_ParseFail; 5880 } 5881 5882 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5883 Error(S, "scc modifier is not supported on this GPU"); 5884 return MatchOperand_ParseFail; 5885 } 5886 5887 if (CPolSeen & (CPolOn | CPolOff)) { 5888 Error(S, "duplicate cache policy modifier"); 5889 return MatchOperand_ParseFail; 5890 } 5891 5892 CPolSeen |= (CPolOn | CPolOff); 5893 5894 for (unsigned I = 1; I != Operands.size(); ++I) { 5895 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5896 if (Op.isCPol()) { 5897 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5898 return MatchOperand_Success; 5899 } 5900 } 5901 5902 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5903 AMDGPUOperand::ImmTyCPol)); 5904 5905 return MatchOperand_Success; 5906 } 5907 5908 static void addOptionalImmOperand( 5909 MCInst& Inst, const OperandVector& Operands, 5910 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5911 AMDGPUOperand::ImmTy ImmT, 5912 int64_t Default = 0) { 5913 auto i = OptionalIdx.find(ImmT); 5914 if (i != OptionalIdx.end()) { 5915 unsigned Idx = i->second; 5916 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5917 } else { 5918 Inst.addOperand(MCOperand::createImm(Default)); 5919 } 5920 } 5921 5922 OperandMatchResultTy 5923 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5924 StringRef &Value, 5925 SMLoc &StringLoc) { 5926 if (!trySkipId(Prefix, AsmToken::Colon)) 5927 return MatchOperand_NoMatch; 5928 5929 StringLoc = getLoc(); 5930 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5931 : MatchOperand_ParseFail; 5932 } 5933 5934 //===----------------------------------------------------------------------===// 5935 // MTBUF format 5936 //===----------------------------------------------------------------------===// 5937 5938 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5939 int64_t MaxVal, 5940 int64_t &Fmt) { 5941 int64_t Val; 5942 SMLoc Loc = getLoc(); 5943 5944 auto Res = parseIntWithPrefix(Pref, Val); 5945 if (Res == MatchOperand_ParseFail) 5946 return false; 5947 if (Res == MatchOperand_NoMatch) 5948 return true; 5949 5950 if (Val < 0 || Val > MaxVal) { 5951 Error(Loc, Twine("out of range ", StringRef(Pref))); 5952 return false; 5953 } 5954 5955 Fmt = Val; 5956 return true; 5957 } 5958 5959 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5960 // values to live in a joint format operand in the MCInst encoding. 5961 OperandMatchResultTy 5962 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5963 using namespace llvm::AMDGPU::MTBUFFormat; 5964 5965 int64_t Dfmt = DFMT_UNDEF; 5966 int64_t Nfmt = NFMT_UNDEF; 5967 5968 // dfmt and nfmt can appear in either order, and each is optional. 5969 for (int I = 0; I < 2; ++I) { 5970 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5971 return MatchOperand_ParseFail; 5972 5973 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5974 return MatchOperand_ParseFail; 5975 } 5976 // Skip optional comma between dfmt/nfmt 5977 // but guard against 2 commas following each other. 5978 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5979 !peekToken().is(AsmToken::Comma)) { 5980 trySkipToken(AsmToken::Comma); 5981 } 5982 } 5983 5984 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5985 return MatchOperand_NoMatch; 5986 5987 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5988 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5989 5990 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5991 return MatchOperand_Success; 5992 } 5993 5994 OperandMatchResultTy 5995 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5996 using namespace llvm::AMDGPU::MTBUFFormat; 5997 5998 int64_t Fmt = UFMT_UNDEF; 5999 6000 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6001 return MatchOperand_ParseFail; 6002 6003 if (Fmt == UFMT_UNDEF) 6004 return MatchOperand_NoMatch; 6005 6006 Format = Fmt; 6007 return MatchOperand_Success; 6008 } 6009 6010 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6011 int64_t &Nfmt, 6012 StringRef FormatStr, 6013 SMLoc Loc) { 6014 using namespace llvm::AMDGPU::MTBUFFormat; 6015 int64_t Format; 6016 6017 Format = getDfmt(FormatStr); 6018 if (Format != DFMT_UNDEF) { 6019 Dfmt = Format; 6020 return true; 6021 } 6022 6023 Format = getNfmt(FormatStr, getSTI()); 6024 if (Format != NFMT_UNDEF) { 6025 Nfmt = Format; 6026 return true; 6027 } 6028 6029 Error(Loc, "unsupported format"); 6030 return false; 6031 } 6032 6033 OperandMatchResultTy 6034 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6035 SMLoc FormatLoc, 6036 int64_t &Format) { 6037 using namespace llvm::AMDGPU::MTBUFFormat; 6038 6039 int64_t Dfmt = DFMT_UNDEF; 6040 int64_t Nfmt = NFMT_UNDEF; 6041 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6042 return MatchOperand_ParseFail; 6043 6044 if (trySkipToken(AsmToken::Comma)) { 6045 StringRef Str; 6046 SMLoc Loc = getLoc(); 6047 if (!parseId(Str, "expected a format string") || 6048 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6049 return MatchOperand_ParseFail; 6050 } 6051 if (Dfmt == DFMT_UNDEF) { 6052 Error(Loc, "duplicate numeric format"); 6053 return MatchOperand_ParseFail; 6054 } else if (Nfmt == NFMT_UNDEF) { 6055 Error(Loc, "duplicate data format"); 6056 return MatchOperand_ParseFail; 6057 } 6058 } 6059 6060 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6061 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6062 6063 if (isGFX10Plus()) { 6064 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6065 if (Ufmt == UFMT_UNDEF) { 6066 Error(FormatLoc, "unsupported format"); 6067 return MatchOperand_ParseFail; 6068 } 6069 Format = Ufmt; 6070 } else { 6071 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6072 } 6073 6074 return MatchOperand_Success; 6075 } 6076 6077 OperandMatchResultTy 6078 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6079 SMLoc Loc, 6080 int64_t &Format) { 6081 using namespace llvm::AMDGPU::MTBUFFormat; 6082 6083 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6084 if (Id == UFMT_UNDEF) 6085 return MatchOperand_NoMatch; 6086 6087 if (!isGFX10Plus()) { 6088 Error(Loc, "unified format is not supported on this GPU"); 6089 return MatchOperand_ParseFail; 6090 } 6091 6092 Format = Id; 6093 return MatchOperand_Success; 6094 } 6095 6096 OperandMatchResultTy 6097 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6098 using namespace llvm::AMDGPU::MTBUFFormat; 6099 SMLoc Loc = getLoc(); 6100 6101 if (!parseExpr(Format)) 6102 return MatchOperand_ParseFail; 6103 if (!isValidFormatEncoding(Format, getSTI())) { 6104 Error(Loc, "out of range format"); 6105 return MatchOperand_ParseFail; 6106 } 6107 6108 return MatchOperand_Success; 6109 } 6110 6111 OperandMatchResultTy 6112 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6113 using namespace llvm::AMDGPU::MTBUFFormat; 6114 6115 if (!trySkipId("format", AsmToken::Colon)) 6116 return MatchOperand_NoMatch; 6117 6118 if (trySkipToken(AsmToken::LBrac)) { 6119 StringRef FormatStr; 6120 SMLoc Loc = getLoc(); 6121 if (!parseId(FormatStr, "expected a format string")) 6122 return MatchOperand_ParseFail; 6123 6124 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6125 if (Res == MatchOperand_NoMatch) 6126 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6127 if (Res != MatchOperand_Success) 6128 return Res; 6129 6130 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6131 return MatchOperand_ParseFail; 6132 6133 return MatchOperand_Success; 6134 } 6135 6136 return parseNumericFormat(Format); 6137 } 6138 6139 OperandMatchResultTy 6140 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6141 using namespace llvm::AMDGPU::MTBUFFormat; 6142 6143 int64_t Format = getDefaultFormatEncoding(getSTI()); 6144 OperandMatchResultTy Res; 6145 SMLoc Loc = getLoc(); 6146 6147 // Parse legacy format syntax. 6148 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6149 if (Res == MatchOperand_ParseFail) 6150 return Res; 6151 6152 bool FormatFound = (Res == MatchOperand_Success); 6153 6154 Operands.push_back( 6155 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6156 6157 if (FormatFound) 6158 trySkipToken(AsmToken::Comma); 6159 6160 if (isToken(AsmToken::EndOfStatement)) { 6161 // We are expecting an soffset operand, 6162 // but let matcher handle the error. 6163 return MatchOperand_Success; 6164 } 6165 6166 // Parse soffset. 
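  // If no format was specified before soffset, a symbolic or numeric format
  // may still follow it; in that case the placeholder FORMAT operand pushed
  // above is patched with the parsed value below.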
6167 Res = parseRegOrImm(Operands); 6168 if (Res != MatchOperand_Success) 6169 return Res; 6170 6171 trySkipToken(AsmToken::Comma); 6172 6173 if (!FormatFound) { 6174 Res = parseSymbolicOrNumericFormat(Format); 6175 if (Res == MatchOperand_ParseFail) 6176 return Res; 6177 if (Res == MatchOperand_Success) { 6178 auto Size = Operands.size(); 6179 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6180 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6181 Op.setImm(Format); 6182 } 6183 return MatchOperand_Success; 6184 } 6185 6186 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6187 Error(getLoc(), "duplicate format"); 6188 return MatchOperand_ParseFail; 6189 } 6190 return MatchOperand_Success; 6191 } 6192 6193 //===----------------------------------------------------------------------===// 6194 // ds 6195 //===----------------------------------------------------------------------===// 6196 6197 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6198 const OperandVector &Operands) { 6199 OptionalImmIndexMap OptionalIdx; 6200 6201 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6202 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6203 6204 // Add the register arguments 6205 if (Op.isReg()) { 6206 Op.addRegOperands(Inst, 1); 6207 continue; 6208 } 6209 6210 // Handle optional arguments 6211 OptionalIdx[Op.getImmTy()] = i; 6212 } 6213 6214 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6215 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6216 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6217 6218 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6219 } 6220 6221 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6222 bool IsGdsHardcoded) { 6223 OptionalImmIndexMap OptionalIdx; 6224 6225 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6226 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6227 6228 // Add the register arguments 6229 if (Op.isReg()) { 6230 Op.addRegOperands(Inst, 1); 6231 continue; 6232 } 6233 6234 if (Op.isToken() && Op.getToken() == "gds") { 6235 IsGdsHardcoded = true; 6236 continue; 6237 } 6238 6239 // Handle optional arguments 6240 OptionalIdx[Op.getImmTy()] = i; 6241 } 6242 6243 AMDGPUOperand::ImmTy OffsetType = 6244 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6245 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6246 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6247 AMDGPUOperand::ImmTyOffset; 6248 6249 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6250 6251 if (!IsGdsHardcoded) { 6252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6253 } 6254 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6255 } 6256 6257 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6258 OptionalImmIndexMap OptionalIdx; 6259 6260 unsigned OperandIdx[4]; 6261 unsigned EnMask = 0; 6262 int SrcIdx = 0; 6263 6264 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6265 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6266 6267 // Add the register arguments 6268 if (Op.isReg()) { 6269 assert(SrcIdx < 4); 6270 OperandIdx[SrcIdx] = Inst.size(); 6271 Op.addRegOperands(Inst, 1); 6272 ++SrcIdx; 6273 continue; 6274 } 6275 6276 if (Op.isOff()) { 6277 assert(SrcIdx < 4); 6278 OperandIdx[SrcIdx] = Inst.size(); 6279 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6280 ++SrcIdx; 6281 continue; 6282 } 6283 6284 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6285 Op.addImmOperands(Inst, 1); 6286 continue; 6287 } 6288 6289 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6290 continue; 6291 6292 // Handle optional arguments 6293 OptionalIdx[Op.getImmTy()] = i; 6294 } 6295 6296 assert(SrcIdx == 4); 6297 6298 bool Compr = false; 6299 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6300 Compr = true; 6301 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6302 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6303 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6304 } 6305 6306 for (auto i = 0; i < SrcIdx; ++i) { 6307 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6308 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6309 } 6310 } 6311 6312 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6313 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6314 6315 Inst.addOperand(MCOperand::createImm(EnMask)); 6316 } 6317 6318 //===----------------------------------------------------------------------===// 6319 // s_waitcnt 6320 //===----------------------------------------------------------------------===// 6321 6322 static bool 6323 encodeCnt( 6324 const AMDGPU::IsaVersion ISA, 6325 int64_t &IntVal, 6326 int64_t CntVal, 6327 bool Saturate, 6328 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6329 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6330 { 6331 bool Failed = false; 6332 6333 IntVal = encode(ISA, IntVal, CntVal); 6334 if (CntVal != decode(ISA, IntVal)) { 6335 if (Saturate) { 6336 IntVal = encode(ISA, IntVal, -1); 6337 } else { 6338 Failed = true; 6339 } 6340 } 6341 return Failed; 6342 } 6343 6344 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6345 6346 SMLoc CntLoc = getLoc(); 6347 StringRef CntName = getTokenStr(); 6348 6349 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6350 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6351 return false; 6352 6353 int64_t CntVal; 6354 SMLoc ValLoc = getLoc(); 6355 if (!parseExpr(CntVal)) 6356 return false; 6357 6358 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6359 6360 bool Failed = true; 6361 bool Sat = CntName.endswith("_sat"); 6362 6363 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6364 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6365 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6366 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6367 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6368 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6369 } else { 6370 Error(CntLoc, "invalid counter name " + CntName); 6371 return false; 6372 } 6373 6374 if (Failed) { 6375 Error(ValLoc, "too large value for " + CntName); 6376 return false; 6377 } 6378 6379 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6380 return false; 6381 6382 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6383 if (isToken(AsmToken::EndOfStatement)) { 6384 Error(getLoc(), "expected a counter name"); 6385 return false; 6386 } 6387 } 6388 6389 return true; 6390 } 6391 6392 OperandMatchResultTy 6393 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6394 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6395 int64_t Waitcnt = getWaitcntBitMask(ISA); 6396 SMLoc S = getLoc(); 6397 6398 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6399 while (!isToken(AsmToken::EndOfStatement)) { 6400 if (!parseCnt(Waitcnt)) 6401 return MatchOperand_ParseFail; 6402 } 6403 } else { 6404 if (!parseExpr(Waitcnt)) 6405 return MatchOperand_ParseFail; 6406 } 6407 6408 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6409 return MatchOperand_Success; 6410 } 6411 6412 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6413 SMLoc FieldLoc = getLoc(); 6414 StringRef FieldName = getTokenStr(); 6415 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6416 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6417 return false; 6418 6419 SMLoc ValueLoc = getLoc(); 6420 StringRef ValueName = getTokenStr(); 6421 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6422 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6423 return false; 6424 6425 unsigned Shift; 6426 if (FieldName == "instid0") { 6427 Shift = 0; 6428 } else if (FieldName == "instskip") { 6429 Shift = 4; 6430 } else if (FieldName == "instid1") { 6431 Shift = 7; 6432 } else { 6433 Error(FieldLoc, "invalid field name " + FieldName); 6434 return false; 6435 } 6436 6437 int Value; 6438 if (Shift == 4) { 6439 // Parse values for instskip. 6440 Value = StringSwitch<int>(ValueName) 6441 .Case("SAME", 0) 6442 .Case("NEXT", 1) 6443 .Case("SKIP_1", 2) 6444 .Case("SKIP_2", 3) 6445 .Case("SKIP_3", 4) 6446 .Case("SKIP_4", 5) 6447 .Default(-1); 6448 } else { 6449 // Parse values for instid0 and instid1. 6450 Value = StringSwitch<int>(ValueName) 6451 .Case("NO_DEP", 0) 6452 .Case("VALU_DEP_1", 1) 6453 .Case("VALU_DEP_2", 2) 6454 .Case("VALU_DEP_3", 3) 6455 .Case("VALU_DEP_4", 4) 6456 .Case("TRANS32_DEP_1", 5) 6457 .Case("TRANS32_DEP_2", 6) 6458 .Case("TRANS32_DEP_3", 7) 6459 .Case("FMA_ACCUM_CYCLE_1", 8) 6460 .Case("SALU_CYCLE_1", 9) 6461 .Case("SALU_CYCLE_2", 10) 6462 .Case("SALU_CYCLE_3", 11) 6463 .Default(-1); 6464 } 6465 if (Value < 0) { 6466 Error(ValueLoc, "invalid value name " + ValueName); 6467 return false; 6468 } 6469 6470 Delay |= Value << Shift; 6471 return true; 6472 } 6473 6474 OperandMatchResultTy 6475 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6476 int64_t Delay = 0; 6477 SMLoc S = getLoc(); 6478 6479 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6480 do { 6481 if (!parseDelay(Delay)) 6482 return MatchOperand_ParseFail; 6483 } while (trySkipToken(AsmToken::Pipe)); 6484 } else { 6485 if (!parseExpr(Delay)) 6486 return MatchOperand_ParseFail; 6487 } 6488 6489 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6490 return MatchOperand_Success; 6491 } 6492 6493 bool 6494 AMDGPUOperand::isSWaitCnt() const { 6495 return isImm(); 6496 } 6497 6498 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6499 6500 //===----------------------------------------------------------------------===// 6501 // DepCtr 6502 //===----------------------------------------------------------------------===// 6503 6504 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6505 StringRef DepCtrName) { 6506 switch (ErrorId) { 6507 case OPR_ID_UNKNOWN: 6508 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6509 return; 6510 case OPR_ID_UNSUPPORTED: 6511 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6512 return; 6513 case OPR_ID_DUPLICATE: 6514 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6515 return; 6516 case OPR_VAL_INVALID: 6517 Error(Loc, Twine("invalid value for ", DepCtrName)); 6518 return; 6519 default: 6520 assert(false); 6521 } 6522 } 6523 6524 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6525 6526 using namespace llvm::AMDGPU::DepCtr; 6527 6528 SMLoc DepCtrLoc = getLoc(); 6529 StringRef DepCtrName = getTokenStr(); 6530 6531 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6532 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6533 return false; 6534 6535 int64_t ExprVal; 6536 if (!parseExpr(ExprVal)) 6537 return false; 6538 6539 unsigned PrevOprMask = UsedOprMask; 6540 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6541 6542 if (CntVal < 0) { 6543 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6544 return false; 6545 } 6546 6547 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6548 return false; 6549 6550 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6551 if (isToken(AsmToken::EndOfStatement)) { 6552 Error(getLoc(), "expected a counter name"); 6553 return false; 6554 } 6555 } 6556 6557 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6558 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6559 return true; 6560 } 6561 6562 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6563 using namespace llvm::AMDGPU::DepCtr; 6564 6565 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6566 SMLoc Loc = getLoc(); 6567 6568 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6569 unsigned UsedOprMask = 0; 6570 while (!isToken(AsmToken::EndOfStatement)) { 6571 if (!parseDepCtr(DepCtr, UsedOprMask)) 6572 return MatchOperand_ParseFail; 6573 } 6574 } else { 6575 if (!parseExpr(DepCtr)) 6576 return MatchOperand_ParseFail; 6577 } 6578 6579 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6580 return MatchOperand_Success; 6581 } 6582 6583 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6584 6585 //===----------------------------------------------------------------------===// 6586 // hwreg 6587 //===----------------------------------------------------------------------===// 6588 6589 bool 6590 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6591 OperandInfoTy &Offset, 6592 OperandInfoTy &Width) { 6593 using namespace llvm::AMDGPU::Hwreg; 6594 6595 // The register may be specified by name or using a numeric code 6596 HwReg.Loc = getLoc(); 6597 if (isToken(AsmToken::Identifier) && 6598 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6599 HwReg.IsSymbolic = true; 6600 lex(); // skip register name 6601 } else if (!parseExpr(HwReg.Id, "a register name")) { 6602 return false; 6603 } 6604 6605 if (trySkipToken(AsmToken::RParen)) 6606 return true; 6607 6608 // parse optional params 6609 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6610 return false; 6611 6612 Offset.Loc = getLoc(); 6613 if (!parseExpr(Offset.Id)) 6614 return false; 6615 6616 if (!skipToken(AsmToken::Comma, "expected a comma")) 6617 return false; 6618 6619 Width.Loc = getLoc(); 6620 return parseExpr(Width.Id) && 6621 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6622 } 6623 6624 bool 6625 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6626 const OperandInfoTy &Offset, 6627 const OperandInfoTy &Width) { 6628 6629 using namespace llvm::AMDGPU::Hwreg; 6630 6631 if (HwReg.IsSymbolic) { 6632 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6633 Error(HwReg.Loc, 6634 "specified hardware register is not supported on this GPU"); 6635 return false; 6636 } 6637 } else { 6638 if (!isValidHwreg(HwReg.Id)) { 6639 Error(HwReg.Loc, 6640 "invalid code of hardware register: only 6-bit values are legal"); 6641 return false; 6642 } 6643 } 6644 if (!isValidHwregOffset(Offset.Id)) { 6645 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6646 return false; 6647 } 6648 if (!isValidHwregWidth(Width.Id)) { 6649 Error(Width.Loc, 6650 "invalid bitfield width: only values from 1 to 32 are legal"); 6651 return false; 6652 } 6653 return true; 6654 } 6655 6656 OperandMatchResultTy 6657 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6658 using namespace llvm::AMDGPU::Hwreg; 6659 6660 int64_t ImmVal = 0; 6661 SMLoc Loc = getLoc(); 6662 6663 if (trySkipId("hwreg", AsmToken::LParen)) { 6664 
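    // Symbolic form: hwreg(<name or code>[, <bit offset>, <bit width>]),
    // e.g. hwreg(HW_REG_MODE, 0, 32). The fields are parsed by parseHwregBody
    // and range-checked by validateHwreg before being encoded.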
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6665 OperandInfoTy Offset(OFFSET_DEFAULT_); 6666 OperandInfoTy Width(WIDTH_DEFAULT_); 6667 if (parseHwregBody(HwReg, Offset, Width) && 6668 validateHwreg(HwReg, Offset, Width)) { 6669 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6670 } else { 6671 return MatchOperand_ParseFail; 6672 } 6673 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6674 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6675 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6676 return MatchOperand_ParseFail; 6677 } 6678 } else { 6679 return MatchOperand_ParseFail; 6680 } 6681 6682 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6683 return MatchOperand_Success; 6684 } 6685 6686 bool AMDGPUOperand::isHwreg() const { 6687 return isImmTy(ImmTyHwreg); 6688 } 6689 6690 //===----------------------------------------------------------------------===// 6691 // sendmsg 6692 //===----------------------------------------------------------------------===// 6693 6694 bool 6695 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6696 OperandInfoTy &Op, 6697 OperandInfoTy &Stream) { 6698 using namespace llvm::AMDGPU::SendMsg; 6699 6700 Msg.Loc = getLoc(); 6701 if (isToken(AsmToken::Identifier) && 6702 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6703 Msg.IsSymbolic = true; 6704 lex(); // skip message name 6705 } else if (!parseExpr(Msg.Id, "a message name")) { 6706 return false; 6707 } 6708 6709 if (trySkipToken(AsmToken::Comma)) { 6710 Op.IsDefined = true; 6711 Op.Loc = getLoc(); 6712 if (isToken(AsmToken::Identifier) && 6713 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6714 lex(); // skip operation name 6715 } else if (!parseExpr(Op.Id, "an operation name")) { 6716 return false; 6717 } 6718 6719 if (trySkipToken(AsmToken::Comma)) { 6720 Stream.IsDefined = true; 6721 Stream.Loc = getLoc(); 6722 if (!parseExpr(Stream.Id)) 6723 return false; 6724 } 6725 } 6726 6727 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6728 } 6729 6730 bool 6731 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6732 const OperandInfoTy &Op, 6733 const OperandInfoTy &Stream) { 6734 using namespace llvm::AMDGPU::SendMsg; 6735 6736 // Validation strictness depends on whether the message is specified 6737 // in a symbolic or in a numeric form. In the latter case 6738 // we only check that the value can be encoded.
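  // For example, sendmsg(MSG_GS_DONE, GS_OP_NOP) is symbolic and fully
  // validated, while sendmsg(3) is numeric and only checked for encodability.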
6739 bool Strict = Msg.IsSymbolic; 6740 6741 if (Strict) { 6742 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6743 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6744 return false; 6745 } 6746 } else { 6747 if (!isValidMsgId(Msg.Id, getSTI())) { 6748 Error(Msg.Loc, "invalid message id"); 6749 return false; 6750 } 6751 } 6752 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6753 if (Op.IsDefined) { 6754 Error(Op.Loc, "message does not support operations"); 6755 } else { 6756 Error(Msg.Loc, "missing message operation"); 6757 } 6758 return false; 6759 } 6760 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6761 Error(Op.Loc, "invalid operation id"); 6762 return false; 6763 } 6764 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6765 Stream.IsDefined) { 6766 Error(Stream.Loc, "message operation does not support streams"); 6767 return false; 6768 } 6769 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6770 Error(Stream.Loc, "invalid message stream id"); 6771 return false; 6772 } 6773 return true; 6774 } 6775 6776 OperandMatchResultTy 6777 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6778 using namespace llvm::AMDGPU::SendMsg; 6779 6780 int64_t ImmVal = 0; 6781 SMLoc Loc = getLoc(); 6782 6783 if (trySkipId("sendmsg", AsmToken::LParen)) { 6784 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6785 OperandInfoTy Op(OP_NONE_); 6786 OperandInfoTy Stream(STREAM_ID_NONE_); 6787 if (parseSendMsgBody(Msg, Op, Stream) && 6788 validateSendMsg(Msg, Op, Stream)) { 6789 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6790 } else { 6791 return MatchOperand_ParseFail; 6792 } 6793 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6794 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6795 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6796 return MatchOperand_ParseFail; 6797 } 6798 } else { 6799 return MatchOperand_ParseFail; 6800 } 6801 6802 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6803 return MatchOperand_Success; 6804 } 6805 6806 bool AMDGPUOperand::isSendMsg() const { 6807 return isImmTy(ImmTySendMsg); 6808 } 6809 6810 //===----------------------------------------------------------------------===// 6811 // v_interp 6812 //===----------------------------------------------------------------------===// 6813 6814 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6815 StringRef Str; 6816 SMLoc S = getLoc(); 6817 6818 if (!parseId(Str)) 6819 return MatchOperand_NoMatch; 6820 6821 int Slot = StringSwitch<int>(Str) 6822 .Case("p10", 0) 6823 .Case("p20", 1) 6824 .Case("p0", 2) 6825 .Default(-1); 6826 6827 if (Slot == -1) { 6828 Error(S, "invalid interpolation slot"); 6829 return MatchOperand_ParseFail; 6830 } 6831 6832 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6833 AMDGPUOperand::ImmTyInterpSlot)); 6834 return MatchOperand_Success; 6835 } 6836 6837 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6838 StringRef Str; 6839 SMLoc S = getLoc(); 6840 6841 if (!parseId(Str)) 6842 return MatchOperand_NoMatch; 6843 6844 if (!Str.startswith("attr")) { 6845 Error(S, "invalid interpolation attribute"); 6846 return MatchOperand_ParseFail; 6847 } 6848 6849 StringRef Chan = Str.take_back(2); 6850 int AttrChan = StringSwitch<int>(Chan) 6851 .Case(".x", 0) 6852 .Case(".y", 1) 6853 .Case(".z", 2) 6854 .Case(".w", 3) 6855 .Default(-1); 6856 if (AttrChan == -1) { 6857 Error(S, "invalid or missing interpolation attribute channel"); 
6858 return MatchOperand_ParseFail; 6859 } 6860 6861 Str = Str.drop_back(2).drop_front(4); 6862 6863 uint8_t Attr; 6864 if (Str.getAsInteger(10, Attr)) { 6865 Error(S, "invalid or missing interpolation attribute number"); 6866 return MatchOperand_ParseFail; 6867 } 6868 6869 if (Attr > 63) { 6870 Error(S, "out of bounds interpolation attribute number"); 6871 return MatchOperand_ParseFail; 6872 } 6873 6874 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6875 6876 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6877 AMDGPUOperand::ImmTyInterpAttr)); 6878 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6879 AMDGPUOperand::ImmTyAttrChan)); 6880 return MatchOperand_Success; 6881 } 6882 6883 //===----------------------------------------------------------------------===// 6884 // exp 6885 //===----------------------------------------------------------------------===// 6886 6887 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6888 using namespace llvm::AMDGPU::Exp; 6889 6890 StringRef Str; 6891 SMLoc S = getLoc(); 6892 6893 if (!parseId(Str)) 6894 return MatchOperand_NoMatch; 6895 6896 unsigned Id = getTgtId(Str); 6897 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6898 Error(S, (Id == ET_INVALID) ? 6899 "invalid exp target" : 6900 "exp target is not supported on this GPU"); 6901 return MatchOperand_ParseFail; 6902 } 6903 6904 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6905 AMDGPUOperand::ImmTyExpTgt)); 6906 return MatchOperand_Success; 6907 } 6908 6909 //===----------------------------------------------------------------------===// 6910 // parser helpers 6911 //===----------------------------------------------------------------------===// 6912 6913 bool 6914 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6915 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6916 } 6917 6918 bool 6919 AMDGPUAsmParser::isId(const StringRef Id) const { 6920 return isId(getToken(), Id); 6921 } 6922 6923 bool 6924 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6925 return getTokenKind() == Kind; 6926 } 6927 6928 bool 6929 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6930 if (isId(Id)) { 6931 lex(); 6932 return true; 6933 } 6934 return false; 6935 } 6936 6937 bool 6938 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6939 if (isToken(AsmToken::Identifier)) { 6940 StringRef Tok = getTokenStr(); 6941 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6942 lex(); 6943 return true; 6944 } 6945 } 6946 return false; 6947 } 6948 6949 bool 6950 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6951 if (isId(Id) && peekToken().is(Kind)) { 6952 lex(); 6953 lex(); 6954 return true; 6955 } 6956 return false; 6957 } 6958 6959 bool 6960 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6961 if (isToken(Kind)) { 6962 lex(); 6963 return true; 6964 } 6965 return false; 6966 } 6967 6968 bool 6969 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6970 const StringRef ErrMsg) { 6971 if (!trySkipToken(Kind)) { 6972 Error(getLoc(), ErrMsg); 6973 return false; 6974 } 6975 return true; 6976 } 6977 6978 bool 6979 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6980 SMLoc S = getLoc(); 6981 6982 const MCExpr *Expr; 6983 if (Parser.parseExpression(Expr)) 6984 return false; 6985 6986 if (Expr->evaluateAsAbsolute(Imm)) 6987 return true; 6988 6989 if (Expected.empty()) { 6990 Error(S, "expected 
absolute expression"); 6991 } else { 6992 Error(S, Twine("expected ", Expected) + 6993 Twine(" or an absolute expression")); 6994 } 6995 return false; 6996 } 6997 6998 bool 6999 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7000 SMLoc S = getLoc(); 7001 7002 const MCExpr *Expr; 7003 if (Parser.parseExpression(Expr)) 7004 return false; 7005 7006 int64_t IntVal; 7007 if (Expr->evaluateAsAbsolute(IntVal)) { 7008 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7009 } else { 7010 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7011 } 7012 return true; 7013 } 7014 7015 bool 7016 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7017 if (isToken(AsmToken::String)) { 7018 Val = getToken().getStringContents(); 7019 lex(); 7020 return true; 7021 } else { 7022 Error(getLoc(), ErrMsg); 7023 return false; 7024 } 7025 } 7026 7027 bool 7028 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7029 if (isToken(AsmToken::Identifier)) { 7030 Val = getTokenStr(); 7031 lex(); 7032 return true; 7033 } else { 7034 if (!ErrMsg.empty()) 7035 Error(getLoc(), ErrMsg); 7036 return false; 7037 } 7038 } 7039 7040 AsmToken 7041 AMDGPUAsmParser::getToken() const { 7042 return Parser.getTok(); 7043 } 7044 7045 AsmToken 7046 AMDGPUAsmParser::peekToken() { 7047 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7048 } 7049 7050 void 7051 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7052 auto TokCount = getLexer().peekTokens(Tokens); 7053 7054 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7055 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7056 } 7057 7058 AsmToken::TokenKind 7059 AMDGPUAsmParser::getTokenKind() const { 7060 return getLexer().getKind(); 7061 } 7062 7063 SMLoc 7064 AMDGPUAsmParser::getLoc() const { 7065 return getToken().getLoc(); 7066 } 7067 7068 StringRef 7069 AMDGPUAsmParser::getTokenStr() const { 7070 return getToken().getString(); 7071 } 7072 7073 void 7074 AMDGPUAsmParser::lex() { 7075 Parser.Lex(); 7076 } 7077 7078 SMLoc 7079 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7080 const OperandVector &Operands) const { 7081 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7082 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7083 if (Test(Op)) 7084 return Op.getStartLoc(); 7085 } 7086 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7087 } 7088 7089 SMLoc 7090 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7091 const OperandVector &Operands) const { 7092 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7093 return getOperandLoc(Test, Operands); 7094 } 7095 7096 SMLoc 7097 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7098 const OperandVector &Operands) const { 7099 auto Test = [=](const AMDGPUOperand& Op) { 7100 return Op.isRegKind() && Op.getReg() == Reg; 7101 }; 7102 return getOperandLoc(Test, Operands); 7103 } 7104 7105 SMLoc 7106 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7107 auto Test = [](const AMDGPUOperand& Op) { 7108 return Op.IsImmKindLiteral() || Op.isExpr(); 7109 }; 7110 return getOperandLoc(Test, Operands); 7111 } 7112 7113 SMLoc 7114 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7115 auto Test = [](const AMDGPUOperand& Op) { 7116 return Op.isImmKindConst(); 7117 }; 7118 return getOperandLoc(Test, Operands); 7119 } 7120 7121 //===----------------------------------------------------------------------===// 7122 // swizzle 7123 
//===----------------------------------------------------------------------===// 7124 7125 LLVM_READNONE 7126 static unsigned 7127 encodeBitmaskPerm(const unsigned AndMask, 7128 const unsigned OrMask, 7129 const unsigned XorMask) { 7130 using namespace llvm::AMDGPU::Swizzle; 7131 7132 return BITMASK_PERM_ENC | 7133 (AndMask << BITMASK_AND_SHIFT) | 7134 (OrMask << BITMASK_OR_SHIFT) | 7135 (XorMask << BITMASK_XOR_SHIFT); 7136 } 7137 7138 bool 7139 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7140 const unsigned MinVal, 7141 const unsigned MaxVal, 7142 const StringRef ErrMsg, 7143 SMLoc &Loc) { 7144 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7145 return false; 7146 } 7147 Loc = getLoc(); 7148 if (!parseExpr(Op)) { 7149 return false; 7150 } 7151 if (Op < MinVal || Op > MaxVal) { 7152 Error(Loc, ErrMsg); 7153 return false; 7154 } 7155 7156 return true; 7157 } 7158 7159 bool 7160 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7161 const unsigned MinVal, 7162 const unsigned MaxVal, 7163 const StringRef ErrMsg) { 7164 SMLoc Loc; 7165 for (unsigned i = 0; i < OpNum; ++i) { 7166 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7167 return false; 7168 } 7169 7170 return true; 7171 } 7172 7173 bool 7174 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7175 using namespace llvm::AMDGPU::Swizzle; 7176 7177 int64_t Lane[LANE_NUM]; 7178 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7179 "expected a 2-bit lane id")) { 7180 Imm = QUAD_PERM_ENC; 7181 for (unsigned I = 0; I < LANE_NUM; ++I) { 7182 Imm |= Lane[I] << (LANE_SHIFT * I); 7183 } 7184 return true; 7185 } 7186 return false; 7187 } 7188 7189 bool 7190 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7191 using namespace llvm::AMDGPU::Swizzle; 7192 7193 SMLoc Loc; 7194 int64_t GroupSize; 7195 int64_t LaneIdx; 7196 7197 if (!parseSwizzleOperand(GroupSize, 7198 2, 32, 7199 "group size must be in the interval [2,32]", 7200 Loc)) { 7201 return false; 7202 } 7203 if (!isPowerOf2_64(GroupSize)) { 7204 Error(Loc, "group size must be a power of two"); 7205 return false; 7206 } 7207 if (parseSwizzleOperand(LaneIdx, 7208 0, GroupSize - 1, 7209 "lane id must be in the interval [0,group size - 1]", 7210 Loc)) { 7211 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7212 return true; 7213 } 7214 return false; 7215 } 7216 7217 bool 7218 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7219 using namespace llvm::AMDGPU::Swizzle; 7220 7221 SMLoc Loc; 7222 int64_t GroupSize; 7223 7224 if (!parseSwizzleOperand(GroupSize, 7225 2, 32, 7226 "group size must be in the interval [2,32]", 7227 Loc)) { 7228 return false; 7229 } 7230 if (!isPowerOf2_64(GroupSize)) { 7231 Error(Loc, "group size must be a power of two"); 7232 return false; 7233 } 7234 7235 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7236 return true; 7237 } 7238 7239 bool 7240 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7241 using namespace llvm::AMDGPU::Swizzle; 7242 7243 SMLoc Loc; 7244 int64_t GroupSize; 7245 7246 if (!parseSwizzleOperand(GroupSize, 7247 1, 16, 7248 "group size must be in the interval [1,16]", 7249 Loc)) { 7250 return false; 7251 } 7252 if (!isPowerOf2_64(GroupSize)) { 7253 Error(Loc, "group size must be a power of two"); 7254 return false; 7255 } 7256 7257 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7258 return true; 7259 } 7260 7261 bool 7262 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7263 using namespace llvm::AMDGPU::Swizzle; 7264 7265 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7266 return false; 7267 } 7268 7269 StringRef Ctl; 7270 SMLoc StrLoc = getLoc(); 7271 if (!parseString(Ctl)) { 7272 return false; 7273 } 7274 if (Ctl.size() != BITMASK_WIDTH) { 7275 Error(StrLoc, "expected a 5-character mask"); 7276 return false; 7277 } 7278 7279 unsigned AndMask = 0; 7280 unsigned OrMask = 0; 7281 unsigned XorMask = 0; 7282 7283 for (size_t i = 0; i < Ctl.size(); ++i) { 7284 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7285 switch(Ctl[i]) { 7286 default: 7287 Error(StrLoc, "invalid mask"); 7288 return false; 7289 case '0': 7290 break; 7291 case '1': 7292 OrMask |= Mask; 7293 break; 7294 case 'p': 7295 AndMask |= Mask; 7296 break; 7297 case 'i': 7298 AndMask |= Mask; 7299 XorMask |= Mask; 7300 break; 7301 } 7302 } 7303 7304 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7305 return true; 7306 } 7307 7308 bool 7309 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7310 7311 SMLoc OffsetLoc = getLoc(); 7312 7313 if (!parseExpr(Imm, "a swizzle macro")) { 7314 return false; 7315 } 7316 if (!isUInt<16>(Imm)) { 7317 Error(OffsetLoc, "expected a 16-bit offset"); 7318 return false; 7319 } 7320 return true; 7321 } 7322 7323 bool 7324 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7325 using namespace llvm::AMDGPU::Swizzle; 7326 7327 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7328 7329 SMLoc ModeLoc = getLoc(); 7330 bool Ok = false; 7331 7332 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7333 Ok = parseSwizzleQuadPerm(Imm); 7334 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7335 Ok = parseSwizzleBitmaskPerm(Imm); 7336 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7337 Ok = parseSwizzleBroadcast(Imm); 7338 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7339 Ok = parseSwizzleSwap(Imm); 7340 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7341 Ok = parseSwizzleReverse(Imm); 7342 } else { 7343 Error(ModeLoc, "expected a swizzle mode"); 7344 } 7345 7346 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7347 } 7348 7349 return false; 7350 } 7351 7352 OperandMatchResultTy 7353 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7354 SMLoc S = getLoc(); 7355 int64_t Imm = 0; 7356 7357 if (trySkipId("offset")) { 7358 7359 bool Ok = false; 7360 if (skipToken(AsmToken::Colon, "expected a colon")) { 7361 if (trySkipId("swizzle")) { 7362 Ok = parseSwizzleMacro(Imm); 7363 } else { 7364 Ok = parseSwizzleOffset(Imm); 7365 } 7366 } 7367 7368 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7369 7370 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7371 } else { 7372 // Swizzle "offset" operand is optional. 7373 // If it is omitted, try parsing other optional operands. 
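    // A sketch of the forms handled by the "offset" branch above, assuming
    // the usual ds_swizzle_b32 syntax (illustrative examples only; the mode
    // names come from Swizzle::IdSymbolic):
    //   ds_swizzle_b32 v8, v2 offset:0xffff
    //   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 16)
    //   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)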
7374 return parseOptionalOpr(Operands); 7375 } 7376 } 7377 7378 bool 7379 AMDGPUOperand::isSwizzle() const { 7380 return isImmTy(ImmTySwizzle); 7381 } 7382 7383 //===----------------------------------------------------------------------===// 7384 // VGPR Index Mode 7385 //===----------------------------------------------------------------------===// 7386 7387 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7388 7389 using namespace llvm::AMDGPU::VGPRIndexMode; 7390 7391 if (trySkipToken(AsmToken::RParen)) { 7392 return OFF; 7393 } 7394 7395 int64_t Imm = 0; 7396 7397 while (true) { 7398 unsigned Mode = 0; 7399 SMLoc S = getLoc(); 7400 7401 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7402 if (trySkipId(IdSymbolic[ModeId])) { 7403 Mode = 1 << ModeId; 7404 break; 7405 } 7406 } 7407 7408 if (Mode == 0) { 7409 Error(S, (Imm == 0)? 7410 "expected a VGPR index mode or a closing parenthesis" : 7411 "expected a VGPR index mode"); 7412 return UNDEF; 7413 } 7414 7415 if (Imm & Mode) { 7416 Error(S, "duplicate VGPR index mode"); 7417 return UNDEF; 7418 } 7419 Imm |= Mode; 7420 7421 if (trySkipToken(AsmToken::RParen)) 7422 break; 7423 if (!skipToken(AsmToken::Comma, 7424 "expected a comma or a closing parenthesis")) 7425 return UNDEF; 7426 } 7427 7428 return Imm; 7429 } 7430 7431 OperandMatchResultTy 7432 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7433 7434 using namespace llvm::AMDGPU::VGPRIndexMode; 7435 7436 int64_t Imm = 0; 7437 SMLoc S = getLoc(); 7438 7439 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7440 Imm = parseGPRIdxMacro(); 7441 if (Imm == UNDEF) 7442 return MatchOperand_ParseFail; 7443 } else { 7444 if (getParser().parseAbsoluteExpression(Imm)) 7445 return MatchOperand_ParseFail; 7446 if (Imm < 0 || !isUInt<4>(Imm)) { 7447 Error(S, "invalid immediate: only 4-bit values are legal"); 7448 return MatchOperand_ParseFail; 7449 } 7450 } 7451 7452 Operands.push_back( 7453 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7454 return MatchOperand_Success; 7455 } 7456 7457 bool AMDGPUOperand::isGPRIdxMode() const { 7458 return isImmTy(ImmTyGprIdxMode); 7459 } 7460 7461 //===----------------------------------------------------------------------===// 7462 // sopp branch targets 7463 //===----------------------------------------------------------------------===// 7464 7465 OperandMatchResultTy 7466 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7467 7468 // Make sure we are not parsing something 7469 // that looks like a label or an expression but is not. 7470 // This will improve error messages. 7471 if (isRegister() || isModifier()) 7472 return MatchOperand_NoMatch; 7473 7474 if (!parseExpr(Operands)) 7475 return MatchOperand_ParseFail; 7476 7477 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7478 assert(Opr.isImm() || Opr.isExpr()); 7479 SMLoc Loc = Opr.getStartLoc(); 7480 7481 // Currently we do not support arbitrary expressions as branch targets. 7482 // Only labels and absolute expressions are accepted. 
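  // For example, a bare label ("s_branch loop_end") or a small literal
  // ("s_branch 8") is accepted by the checks below, while a composite
  // expression such as "s_branch loop_end+4" is rejected (hypothetical
  // examples for illustration).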
7483 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7484 Error(Loc, "expected an absolute expression or a label"); 7485 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7486 Error(Loc, "expected a 16-bit signed jump offset"); 7487 } 7488 7489 return MatchOperand_Success; 7490 } 7491 7492 //===----------------------------------------------------------------------===// 7493 // Boolean holding registers 7494 //===----------------------------------------------------------------------===// 7495 7496 OperandMatchResultTy 7497 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7498 return parseReg(Operands); 7499 } 7500 7501 //===----------------------------------------------------------------------===// 7502 // mubuf 7503 //===----------------------------------------------------------------------===// 7504 7505 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7506 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7507 } 7508 7509 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7510 const OperandVector &Operands, 7511 bool IsAtomic, 7512 bool IsLds) { 7513 OptionalImmIndexMap OptionalIdx; 7514 unsigned FirstOperandIdx = 1; 7515 bool IsAtomicReturn = false; 7516 7517 if (IsAtomic) { 7518 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7519 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7520 if (!Op.isCPol()) 7521 continue; 7522 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7523 break; 7524 } 7525 7526 if (!IsAtomicReturn) { 7527 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7528 if (NewOpc != -1) 7529 Inst.setOpcode(NewOpc); 7530 } 7531 7532 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7533 SIInstrFlags::IsAtomicRet; 7534 } 7535 7536 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7537 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7538 7539 // Add the register arguments 7540 if (Op.isReg()) { 7541 Op.addRegOperands(Inst, 1); 7542 // Insert a tied src for atomic return dst. 7543 // This cannot be postponed as subsequent calls to 7544 // addImmOperands rely on correct number of MC operands. 7545 if (IsAtomicReturn && i == FirstOperandIdx) 7546 Op.addRegOperands(Inst, 1); 7547 continue; 7548 } 7549 7550 // Handle the case where soffset is an immediate 7551 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7552 Op.addImmOperands(Inst, 1); 7553 continue; 7554 } 7555 7556 // Handle tokens like 'offen' which are sometimes hard-coded into the 7557 // asm string. There are no MCInst operands for these. 
7558 if (Op.isToken()) { 7559 continue; 7560 } 7561 assert(Op.isImm()); 7562 7563 // Handle optional arguments 7564 OptionalIdx[Op.getImmTy()] = i; 7565 } 7566 7567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7568 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7569 7570 if (!IsLds) { // tfe is not legal with lds opcodes 7571 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7572 } 7573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7574 } 7575 7576 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7577 OptionalImmIndexMap OptionalIdx; 7578 7579 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7580 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7581 7582 // Add the register arguments 7583 if (Op.isReg()) { 7584 Op.addRegOperands(Inst, 1); 7585 continue; 7586 } 7587 7588 // Handle the case where soffset is an immediate 7589 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7590 Op.addImmOperands(Inst, 1); 7591 continue; 7592 } 7593 7594 // Handle tokens like 'offen' which are sometimes hard-coded into the 7595 // asm string. There are no MCInst operands for these. 7596 if (Op.isToken()) { 7597 continue; 7598 } 7599 assert(Op.isImm()); 7600 7601 // Handle optional arguments 7602 OptionalIdx[Op.getImmTy()] = i; 7603 } 7604 7605 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7606 AMDGPUOperand::ImmTyOffset); 7607 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7611 } 7612 7613 //===----------------------------------------------------------------------===// 7614 // mimg 7615 //===----------------------------------------------------------------------===// 7616 7617 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7618 bool IsAtomic) { 7619 unsigned I = 1; 7620 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7621 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7622 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7623 } 7624 7625 if (IsAtomic) { 7626 // Add src, same as dst 7627 assert(Desc.getNumDefs() == 1); 7628 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7629 } 7630 7631 OptionalImmIndexMap OptionalIdx; 7632 7633 for (unsigned E = Operands.size(); I != E; ++I) { 7634 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7635 7636 // Add the register arguments 7637 if (Op.isReg()) { 7638 Op.addRegOperands(Inst, 1); 7639 } else if (Op.isImmModifier()) { 7640 OptionalIdx[Op.getImmTy()] = I; 7641 } else if (!Op.isToken()) { 7642 llvm_unreachable("unexpected operand type"); 7643 } 7644 } 7645 7646 bool IsGFX10Plus = isGFX10Plus(); 7647 7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7649 if (IsGFX10Plus) 7650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7652 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7654 if (IsGFX10Plus) 7655 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7656 
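  // Not every MIMG opcode defines a tfe operand, so it is only added when the
  // opcode's operand list actually contains one.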
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7658 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7659 if (!IsGFX10Plus) 7660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7662 } 7663 7664 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7665 cvtMIMG(Inst, Operands, true); 7666 } 7667 7668 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7669 OptionalImmIndexMap OptionalIdx; 7670 bool IsAtomicReturn = false; 7671 7672 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7673 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7674 if (!Op.isCPol()) 7675 continue; 7676 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7677 break; 7678 } 7679 7680 if (!IsAtomicReturn) { 7681 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7682 if (NewOpc != -1) 7683 Inst.setOpcode(NewOpc); 7684 } 7685 7686 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7687 SIInstrFlags::IsAtomicRet; 7688 7689 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7690 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7691 7692 // Add the register arguments 7693 if (Op.isReg()) { 7694 Op.addRegOperands(Inst, 1); 7695 if (IsAtomicReturn && i == 1) 7696 Op.addRegOperands(Inst, 1); 7697 continue; 7698 } 7699 7700 // Handle the case where soffset is an immediate 7701 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7702 Op.addImmOperands(Inst, 1); 7703 continue; 7704 } 7705 7706 // Handle tokens like 'offen' which are sometimes hard-coded into the 7707 // asm string. There are no MCInst operands for these. 7708 if (Op.isToken()) { 7709 continue; 7710 } 7711 assert(Op.isImm()); 7712 7713 // Handle optional arguments 7714 OptionalIdx[Op.getImmTy()] = i; 7715 } 7716 7717 if ((int)Inst.getNumOperands() <= 7718 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7721 } 7722 7723 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7724 const OperandVector &Operands) { 7725 for (unsigned I = 1; I < Operands.size(); ++I) { 7726 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7727 if (Operand.isReg()) 7728 Operand.addRegOperands(Inst, 1); 7729 } 7730 7731 Inst.addOperand(MCOperand::createImm(1)); // a16 7732 } 7733 7734 //===----------------------------------------------------------------------===// 7735 // smrd 7736 //===----------------------------------------------------------------------===// 7737 7738 bool AMDGPUOperand::isSMRDOffset8() const { 7739 return isImm() && isUInt<8>(getImm()); 7740 } 7741 7742 bool AMDGPUOperand::isSMEMOffset() const { 7743 return isImmTy(ImmTyNone) || 7744 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7745 } 7746 7747 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7748 // 32-bit literals are only supported on CI and we only want to use them 7749 // when the offset is > 8-bits. 
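  // For example, an offset of 0x100 does not fit in 8 bits and so would be
  // classified as a literal offset here (illustrative value).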
7750 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7751 } 7752 7753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7754 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7755 } 7756 7757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7758 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7759 } 7760 7761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7762 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7763 } 7764 7765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7766 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7767 } 7768 7769 //===----------------------------------------------------------------------===// 7770 // vop3 7771 //===----------------------------------------------------------------------===// 7772 7773 static bool ConvertOmodMul(int64_t &Mul) { 7774 if (Mul != 1 && Mul != 2 && Mul != 4) 7775 return false; 7776 7777 Mul >>= 1; 7778 return true; 7779 } 7780 7781 static bool ConvertOmodDiv(int64_t &Div) { 7782 if (Div == 1) { 7783 Div = 0; 7784 return true; 7785 } 7786 7787 if (Div == 2) { 7788 Div = 3; 7789 return true; 7790 } 7791 7792 return false; 7793 } 7794 7795 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7796 // This is intentional and ensures compatibility with sp3. 7797 // See bug 35397 for details. 7798 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7799 if (BoundCtrl == 0 || BoundCtrl == 1) { 7800 BoundCtrl = 1; 7801 return true; 7802 } 7803 return false; 7804 } 7805 7806 // Note: the order in this table matches the order of operands in AsmString. 7807 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7808 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7809 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7810 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7811 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7812 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7813 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7814 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7815 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7816 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7817 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7818 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7819 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7820 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7821 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7822 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7823 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7824 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7825 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7826 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7827 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7828 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7829 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7830 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7831 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7832 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7833 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7834 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7835 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7836 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7837 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7838 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7839 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7840 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7841 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7842 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7843 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7844 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7845 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7846 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7847 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7848 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7849 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7850 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7851 }; 7852 7853 void AMDGPUAsmParser::onBeginOfFile() { 7854 if (!getParser().getStreamer().getTargetStreamer() || 7855 getSTI().getTargetTriple().getArch() == Triple::r600) 7856 return; 7857 7858 if (!getTargetStreamer().getTargetID()) 7859 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7860 7861 if (isHsaAbiVersion3AndAbove(&getSTI())) 7862 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7863 } 7864 7865 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7866 7867 OperandMatchResultTy res = parseOptionalOpr(Operands); 7868 7869 // This is a hack to enable hardcoded mandatory operands which follow 7870 // optional operands. 7871 // 7872 // Current design assumes that all operands after the first optional operand 7873 // are also optional. However implementation of some instructions violates 7874 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7875 // 7876 // To alleviate this problem, we have to (implicitly) parse extra operands 7877 // to make sure autogenerated parser of custom operands never hit hardcoded 7878 // mandatory operands. 
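  // Keep consuming optional operands (separated by optional commas) for up to
  // MAX_OPR_LOOKAHEAD iterations, stopping early on a parse failure or at the
  // end of the statement.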
7879 7880 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7881 if (res != MatchOperand_Success || 7882 isToken(AsmToken::EndOfStatement)) 7883 break; 7884 7885 trySkipToken(AsmToken::Comma); 7886 res = parseOptionalOpr(Operands); 7887 } 7888 7889 return res; 7890 } 7891 7892 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7893 OperandMatchResultTy res; 7894 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7895 // try to parse any optional operand here 7896 if (Op.IsBit) { 7897 res = parseNamedBit(Op.Name, Operands, Op.Type); 7898 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7899 res = parseOModOperand(Operands); 7900 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7901 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7902 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7903 res = parseSDWASel(Operands, Op.Name, Op.Type); 7904 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7905 res = parseSDWADstUnused(Operands); 7906 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7907 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7908 Op.Type == AMDGPUOperand::ImmTyNegLo || 7909 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7910 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7911 Op.ConvertResult); 7912 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7913 res = parseDim(Operands); 7914 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7915 res = parseCPol(Operands); 7916 } else { 7917 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7918 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7919 res = parseOperandArrayWithPrefix("neg", Operands, 7920 AMDGPUOperand::ImmTyBLGP, 7921 nullptr); 7922 } 7923 } 7924 if (res != MatchOperand_NoMatch) { 7925 return res; 7926 } 7927 } 7928 return MatchOperand_NoMatch; 7929 } 7930 7931 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7932 StringRef Name = getTokenStr(); 7933 if (Name == "mul") { 7934 return parseIntWithPrefix("mul", Operands, 7935 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7936 } 7937 7938 if (Name == "div") { 7939 return parseIntWithPrefix("div", Operands, 7940 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7941 } 7942 7943 return MatchOperand_NoMatch; 7944 } 7945 7946 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7947 cvtVOP3P(Inst, Operands); 7948 7949 int Opc = Inst.getOpcode(); 7950 7951 int SrcNum; 7952 const int Ops[] = { AMDGPU::OpName::src0, 7953 AMDGPU::OpName::src1, 7954 AMDGPU::OpName::src2 }; 7955 for (SrcNum = 0; 7956 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7957 ++SrcNum); 7958 assert(SrcNum > 0); 7959 7960 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7961 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7962 7963 if ((OpSel & (1 << SrcNum)) != 0) { 7964 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7965 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7966 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7967 } 7968 } 7969 7970 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7971 // 1. This operand is input modifiers 7972 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7973 // 2. This is not last operand 7974 && Desc.NumOperands > (OpNum + 1) 7975 // 3. Next operand is register class 7976 && Desc.OpInfo[OpNum + 1].RegClass != -1 7977 // 4. 
Next register is not tied to any other operand 7978 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7979 } 7980 7981 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7982 { 7983 OptionalImmIndexMap OptionalIdx; 7984 unsigned Opc = Inst.getOpcode(); 7985 7986 unsigned I = 1; 7987 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7988 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7989 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7990 } 7991 7992 for (unsigned E = Operands.size(); I != E; ++I) { 7993 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7994 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7995 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7996 } else if (Op.isInterpSlot() || 7997 Op.isInterpAttr() || 7998 Op.isAttrChan()) { 7999 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8000 } else if (Op.isImmModifier()) { 8001 OptionalIdx[Op.getImmTy()] = I; 8002 } else { 8003 llvm_unreachable("unhandled operand type"); 8004 } 8005 } 8006 8007 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8008 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8009 } 8010 8011 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8012 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8013 } 8014 8015 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8016 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8017 } 8018 } 8019 8020 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8021 { 8022 OptionalImmIndexMap OptionalIdx; 8023 unsigned Opc = Inst.getOpcode(); 8024 8025 unsigned I = 1; 8026 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8027 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8028 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8029 } 8030 8031 for (unsigned E = Operands.size(); I != E; ++I) { 8032 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8033 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8034 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8035 } else if (Op.isImmModifier()) { 8036 OptionalIdx[Op.getImmTy()] = I; 8037 } else { 8038 llvm_unreachable("unhandled operand type"); 8039 } 8040 } 8041 8042 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8043 8044 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8045 if (OpSelIdx != -1) 8046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8047 8048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8049 8050 if (OpSelIdx == -1) 8051 return; 8052 8053 const int Ops[] = { AMDGPU::OpName::src0, 8054 AMDGPU::OpName::src1, 8055 AMDGPU::OpName::src2 }; 8056 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8057 AMDGPU::OpName::src1_modifiers, 8058 AMDGPU::OpName::src2_modifiers }; 8059 8060 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8061 8062 for (int J = 0; J < 3; ++J) { 8063 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8064 if (OpIdx == -1) 8065 break; 8066 8067 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8068 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8069 8070 if ((OpSel & (1 << J)) != 0) 8071 ModVal |= SISrcMods::OP_SEL_0; 8072 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8073 (OpSel & (1 << 3)) != 0) 8074 ModVal |= SISrcMods::DST_OP_SEL; 8075 8076 
Inst.getOperand(ModIdx).setImm(ModVal); 8077 } 8078 } 8079 8080 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8081 OptionalImmIndexMap &OptionalIdx) { 8082 unsigned Opc = Inst.getOpcode(); 8083 8084 unsigned I = 1; 8085 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8086 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8087 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8088 } 8089 8090 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8091 // This instruction has src modifiers 8092 for (unsigned E = Operands.size(); I != E; ++I) { 8093 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8094 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8095 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8096 } else if (Op.isImmModifier()) { 8097 OptionalIdx[Op.getImmTy()] = I; 8098 } else if (Op.isRegOrImm()) { 8099 Op.addRegOrImmOperands(Inst, 1); 8100 } else { 8101 llvm_unreachable("unhandled operand type"); 8102 } 8103 } 8104 } else { 8105 // No src modifiers 8106 for (unsigned E = Operands.size(); I != E; ++I) { 8107 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8108 if (Op.isMod()) { 8109 OptionalIdx[Op.getImmTy()] = I; 8110 } else { 8111 Op.addRegOrImmOperands(Inst, 1); 8112 } 8113 } 8114 } 8115 8116 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8118 } 8119 8120 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8121 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8122 } 8123 8124 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8125 // it has src2 register operand that is tied to dst operand 8126 // we don't allow modifiers for this operand in assembler so src2_modifiers 8127 // should be 0. 8128 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8129 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8130 Opc == AMDGPU::V_MAC_F32_e64_vi || 8131 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8132 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8133 Opc == AMDGPU::V_MAC_F16_e64_vi || 8134 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8135 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8136 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8137 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8138 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8139 auto it = Inst.begin(); 8140 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8141 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8142 ++it; 8143 // Copy the operand to ensure it's not invalidated when Inst grows. 8144 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8145 } 8146 } 8147 8148 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8149 OptionalImmIndexMap OptionalIdx; 8150 cvtVOP3(Inst, Operands, OptionalIdx); 8151 } 8152 8153 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8154 OptionalImmIndexMap &OptIdx) { 8155 const int Opc = Inst.getOpcode(); 8156 const MCInstrDesc &Desc = MII.get(Opc); 8157 8158 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8159 8160 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8161 assert(!IsPacked); 8162 Inst.addOperand(Inst.getOperand(0)); 8163 } 8164 8165 // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 8166 // instruction, and then figure out where to actually put the modifiers 8167 8168 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8169 if (OpSelIdx != -1) { 8170 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8171 } 8172 8173 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8174 if (OpSelHiIdx != -1) { 8175 int DefaultVal = IsPacked ? -1 : 0; 8176 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8177 DefaultVal); 8178 } 8179 8180 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8181 if (NegLoIdx != -1) { 8182 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8183 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8184 } 8185 8186 const int Ops[] = { AMDGPU::OpName::src0, 8187 AMDGPU::OpName::src1, 8188 AMDGPU::OpName::src2 }; 8189 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8190 AMDGPU::OpName::src1_modifiers, 8191 AMDGPU::OpName::src2_modifiers }; 8192 8193 unsigned OpSel = 0; 8194 unsigned OpSelHi = 0; 8195 unsigned NegLo = 0; 8196 unsigned NegHi = 0; 8197 8198 if (OpSelIdx != -1) 8199 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8200 8201 if (OpSelHiIdx != -1) 8202 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8203 8204 if (NegLoIdx != -1) { 8205 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8206 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8207 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8208 } 8209 8210 for (int J = 0; J < 3; ++J) { 8211 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8212 if (OpIdx == -1) 8213 break; 8214 8215 uint32_t ModVal = 0; 8216 8217 if ((OpSel & (1 << J)) != 0) 8218 ModVal |= SISrcMods::OP_SEL_0; 8219 8220 if ((OpSelHi & (1 << J)) != 0) 8221 ModVal |= SISrcMods::OP_SEL_1; 8222 8223 if ((NegLo & (1 << J)) != 0) 8224 ModVal |= SISrcMods::NEG; 8225 8226 if ((NegHi & (1 << J)) != 0) 8227 ModVal |= SISrcMods::NEG_HI; 8228 8229 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8230 8231 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8232 } 8233 } 8234 8235 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8236 OptionalImmIndexMap OptIdx; 8237 cvtVOP3(Inst, Operands, OptIdx); 8238 cvtVOP3P(Inst, Operands, OptIdx); 8239 } 8240 8241 //===----------------------------------------------------------------------===// 8242 // dpp 8243 //===----------------------------------------------------------------------===// 8244 8245 bool AMDGPUOperand::isDPP8() const { 8246 return isImmTy(ImmTyDPP8); 8247 } 8248 8249 bool AMDGPUOperand::isDPPCtrl() const { 8250 using namespace AMDGPU::DPP; 8251 8252 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8253 if (result) { 8254 int64_t Imm = getImm(); 8255 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8256 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8257 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8258 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8259 (Imm == DppCtrl::WAVE_SHL1) || 8260 (Imm == DppCtrl::WAVE_ROL1) || 8261 (Imm == DppCtrl::WAVE_SHR1) || 8262 (Imm == DppCtrl::WAVE_ROR1) || 8263 (Imm == DppCtrl::ROW_MIRROR) || 8264 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8265 (Imm == DppCtrl::BCAST15) || 8266 (Imm == DppCtrl::BCAST31) || 8267 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= 
DppCtrl::ROW_SHARE_LAST) || 8268 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8269 } 8270 return false; 8271 } 8272 8273 //===----------------------------------------------------------------------===// 8274 // mAI 8275 //===----------------------------------------------------------------------===// 8276 8277 bool AMDGPUOperand::isBLGP() const { 8278 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8279 } 8280 8281 bool AMDGPUOperand::isCBSZ() const { 8282 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8283 } 8284 8285 bool AMDGPUOperand::isABID() const { 8286 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8287 } 8288 8289 bool AMDGPUOperand::isS16Imm() const { 8290 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8291 } 8292 8293 bool AMDGPUOperand::isU16Imm() const { 8294 return isImm() && isUInt<16>(getImm()); 8295 } 8296 8297 //===----------------------------------------------------------------------===// 8298 // dim 8299 //===----------------------------------------------------------------------===// 8300 8301 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8302 // We want to allow "dim:1D" etc., 8303 // but the initial 1 is tokenized as an integer. 8304 std::string Token; 8305 if (isToken(AsmToken::Integer)) { 8306 SMLoc Loc = getToken().getEndLoc(); 8307 Token = std::string(getTokenStr()); 8308 lex(); 8309 if (getLoc() != Loc) 8310 return false; 8311 } 8312 8313 StringRef Suffix; 8314 if (!parseId(Suffix)) 8315 return false; 8316 Token += Suffix; 8317 8318 StringRef DimId = Token; 8319 if (DimId.startswith("SQ_RSRC_IMG_")) 8320 DimId = DimId.drop_front(12); 8321 8322 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8323 if (!DimInfo) 8324 return false; 8325 8326 Encoding = DimInfo->Encoding; 8327 return true; 8328 } 8329 8330 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8331 if (!isGFX10Plus()) 8332 return MatchOperand_NoMatch; 8333 8334 SMLoc S = getLoc(); 8335 8336 if (!trySkipId("dim", AsmToken::Colon)) 8337 return MatchOperand_NoMatch; 8338 8339 unsigned Encoding; 8340 SMLoc Loc = getLoc(); 8341 if (!parseDimId(Encoding)) { 8342 Error(Loc, "invalid dim value"); 8343 return MatchOperand_ParseFail; 8344 } 8345 8346 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8347 AMDGPUOperand::ImmTyDim)); 8348 return MatchOperand_Success; 8349 } 8350 8351 //===----------------------------------------------------------------------===// 8352 // dpp 8353 //===----------------------------------------------------------------------===// 8354 8355 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8356 SMLoc S = getLoc(); 8357 8358 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8359 return MatchOperand_NoMatch; 8360 8361 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8362 8363 int64_t Sels[8]; 8364 8365 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8366 return MatchOperand_ParseFail; 8367 8368 for (size_t i = 0; i < 8; ++i) { 8369 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8370 return MatchOperand_ParseFail; 8371 8372 SMLoc Loc = getLoc(); 8373 if (getParser().parseAbsoluteExpression(Sels[i])) 8374 return MatchOperand_ParseFail; 8375 if (0 > Sels[i] || 7 < Sels[i]) { 8376 Error(Loc, "expected a 3-bit value"); 8377 return MatchOperand_ParseFail; 8378 } 8379 } 8380 8381 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8382 return 
MatchOperand_ParseFail; 8383 8384 unsigned DPP8 = 0; 8385 for (size_t i = 0; i < 8; ++i) 8386 DPP8 |= (Sels[i] << (i * 3)); 8387 8388 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8389 return MatchOperand_Success; 8390 } 8391 8392 bool 8393 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8394 const OperandVector &Operands) { 8395 if (Ctrl == "row_newbcast") 8396 return isGFX90A(); 8397 8398 if (Ctrl == "row_share" || 8399 Ctrl == "row_xmask") 8400 return isGFX10Plus(); 8401 8402 if (Ctrl == "wave_shl" || 8403 Ctrl == "wave_shr" || 8404 Ctrl == "wave_rol" || 8405 Ctrl == "wave_ror" || 8406 Ctrl == "row_bcast") 8407 return isVI() || isGFX9(); 8408 8409 return Ctrl == "row_mirror" || 8410 Ctrl == "row_half_mirror" || 8411 Ctrl == "quad_perm" || 8412 Ctrl == "row_shl" || 8413 Ctrl == "row_shr" || 8414 Ctrl == "row_ror"; 8415 } 8416 8417 int64_t 8418 AMDGPUAsmParser::parseDPPCtrlPerm() { 8419 // quad_perm:[%d,%d,%d,%d] 8420 8421 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8422 return -1; 8423 8424 int64_t Val = 0; 8425 for (int i = 0; i < 4; ++i) { 8426 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8427 return -1; 8428 8429 int64_t Temp; 8430 SMLoc Loc = getLoc(); 8431 if (getParser().parseAbsoluteExpression(Temp)) 8432 return -1; 8433 if (Temp < 0 || Temp > 3) { 8434 Error(Loc, "expected a 2-bit value"); 8435 return -1; 8436 } 8437 8438 Val += (Temp << i * 2); 8439 } 8440 8441 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8442 return -1; 8443 8444 return Val; 8445 } 8446 8447 int64_t 8448 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8449 using namespace AMDGPU::DPP; 8450 8451 // sel:%d 8452 8453 int64_t Val; 8454 SMLoc Loc = getLoc(); 8455 8456 if (getParser().parseAbsoluteExpression(Val)) 8457 return -1; 8458 8459 struct DppCtrlCheck { 8460 int64_t Ctrl; 8461 int Lo; 8462 int Hi; 8463 }; 8464 8465 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8466 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8467 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8468 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8469 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8470 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8471 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8472 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8473 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8474 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8475 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8476 .Default({-1, 0, 0}); 8477 8478 bool Valid; 8479 if (Check.Ctrl == -1) { 8480 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8481 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8482 } else { 8483 Valid = Check.Lo <= Val && Val <= Check.Hi; 8484 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8485 } 8486 8487 if (!Valid) { 8488 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8489 return -1; 8490 } 8491 8492 return Val; 8493 } 8494 8495 OperandMatchResultTy 8496 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8497 using namespace AMDGPU::DPP; 8498 8499 if (!isToken(AsmToken::Identifier) || 8500 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8501 return MatchOperand_NoMatch; 8502 8503 SMLoc S = getLoc(); 8504 int64_t Val = -1; 8505 StringRef Ctrl; 8506 8507 parseId(Ctrl); 8508 8509 if (Ctrl == "row_mirror") { 8510 Val = DppCtrl::ROW_MIRROR; 8511 } else if (Ctrl == "row_half_mirror") { 8512 Val = DppCtrl::ROW_HALF_MIRROR; 8513 } else { 8514 if (skipToken(AsmToken::Colon, "expected a colon")) { 8515 if (Ctrl == "quad_perm") { 8516 Val = parseDPPCtrlPerm(); 8517 } else { 8518 Val = parseDPPCtrlSel(Ctrl); 8519 } 8520 } 8521 } 8522 8523 if (Val == -1) 8524 return MatchOperand_ParseFail; 8525 8526 Operands.push_back( 8527 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8528 return MatchOperand_Success; 8529 } 8530 8531 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8532 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8533 } 8534 8535 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8536 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8537 } 8538 8539 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8540 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8541 } 8542 8543 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8544 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8545 } 8546 8547 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8548 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8549 } 8550 8551 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8552 OptionalImmIndexMap OptionalIdx; 8553 8554 unsigned Opc = Inst.getOpcode(); 8555 bool HasModifiers = 8556 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8557 unsigned I = 1; 8558 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8559 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8560 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8561 } 8562 8563 int Fi = 0; 8564 for (unsigned E = Operands.size(); I != E; ++I) { 8565 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8566 MCOI::TIED_TO); 8567 if (TiedTo != -1) { 8568 assert((unsigned)TiedTo < Inst.getNumOperands()); 8569 // handle tied old or src2 for MAC instructions 8570 Inst.addOperand(Inst.getOperand(TiedTo)); 8571 } 8572 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8573 // Add the register arguments 8574 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8575 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8576 // Skip it. 
8577 continue; 8578 } 8579 8580 if (IsDPP8) { 8581 if (Op.isDPP8()) { 8582 Op.addImmOperands(Inst, 1); 8583 } else if (HasModifiers && 8584 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8585 Op.addRegWithFPInputModsOperands(Inst, 2); 8586 } else if (Op.isFI()) { 8587 Fi = Op.getImm(); 8588 } else if (Op.isReg()) { 8589 Op.addRegOperands(Inst, 1); 8590 } else { 8591 llvm_unreachable("Invalid operand type"); 8592 } 8593 } else { 8594 if (HasModifiers && 8595 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8596 Op.addRegWithFPInputModsOperands(Inst, 2); 8597 } else if (Op.isReg()) { 8598 Op.addRegOperands(Inst, 1); 8599 } else if (Op.isDPPCtrl()) { 8600 Op.addImmOperands(Inst, 1); 8601 } else if (Op.isImm()) { 8602 // Handle optional arguments 8603 OptionalIdx[Op.getImmTy()] = I; 8604 } else { 8605 llvm_unreachable("Invalid operand type"); 8606 } 8607 } 8608 } 8609 8610 if (IsDPP8) { 8611 using namespace llvm::AMDGPU::DPP; 8612 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8613 } else { 8614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8616 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8617 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8618 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8619 } 8620 } 8621 } 8622 8623 //===----------------------------------------------------------------------===// 8624 // sdwa 8625 //===----------------------------------------------------------------------===// 8626 8627 OperandMatchResultTy 8628 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8629 AMDGPUOperand::ImmTy Type) { 8630 using namespace llvm::AMDGPU::SDWA; 8631 8632 SMLoc S = getLoc(); 8633 StringRef Value; 8634 OperandMatchResultTy res; 8635 8636 SMLoc StringLoc; 8637 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8638 if (res != MatchOperand_Success) { 8639 return res; 8640 } 8641 8642 int64_t Int; 8643 Int = StringSwitch<int64_t>(Value) 8644 .Case("BYTE_0", SdwaSel::BYTE_0) 8645 .Case("BYTE_1", SdwaSel::BYTE_1) 8646 .Case("BYTE_2", SdwaSel::BYTE_2) 8647 .Case("BYTE_3", SdwaSel::BYTE_3) 8648 .Case("WORD_0", SdwaSel::WORD_0) 8649 .Case("WORD_1", SdwaSel::WORD_1) 8650 .Case("DWORD", SdwaSel::DWORD) 8651 .Default(0xffffffff); 8652 8653 if (Int == 0xffffffff) { 8654 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8655 return MatchOperand_ParseFail; 8656 } 8657 8658 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8659 return MatchOperand_Success; 8660 } 8661 8662 OperandMatchResultTy 8663 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8664 using namespace llvm::AMDGPU::SDWA; 8665 8666 SMLoc S = getLoc(); 8667 StringRef Value; 8668 OperandMatchResultTy res; 8669 8670 SMLoc StringLoc; 8671 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8672 if (res != MatchOperand_Success) { 8673 return res; 8674 } 8675 8676 int64_t Int; 8677 Int = StringSwitch<int64_t>(Value) 8678 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8679 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8680 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8681 .Default(0xffffffff); 8682 8683 if (Int == 0xffffffff) { 8684 Error(StringLoc, "invalid dst_unused value"); 8685 return MatchOperand_ParseFail; 8686 } 8687 8688 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8689 return MatchOperand_Success; 8690 } 8691 8692 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8693 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8694 } 8695 8696 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8697 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8698 } 8699 8700 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8701 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8702 } 8703 8704 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8705 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8706 } 8707 8708 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8709 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8710 } 8711 8712 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8713 uint64_t BasicInstType, 8714 bool SkipDstVcc, 8715 bool SkipSrcVcc) { 8716 using namespace llvm::AMDGPU::SDWA; 8717 8718 OptionalImmIndexMap OptionalIdx; 8719 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8720 bool SkippedVcc = false; 8721 8722 unsigned I = 1; 8723 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8724 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8725 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8726 } 8727 8728 for (unsigned E = Operands.size(); I != E; ++I) { 8729 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8730 if (SkipVcc && !SkippedVcc && Op.isReg() && 8731 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8732 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8733 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8734 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8735 // Skip VCC only if we didn't skip it on previous iteration. 8736 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
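      // In the checks below, Inst.getNumOperands() == 1 corresponds to a vcc
      // written directly after the dst, and == 5 to a vcc written after
      // src0/src1 (dst plus two modifier/register pairs), matching the
      // examples above.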
8737 if (BasicInstType == SIInstrFlags::VOP2 && 8738 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8739 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8740 SkippedVcc = true; 8741 continue; 8742 } else if (BasicInstType == SIInstrFlags::VOPC && 8743 Inst.getNumOperands() == 0) { 8744 SkippedVcc = true; 8745 continue; 8746 } 8747 } 8748 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8749 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8750 } else if (Op.isImm()) { 8751 // Handle optional arguments 8752 OptionalIdx[Op.getImmTy()] = I; 8753 } else { 8754 llvm_unreachable("Invalid operand type"); 8755 } 8756 SkippedVcc = false; 8757 } 8758 8759 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8760 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8761 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8762 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8763 switch (BasicInstType) { 8764 case SIInstrFlags::VOP1: 8765 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8766 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8768 } 8769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8770 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8772 break; 8773 8774 case SIInstrFlags::VOP2: 8775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8776 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8778 } 8779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8780 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8783 break; 8784 8785 case SIInstrFlags::VOPC: 8786 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8790 break; 8791 8792 default: 8793 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8794 } 8795 } 8796 8797 // special case v_mac_{f16, f32}: 8798 // it has src2 register operand that is tied to dst operand 8799 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8800 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8801 auto it = Inst.begin(); 8802 std::advance( 8803 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8804 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8805 } 8806 } 8807 8808 //===----------------------------------------------------------------------===// 8809 // mAI 8810 //===----------------------------------------------------------------------===// 8811 8812 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8813 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8814 } 8815 8816 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8817 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8818 } 8819 8820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8821 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8822 } 8823 8824 /// Force static initialization. 8825 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8826 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8827 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8828 } 8829 8830 #define GET_REGISTER_MATCHER 8831 #define GET_MATCHER_IMPLEMENTATION 8832 #define GET_MNEMONIC_SPELL_CHECKER 8833 #define GET_MNEMONIC_CHECKER 8834 #include "AMDGPUGenAsmMatcher.inc" 8835 8836 // This function should be defined after auto-generated include so that we have 8837 // MatchClassKind enum defined 8838 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8839 unsigned Kind) { 8840 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8841 // But MatchInstructionImpl() expects to meet token and fails to validate 8842 // operand. This method checks if we are given immediate operand but expect to 8843 // get corresponding token. 8844 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8845 switch (Kind) { 8846 case MCK_addr64: 8847 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8848 case MCK_gds: 8849 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8850 case MCK_lds: 8851 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8852 case MCK_idxen: 8853 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8854 case MCK_offen: 8855 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8856 case MCK_SSrcB32: 8857 // When operands have expression values, they will return true for isToken, 8858 // because it is not possible to distinguish between a token and an 8859 // expression at parse time. MatchInstructionImpl() will always try to 8860 // match an operand as a token, when isToken returns true, and when the 8861 // name of the expression is not a valid token, the match will fail, 8862 // so we need to handle it here. 8863 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8864 case MCK_SSrcF32: 8865 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8866 case MCK_SoppBrTarget: 8867 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8868 case MCK_VReg32OrOff: 8869 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8870 case MCK_InterpSlot: 8871 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8872 case MCK_Attr: 8873 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8874 case MCK_AttrChan: 8875 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8876 case MCK_ImmSMEMOffset: 8877 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8878 case MCK_SReg_64: 8879 case MCK_SReg_64_XEXEC: 8880 // Null is defined as a 32-bit register but 8881 // it should also be enabled with 64-bit operands. 8882 // The following code enables it for SReg_64 operands 8883 // used as source and destination. Remaining source 8884 // operands are handled in isInlinableImm. 8885 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8886 default: 8887 return Match_InvalidOperand; 8888 } 8889 } 8890 8891 //===----------------------------------------------------------------------===// 8892 // endpgm 8893 //===----------------------------------------------------------------------===// 8894 8895 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8896 SMLoc S = getLoc(); 8897 int64_t Imm = 0; 8898 8899 if (!parseExpr(Imm)) { 8900 // The operand is optional, if not present default to 0 8901 Imm = 0; 8902 } 8903 8904 if (!isUInt<16>(Imm)) { 8905 Error(S, "expected a 16-bit value"); 8906 return MatchOperand_ParseFail; 8907 } 8908 8909 Operands.push_back( 8910 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8911 return MatchOperand_Success; 8912 } 8913 8914 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8915 8916 //===----------------------------------------------------------------------===// 8917 // LDSDIR 8918 //===----------------------------------------------------------------------===// 8919 8920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { 8921 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); 8922 } 8923 8924 bool AMDGPUOperand::isWaitVDST() const { 8925 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 8926 } 8927 8928 //===----------------------------------------------------------------------===// 8929 // VINTERP 8930 //===----------------------------------------------------------------------===// 8931 8932 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { 8933 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); 8934 } 8935 8936 bool AMDGPUOperand::isWaitEXP() const { 8937 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 8938 } 8939