1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/MC/MCParser/MCAsmLexer.h" 30 #include "llvm/MC/MCParser/MCAsmParser.h" 31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 32 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/AMDGPUMetadata.h" 36 #include "llvm/Support/AMDHSAKernelDescriptor.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/MachineValueType.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/Support/TargetParser.h" 41 42 using namespace llvm; 43 using namespace llvm::AMDGPU; 44 using namespace llvm::amdhsa; 45 46 namespace { 47 48 class AMDGPUAsmParser; 49 50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 51 52 //===----------------------------------------------------------------------===// 53 // Operand 54 //===----------------------------------------------------------------------===// 55 56 class AMDGPUOperand : public MCParsedAsmOperand { 57 enum KindTy { 58 Token, 59 Immediate, 60 Register, 61 Expression 62 } Kind; 63 64 SMLoc StartLoc, EndLoc; 65 const AMDGPUAsmParser *AsmParser; 66 67 public: 68 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 69 : Kind(Kind_), AsmParser(AsmParser_) {} 70 71 using Ptr = std::unique_ptr<AMDGPUOperand>; 72 73 struct Modifiers { 74 bool Abs = false; 75 bool Neg = false; 76 bool Sext = false; 77 78 bool hasFPModifiers() const { return Abs || Neg; } 79 bool hasIntModifiers() const { return Sext; } 80 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 81 82 int64_t getFPModifiersOperand() const { 83 int64_t Operand = 0; 84 Operand |= Abs ? SISrcMods::ABS : 0u; 85 Operand |= Neg ? SISrcMods::NEG : 0u; 86 return Operand; 87 } 88 89 int64_t getIntModifiersOperand() const { 90 int64_t Operand = 0; 91 Operand |= Sext ? 
                  SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
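    // A hedged illustration (the operand text is hypothetical): when parsing
    // "ds_add_u32 v0, v1 gds", the trailing "gds" may come back from the
    // expression parser as an MCSymbolRefExpr to a symbol named "gds";
    // isToken() then lets the matcher treat that symbol name as the token it
    // expects.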
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
OS << " type: "; printImmTy(OS, getImmTy()); 1081 } 1082 OS << " mods: " << Imm.Mods << '>'; 1083 break; 1084 case Token: 1085 OS << '\'' << getToken() << '\''; 1086 break; 1087 case Expression: 1088 OS << "<expr " << *Expr << '>'; 1089 break; 1090 } 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1094 int64_t Val, SMLoc Loc, 1095 ImmTy Type = ImmTyNone, 1096 bool IsFPImm = false) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1098 Op->Imm.Val = Val; 1099 Op->Imm.IsFPImm = IsFPImm; 1100 Op->Imm.Kind = ImmKindTyNone; 1101 Op->Imm.Type = Type; 1102 Op->Imm.Mods = Modifiers(); 1103 Op->StartLoc = Loc; 1104 Op->EndLoc = Loc; 1105 return Op; 1106 } 1107 1108 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1109 StringRef Str, SMLoc Loc, 1110 bool HasExplicitEncodingSize = true) { 1111 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1112 Res->Tok.Data = Str.data(); 1113 Res->Tok.Length = Str.size(); 1114 Res->StartLoc = Loc; 1115 Res->EndLoc = Loc; 1116 return Res; 1117 } 1118 1119 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1120 unsigned RegNo, SMLoc S, 1121 SMLoc E) { 1122 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1123 Op->Reg.RegNo = RegNo; 1124 Op->Reg.Mods = Modifiers(); 1125 Op->StartLoc = S; 1126 Op->EndLoc = E; 1127 return Op; 1128 } 1129 1130 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1131 const class MCExpr *Expr, SMLoc S) { 1132 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1133 Op->Expr = Expr; 1134 Op->StartLoc = S; 1135 Op->EndLoc = S; 1136 return Op; 1137 } 1138 }; 1139 1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1141 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1142 return OS; 1143 } 1144 1145 //===----------------------------------------------------------------------===// 1146 // AsmParser 1147 //===----------------------------------------------------------------------===// 1148 1149 // Holds info related to the current kernel, e.g. count of SGPRs used. 1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1151 // .amdgpu_hsa_kernel or at EOF. 
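//
// A minimal sketch of how these counts surface to assembly (directive usage
// assumed; "my_kernel" is a placeholder, the .kernel.* symbols are the ones
// created by this class):
//
//   .amdgpu_hsa_kernel my_kernel        // opens a kernel scope
//   my_kernel:
//     v_mov_b32 v7, s3                  // highest VGPR/SGPR indices seen so far
//     s_endpgm
//   .set used_vgprs, .kernel.vgpr_count // 8 at this point (v0..v7)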
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
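  // Until then, callers that must single out gfx940 check isGFX940() before
  // isGFX90A(); e.g. (sketch): if (isGFX940()) ... else if (isGFX90A()) ...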
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
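    // Example (sketch): after parsing "hwreg(HW_REG_MODE, 0, 3)", the three
    // OperandInfoTy results filled in by parseHwregBody() carry the register
    // id, offset 0, and width 3, with IsSymbolic/IsDefined set according to
    // how each field was written.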
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1654 unsigned getConstantBusLimit(unsigned Opcode) const; 1655 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1656 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1657 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1658 1659 bool isSupportedMnemo(StringRef Mnemo, 1660 const FeatureBitset &FBS); 1661 bool isSupportedMnemo(StringRef Mnemo, 1662 
const FeatureBitset &FBS, 1663 ArrayRef<unsigned> Variants); 1664 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1665 1666 bool isId(const StringRef Id) const; 1667 bool isId(const AsmToken &Token, const StringRef Id) const; 1668 bool isToken(const AsmToken::TokenKind Kind) const; 1669 bool trySkipId(const StringRef Id); 1670 bool trySkipId(const StringRef Pref, const StringRef Id); 1671 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1672 bool trySkipToken(const AsmToken::TokenKind Kind); 1673 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1674 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1675 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1676 1677 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1678 AsmToken::TokenKind getTokenKind() const; 1679 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1680 bool parseExpr(OperandVector &Operands); 1681 StringRef getTokenStr() const; 1682 AsmToken peekToken(); 1683 AsmToken getToken() const; 1684 SMLoc getLoc() const; 1685 void lex(); 1686 1687 public: 1688 void onBeginOfFile() override; 1689 1690 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1691 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1692 1693 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1694 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1695 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1697 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1698 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1699 1700 bool parseSwizzleOperand(int64_t &Op, 1701 const unsigned MinVal, 1702 const unsigned MaxVal, 1703 const StringRef ErrMsg, 1704 SMLoc &Loc); 1705 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1706 const unsigned MinVal, 1707 const unsigned MaxVal, 1708 const StringRef ErrMsg); 1709 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1710 bool parseSwizzleOffset(int64_t &Imm); 1711 bool parseSwizzleMacro(int64_t &Imm); 1712 bool parseSwizzleQuadPerm(int64_t &Imm); 1713 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1714 bool parseSwizzleBroadcast(int64_t &Imm); 1715 bool parseSwizzleSwap(int64_t &Imm); 1716 bool parseSwizzleReverse(int64_t &Imm); 1717 1718 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1719 int64_t parseGPRIdxMacro(); 1720 1721 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1722 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1723 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1724 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1725 1726 AMDGPUOperand::Ptr defaultCPol() const; 1727 1728 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1729 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1730 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1731 AMDGPUOperand::Ptr defaultFlatOffset() const; 1732 1733 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1734 1735 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1736 OptionalImmIndexMap &OptionalIdx); 1737 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1738 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1739 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1741 OptionalImmIndexMap &OptionalIdx); 1742 1743 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1744 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1745 1746 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1747 bool IsAtomic = false); 1748 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1749 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1750 1751 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1752 1753 bool parseDimId(unsigned &Encoding); 1754 OperandMatchResultTy parseDim(OperandVector &Operands); 1755 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1756 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1757 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1758 int64_t parseDPPCtrlSel(StringRef Ctrl); 1759 int64_t parseDPPCtrlPerm(); 1760 AMDGPUOperand::Ptr defaultRowMask() const; 1761 AMDGPUOperand::Ptr defaultBankMask() const; 1762 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1763 AMDGPUOperand::Ptr defaultFI() const; 1764 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1765 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1766 cvtDPP(Inst, Operands, true); 1767 } 1768 void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, 1769 bool IsDPP8 = false); 1770 void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1771 cvtVOPCNoDstDPP(Inst, Operands, true); 1772 } 1773 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1774 bool IsDPP8 = false); 1775 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1776 cvtVOP3DPP(Inst, Operands, true); 1777 } 1778 void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, 1779 bool IsDPP8 = false); 1780 void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1781 cvtVOPC64NoDstDPP(Inst, Operands, true); 1782 } 1783 1784 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1785 AMDGPUOperand::ImmTy Type); 1786 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1787 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1788 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1789 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1790 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1791 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1792 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1793 uint64_t BasicInstType, 1794 bool SkipDstVcc = false, 1795 bool SkipSrcVcc = false); 1796 1797 AMDGPUOperand::Ptr defaultBLGP() const; 1798 AMDGPUOperand::Ptr defaultCBSZ() const; 1799 AMDGPUOperand::Ptr defaultABID() const; 1800 1801 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1802 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1803 1804 AMDGPUOperand::Ptr defaultWaitVDST() const; 1805 AMDGPUOperand::Ptr defaultWaitEXP() const; 1806 }; 1807 1808 struct OptionalOperand { 1809 const char *Name; 1810 AMDGPUOperand::ImmTy Type; 1811 bool IsBit; 1812 bool (*ConvertResult)(int64_t&); 1813 }; 1814 1815 } // end anonymous namespace 1816 1817 // May be called with integer type with equivalent bitwidth. 
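// For example, a 2-byte operand selects IEEEhalf() regardless of whether the
// MVT was i16 or f16, since only the size is inspected:
//
//   const fltSemantics *S = getFltSemantics(MVT::i16); // == &APFloat::IEEEhalf()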
1818 static const fltSemantics *getFltSemantics(unsigned Size) { 1819 switch (Size) { 1820 case 4: 1821 return &APFloat::IEEEsingle(); 1822 case 8: 1823 return &APFloat::IEEEdouble(); 1824 case 2: 1825 return &APFloat::IEEEhalf(); 1826 default: 1827 llvm_unreachable("unsupported fp type"); 1828 } 1829 } 1830 1831 static const fltSemantics *getFltSemantics(MVT VT) { 1832 return getFltSemantics(VT.getSizeInBits() / 8); 1833 } 1834 1835 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1836 switch (OperandType) { 1837 case AMDGPU::OPERAND_REG_IMM_INT32: 1838 case AMDGPU::OPERAND_REG_IMM_FP32: 1839 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1840 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1841 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1842 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1843 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1844 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1845 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1847 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1848 case AMDGPU::OPERAND_KIMM32: 1849 return &APFloat::IEEEsingle(); 1850 case AMDGPU::OPERAND_REG_IMM_INT64: 1851 case AMDGPU::OPERAND_REG_IMM_FP64: 1852 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1854 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1855 return &APFloat::IEEEdouble(); 1856 case AMDGPU::OPERAND_REG_IMM_INT16: 1857 case AMDGPU::OPERAND_REG_IMM_FP16: 1858 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1859 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1860 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1861 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1862 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1863 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1867 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1868 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1869 case AMDGPU::OPERAND_KIMM16: 1870 return &APFloat::IEEEhalf(); 1871 default: 1872 llvm_unreachable("unsupported fp type"); 1873 } 1874 } 1875 1876 //===----------------------------------------------------------------------===// 1877 // Operand 1878 //===----------------------------------------------------------------------===// 1879 1880 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1881 bool Lost; 1882 1883 // Convert literal to single precision 1884 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1885 APFloat::rmNearestTiesToEven, 1886 &Lost); 1887 // We allow precision lost but not overflow or underflow 1888 if (Status != APFloat::opOK && 1889 Lost && 1890 ((Status & APFloat::opOverflow) != 0 || 1891 (Status & APFloat::opUnderflow) != 0)) { 1892 return false; 1893 } 1894 1895 return true; 1896 } 1897 1898 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1899 return isUIntN(Size, Val) || isIntN(Size, Val); 1900 } 1901 1902 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1903 if (VT.getScalarType() == MVT::i16) { 1904 // FP immediate values are broken. 1905 return isInlinableIntLiteral(Val); 1906 } 1907 1908 // f16/v2f16 operands work correctly for all values. 1909 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1910 } 1911 1912 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1913 1914 // This is a hack to enable named inline values like 1915 // shared_base with both 32-bit and 64-bit operands. 1916 // Note that these values are defined as 1917 // 32-bit operands only. 
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // Encoding would zero out the low 32 bits of the literal, but such
    // literals are still accepted.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)?
MVT::f32 : type; 2018 2019 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2020 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2021 } 2022 2023 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2024 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2025 } 2026 2027 bool AMDGPUOperand::isVRegWithInputMods() const { 2028 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2029 // GFX90A allows DPP on 64-bit operands. 2030 (isRegClass(AMDGPU::VReg_64RegClassID) && 2031 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2032 } 2033 2034 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2035 if (AsmParser->isVI()) 2036 return isVReg32(); 2037 else if (AsmParser->isGFX9Plus()) 2038 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2039 else 2040 return false; 2041 } 2042 2043 bool AMDGPUOperand::isSDWAFP16Operand() const { 2044 return isSDWAOperand(MVT::f16); 2045 } 2046 2047 bool AMDGPUOperand::isSDWAFP32Operand() const { 2048 return isSDWAOperand(MVT::f32); 2049 } 2050 2051 bool AMDGPUOperand::isSDWAInt16Operand() const { 2052 return isSDWAOperand(MVT::i16); 2053 } 2054 2055 bool AMDGPUOperand::isSDWAInt32Operand() const { 2056 return isSDWAOperand(MVT::i32); 2057 } 2058 2059 bool AMDGPUOperand::isBoolReg() const { 2060 auto FB = AsmParser->getFeatureBits(); 2061 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2062 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2063 } 2064 2065 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2066 { 2067 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2068 assert(Size == 2 || Size == 4 || Size == 8); 2069 2070 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2071 2072 if (Imm.Mods.Abs) { 2073 Val &= ~FpSignMask; 2074 } 2075 if (Imm.Mods.Neg) { 2076 Val ^= FpSignMask; 2077 } 2078 2079 return Val; 2080 } 2081 2082 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2083 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2084 Inst.getNumOperands())) { 2085 addLiteralImmOperand(Inst, Imm.Val, 2086 ApplyModifiers & 2087 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2088 } else { 2089 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2090 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2091 setImmKindNone(); 2092 } 2093 } 2094 2095 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2096 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2097 auto OpNum = Inst.getNumOperands(); 2098 // Check that this operand accepts literals 2099 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2100 2101 if (ApplyModifiers) { 2102 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2103 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2104 Val = applyInputFPModifiers(Val, Size); 2105 } 2106 2107 APInt Literal(64, Val); 2108 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2109 2110 if (Imm.IsFPImm) { // We got fp literal token 2111 switch (OpTy) { 2112 case AMDGPU::OPERAND_REG_IMM_INT64: 2113 case AMDGPU::OPERAND_REG_IMM_FP64: 2114 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2115 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2117 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2118 AsmParser->hasInv2PiInlineImm())) { 2119 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2120 setImmKindConst(); 2121 return; 2122 } 2123 2124 // Non-inlineable 2125 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2126 // For fp operands we check if low 32 bits are zeros 2127 if (Literal.getLoBits(32) != 0) { 2128 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2129 "Can't encode literal as exact 64-bit floating-point operand. " 2130 "Low 32-bits will be set to zero"); 2131 } 2132 2133 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2134 setImmKindLiteral(); 2135 return; 2136 } 2137 2138 // We don't allow fp literals in 64-bit integer instructions. It is 2139 // unclear how we should encode them. This case should be checked earlier 2140 // in predicate methods (isLiteralImm()) 2141 llvm_unreachable("fp literal in 64-bit integer instruction."); 2142 2143 case AMDGPU::OPERAND_REG_IMM_INT32: 2144 case AMDGPU::OPERAND_REG_IMM_FP32: 2145 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2146 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2147 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2148 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2149 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2150 case AMDGPU::OPERAND_REG_IMM_INT16: 2151 case AMDGPU::OPERAND_REG_IMM_FP16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2153 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2154 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2156 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2157 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2159 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2160 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2161 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2162 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2163 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2164 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2165 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2166 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2167 case AMDGPU::OPERAND_KIMM32: 2168 case AMDGPU::OPERAND_KIMM16: { 2169 bool lost; 2170 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2171 // Convert literal to single precision 2172 FPLiteral.convert(*getOpFltSemantics(OpTy), 2173 APFloat::rmNearestTiesToEven, &lost); 2174 // We allow precision lost but not overflow or underflow. This should be 2175 // checked earlier in isLiteralImm() 2176 2177 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2178 Inst.addOperand(MCOperand::createImm(ImmVal)); 2179 setImmKindLiteral(); 2180 return; 2181 } 2182 default: 2183 llvm_unreachable("invalid operand size"); 2184 } 2185 2186 return; 2187 } 2188 2189 // We got int literal token. 2190 // Only sign extend inline immediates. 
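  // For example, with a 32-bit operand the int token -1 falls in the inline
  // range and is encoded as an inline constant, while 100 does not and is
  // encoded as the 32-bit literal 0x64 (only the low 32 bits are kept).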
2191 switch (OpTy) { 2192 case AMDGPU::OPERAND_REG_IMM_INT32: 2193 case AMDGPU::OPERAND_REG_IMM_FP32: 2194 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2195 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2196 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2197 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2198 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2199 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2200 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2201 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2202 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2203 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2204 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2205 if (isSafeTruncation(Val, 32) && 2206 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2207 AsmParser->hasInv2PiInlineImm())) { 2208 Inst.addOperand(MCOperand::createImm(Val)); 2209 setImmKindConst(); 2210 return; 2211 } 2212 2213 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2214 setImmKindLiteral(); 2215 return; 2216 2217 case AMDGPU::OPERAND_REG_IMM_INT64: 2218 case AMDGPU::OPERAND_REG_IMM_FP64: 2219 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2220 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2221 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2222 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2223 Inst.addOperand(MCOperand::createImm(Val)); 2224 setImmKindConst(); 2225 return; 2226 } 2227 2228 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2229 setImmKindLiteral(); 2230 return; 2231 2232 case AMDGPU::OPERAND_REG_IMM_INT16: 2233 case AMDGPU::OPERAND_REG_IMM_FP16: 2234 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2235 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2236 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2237 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2239 if (isSafeTruncation(Val, 16) && 2240 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2241 AsmParser->hasInv2PiInlineImm())) { 2242 Inst.addOperand(MCOperand::createImm(Val)); 2243 setImmKindConst(); 2244 return; 2245 } 2246 2247 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2248 setImmKindLiteral(); 2249 return; 2250 2251 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2252 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2253 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2254 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2255 assert(isSafeTruncation(Val, 16)); 2256 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2257 AsmParser->hasInv2PiInlineImm())); 2258 2259 Inst.addOperand(MCOperand::createImm(Val)); 2260 return; 2261 } 2262 case AMDGPU::OPERAND_KIMM32: 2263 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2264 setImmKindNone(); 2265 return; 2266 case AMDGPU::OPERAND_KIMM16: 2267 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2268 setImmKindNone(); 2269 return; 2270 default: 2271 llvm_unreachable("invalid operand size"); 2272 } 2273 } 2274 2275 template <unsigned Bitwidth> 2276 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2277 APInt Literal(64, Imm.Val); 2278 setImmKindNone(); 2279 2280 if (!Imm.IsFPImm) { 2281 // We got int literal token. 
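    // The integer is simply truncated to the k-imm width; e.g. for
    // Bitwidth == 16 the token 0x12345 is emitted as 0x2345.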
2282 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2283 return; 2284 } 2285 2286 bool Lost; 2287 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2288 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2289 APFloat::rmNearestTiesToEven, &Lost); 2290 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2291 } 2292 2293 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2294 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2295 } 2296 2297 static bool isInlineValue(unsigned Reg) { 2298 switch (Reg) { 2299 case AMDGPU::SRC_SHARED_BASE: 2300 case AMDGPU::SRC_SHARED_LIMIT: 2301 case AMDGPU::SRC_PRIVATE_BASE: 2302 case AMDGPU::SRC_PRIVATE_LIMIT: 2303 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2304 return true; 2305 case AMDGPU::SRC_VCCZ: 2306 case AMDGPU::SRC_EXECZ: 2307 case AMDGPU::SRC_SCC: 2308 return true; 2309 case AMDGPU::SGPR_NULL: 2310 return true; 2311 default: 2312 return false; 2313 } 2314 } 2315 2316 bool AMDGPUOperand::isInlineValue() const { 2317 return isRegKind() && ::isInlineValue(getReg()); 2318 } 2319 2320 //===----------------------------------------------------------------------===// 2321 // AsmParser 2322 //===----------------------------------------------------------------------===// 2323 2324 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2325 if (Is == IS_VGPR) { 2326 switch (RegWidth) { 2327 default: return -1; 2328 case 32: 2329 return AMDGPU::VGPR_32RegClassID; 2330 case 64: 2331 return AMDGPU::VReg_64RegClassID; 2332 case 96: 2333 return AMDGPU::VReg_96RegClassID; 2334 case 128: 2335 return AMDGPU::VReg_128RegClassID; 2336 case 160: 2337 return AMDGPU::VReg_160RegClassID; 2338 case 192: 2339 return AMDGPU::VReg_192RegClassID; 2340 case 224: 2341 return AMDGPU::VReg_224RegClassID; 2342 case 256: 2343 return AMDGPU::VReg_256RegClassID; 2344 case 512: 2345 return AMDGPU::VReg_512RegClassID; 2346 case 1024: 2347 return AMDGPU::VReg_1024RegClassID; 2348 } 2349 } else if (Is == IS_TTMP) { 2350 switch (RegWidth) { 2351 default: return -1; 2352 case 32: 2353 return AMDGPU::TTMP_32RegClassID; 2354 case 64: 2355 return AMDGPU::TTMP_64RegClassID; 2356 case 128: 2357 return AMDGPU::TTMP_128RegClassID; 2358 case 256: 2359 return AMDGPU::TTMP_256RegClassID; 2360 case 512: 2361 return AMDGPU::TTMP_512RegClassID; 2362 } 2363 } else if (Is == IS_SGPR) { 2364 switch (RegWidth) { 2365 default: return -1; 2366 case 32: 2367 return AMDGPU::SGPR_32RegClassID; 2368 case 64: 2369 return AMDGPU::SGPR_64RegClassID; 2370 case 96: 2371 return AMDGPU::SGPR_96RegClassID; 2372 case 128: 2373 return AMDGPU::SGPR_128RegClassID; 2374 case 160: 2375 return AMDGPU::SGPR_160RegClassID; 2376 case 192: 2377 return AMDGPU::SGPR_192RegClassID; 2378 case 224: 2379 return AMDGPU::SGPR_224RegClassID; 2380 case 256: 2381 return AMDGPU::SGPR_256RegClassID; 2382 case 512: 2383 return AMDGPU::SGPR_512RegClassID; 2384 } 2385 } else if (Is == IS_AGPR) { 2386 switch (RegWidth) { 2387 default: return -1; 2388 case 32: 2389 return AMDGPU::AGPR_32RegClassID; 2390 case 64: 2391 return AMDGPU::AReg_64RegClassID; 2392 case 96: 2393 return AMDGPU::AReg_96RegClassID; 2394 case 128: 2395 return AMDGPU::AReg_128RegClassID; 2396 case 160: 2397 return AMDGPU::AReg_160RegClassID; 2398 case 192: 2399 return AMDGPU::AReg_192RegClassID; 2400 case 224: 2401 return AMDGPU::AReg_224RegClassID; 2402 case 256: 2403 return AMDGPU::AReg_256RegClassID; 2404 case 512: 2405 return AMDGPU::AReg_512RegClassID; 
2406 case 1024: 2407 return AMDGPU::AReg_1024RegClassID; 2408 } 2409 } 2410 return -1; 2411 } 2412 2413 static unsigned getSpecialRegForName(StringRef RegName) { 2414 return StringSwitch<unsigned>(RegName) 2415 .Case("exec", AMDGPU::EXEC) 2416 .Case("vcc", AMDGPU::VCC) 2417 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2418 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2419 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2420 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2421 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2422 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2423 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2424 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2425 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2426 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2427 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2428 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2429 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2430 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2431 .Case("m0", AMDGPU::M0) 2432 .Case("vccz", AMDGPU::SRC_VCCZ) 2433 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2434 .Case("execz", AMDGPU::SRC_EXECZ) 2435 .Case("src_execz", AMDGPU::SRC_EXECZ) 2436 .Case("scc", AMDGPU::SRC_SCC) 2437 .Case("src_scc", AMDGPU::SRC_SCC) 2438 .Case("tba", AMDGPU::TBA) 2439 .Case("tma", AMDGPU::TMA) 2440 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2441 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2442 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2443 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2444 .Case("vcc_lo", AMDGPU::VCC_LO) 2445 .Case("vcc_hi", AMDGPU::VCC_HI) 2446 .Case("exec_lo", AMDGPU::EXEC_LO) 2447 .Case("exec_hi", AMDGPU::EXEC_HI) 2448 .Case("tma_lo", AMDGPU::TMA_LO) 2449 .Case("tma_hi", AMDGPU::TMA_HI) 2450 .Case("tba_lo", AMDGPU::TBA_LO) 2451 .Case("tba_hi", AMDGPU::TBA_HI) 2452 .Case("pc", AMDGPU::PC_REG) 2453 .Case("null", AMDGPU::SGPR_NULL) 2454 .Default(AMDGPU::NoRegister); 2455 } 2456 2457 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2458 SMLoc &EndLoc, bool RestoreOnFailure) { 2459 auto R = parseRegister(); 2460 if (!R) return true; 2461 assert(R->isReg()); 2462 RegNo = R->getReg(); 2463 StartLoc = R->getStartLoc(); 2464 EndLoc = R->getEndLoc(); 2465 return false; 2466 } 2467 2468 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2469 SMLoc &EndLoc) { 2470 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2471 } 2472 2473 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2474 SMLoc &StartLoc, 2475 SMLoc &EndLoc) { 2476 bool Result = 2477 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2478 bool PendingErrors = getParser().hasPendingError(); 2479 getParser().clearPendingErrors(); 2480 if (PendingErrors) 2481 return MatchOperand_ParseFail; 2482 if (Result) 2483 return MatchOperand_NoMatch; 2484 return MatchOperand_Success; 2485 } 2486 2487 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2488 RegisterKind RegKind, unsigned Reg1, 2489 SMLoc Loc) { 2490 switch (RegKind) { 2491 case IS_SPECIAL: 2492 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2493 Reg = AMDGPU::EXEC; 2494 RegWidth = 64; 2495 return true; 2496 } 2497 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2498 Reg = AMDGPU::FLAT_SCR; 2499 RegWidth = 64; 2500 return true; 2501 } 2502 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2503 Reg = AMDGPU::XNACK_MASK; 2504 RegWidth = 64; 
2505 return true; 2506 } 2507 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2508 Reg = AMDGPU::VCC; 2509 RegWidth = 64; 2510 return true; 2511 } 2512 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2513 Reg = AMDGPU::TBA; 2514 RegWidth = 64; 2515 return true; 2516 } 2517 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2518 Reg = AMDGPU::TMA; 2519 RegWidth = 64; 2520 return true; 2521 } 2522 Error(Loc, "register does not fit in the list"); 2523 return false; 2524 case IS_VGPR: 2525 case IS_SGPR: 2526 case IS_AGPR: 2527 case IS_TTMP: 2528 if (Reg1 != Reg + RegWidth / 32) { 2529 Error(Loc, "registers in a list must have consecutive indices"); 2530 return false; 2531 } 2532 RegWidth += 32; 2533 return true; 2534 default: 2535 llvm_unreachable("unexpected register kind"); 2536 } 2537 } 2538 2539 struct RegInfo { 2540 StringLiteral Name; 2541 RegisterKind Kind; 2542 }; 2543 2544 static constexpr RegInfo RegularRegisters[] = { 2545 {{"v"}, IS_VGPR}, 2546 {{"s"}, IS_SGPR}, 2547 {{"ttmp"}, IS_TTMP}, 2548 {{"acc"}, IS_AGPR}, 2549 {{"a"}, IS_AGPR}, 2550 }; 2551 2552 static bool isRegularReg(RegisterKind Kind) { 2553 return Kind == IS_VGPR || 2554 Kind == IS_SGPR || 2555 Kind == IS_TTMP || 2556 Kind == IS_AGPR; 2557 } 2558 2559 static const RegInfo* getRegularRegInfo(StringRef Str) { 2560 for (const RegInfo &Reg : RegularRegisters) 2561 if (Str.startswith(Reg.Name)) 2562 return &Reg; 2563 return nullptr; 2564 } 2565 2566 static bool getRegNum(StringRef Str, unsigned& Num) { 2567 return !Str.getAsInteger(10, Num); 2568 } 2569 2570 bool 2571 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2572 const AsmToken &NextToken) const { 2573 2574 // A list of consecutive registers: [s0,s1,s2,s3] 2575 if (Token.is(AsmToken::LBrac)) 2576 return true; 2577 2578 if (!Token.is(AsmToken::Identifier)) 2579 return false; 2580 2581 // A single register like s0 or a range of registers like s[0:1] 2582 2583 StringRef Str = Token.getString(); 2584 const RegInfo *Reg = getRegularRegInfo(Str); 2585 if (Reg) { 2586 StringRef RegName = Reg->Name; 2587 StringRef RegSuffix = Str.substr(RegName.size()); 2588 if (!RegSuffix.empty()) { 2589 unsigned Num; 2590 // A single register with an index: rXX 2591 if (getRegNum(RegSuffix, Num)) 2592 return true; 2593 } else { 2594 // A range of registers: r[XX:YY]. 2595 if (NextToken.is(AsmToken::LBrac)) 2596 return true; 2597 } 2598 } 2599 2600 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2601 } 2602 2603 bool 2604 AMDGPUAsmParser::isRegister() 2605 { 2606 return isRegister(getToken(), peekToken()); 2607 } 2608 2609 unsigned 2610 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2611 unsigned RegNum, 2612 unsigned RegWidth, 2613 SMLoc Loc) { 2614 2615 assert(isRegularReg(RegKind)); 2616 2617 unsigned AlignSize = 1; 2618 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2619 // SGPR and TTMP registers must be aligned. 2620 // Max required alignment is 4 dwords. 
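    // For example, a 64-bit SGPR pair must start at an even index: s[2:3]
    // passes the alignment check below, while s[1:2] is rejected.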
2621 AlignSize = std::min(RegWidth / 32, 4u); 2622 } 2623 2624 if (RegNum % AlignSize != 0) { 2625 Error(Loc, "invalid register alignment"); 2626 return AMDGPU::NoRegister; 2627 } 2628 2629 unsigned RegIdx = RegNum / AlignSize; 2630 int RCID = getRegClass(RegKind, RegWidth); 2631 if (RCID == -1) { 2632 Error(Loc, "invalid or unsupported register size"); 2633 return AMDGPU::NoRegister; 2634 } 2635 2636 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2637 const MCRegisterClass RC = TRI->getRegClass(RCID); 2638 if (RegIdx >= RC.getNumRegs()) { 2639 Error(Loc, "register index is out of range"); 2640 return AMDGPU::NoRegister; 2641 } 2642 2643 return RC.getRegister(RegIdx); 2644 } 2645 2646 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2647 int64_t RegLo, RegHi; 2648 if (!skipToken(AsmToken::LBrac, "missing register index")) 2649 return false; 2650 2651 SMLoc FirstIdxLoc = getLoc(); 2652 SMLoc SecondIdxLoc; 2653 2654 if (!parseExpr(RegLo)) 2655 return false; 2656 2657 if (trySkipToken(AsmToken::Colon)) { 2658 SecondIdxLoc = getLoc(); 2659 if (!parseExpr(RegHi)) 2660 return false; 2661 } else { 2662 RegHi = RegLo; 2663 } 2664 2665 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2666 return false; 2667 2668 if (!isUInt<32>(RegLo)) { 2669 Error(FirstIdxLoc, "invalid register index"); 2670 return false; 2671 } 2672 2673 if (!isUInt<32>(RegHi)) { 2674 Error(SecondIdxLoc, "invalid register index"); 2675 return false; 2676 } 2677 2678 if (RegLo > RegHi) { 2679 Error(FirstIdxLoc, "first register index should not exceed second index"); 2680 return false; 2681 } 2682 2683 Num = static_cast<unsigned>(RegLo); 2684 RegWidth = 32 * ((RegHi - RegLo) + 1); 2685 return true; 2686 } 2687 2688 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2689 unsigned &RegNum, unsigned &RegWidth, 2690 SmallVectorImpl<AsmToken> &Tokens) { 2691 assert(isToken(AsmToken::Identifier)); 2692 unsigned Reg = getSpecialRegForName(getTokenStr()); 2693 if (Reg) { 2694 RegNum = 0; 2695 RegWidth = 32; 2696 RegKind = IS_SPECIAL; 2697 Tokens.push_back(getToken()); 2698 lex(); // skip register name 2699 } 2700 return Reg; 2701 } 2702 2703 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2704 unsigned &RegNum, unsigned &RegWidth, 2705 SmallVectorImpl<AsmToken> &Tokens) { 2706 assert(isToken(AsmToken::Identifier)); 2707 StringRef RegName = getTokenStr(); 2708 auto Loc = getLoc(); 2709 2710 const RegInfo *RI = getRegularRegInfo(RegName); 2711 if (!RI) { 2712 Error(Loc, "invalid register name"); 2713 return AMDGPU::NoRegister; 2714 } 2715 2716 Tokens.push_back(getToken()); 2717 lex(); // skip register name 2718 2719 RegKind = RI->Kind; 2720 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2721 if (!RegSuffix.empty()) { 2722 // Single 32-bit register: vXX. 2723 if (!getRegNum(RegSuffix, RegNum)) { 2724 Error(Loc, "invalid register index"); 2725 return AMDGPU::NoRegister; 2726 } 2727 RegWidth = 32; 2728 } else { 2729 // Range of registers: v[XX:YY]. ":YY" is optional. 
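    // e.g. "v[4:7]" yields RegNum = 4 and RegWidth = 128, while "v[5]" is
    // equivalent to "v5" (RegWidth = 32).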
2730 if (!ParseRegRange(RegNum, RegWidth)) 2731 return AMDGPU::NoRegister; 2732 } 2733 2734 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2735 } 2736 2737 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2738 unsigned &RegWidth, 2739 SmallVectorImpl<AsmToken> &Tokens) { 2740 unsigned Reg = AMDGPU::NoRegister; 2741 auto ListLoc = getLoc(); 2742 2743 if (!skipToken(AsmToken::LBrac, 2744 "expected a register or a list of registers")) { 2745 return AMDGPU::NoRegister; 2746 } 2747 2748 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2749 2750 auto Loc = getLoc(); 2751 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2752 return AMDGPU::NoRegister; 2753 if (RegWidth != 32) { 2754 Error(Loc, "expected a single 32-bit register"); 2755 return AMDGPU::NoRegister; 2756 } 2757 2758 for (; trySkipToken(AsmToken::Comma); ) { 2759 RegisterKind NextRegKind; 2760 unsigned NextReg, NextRegNum, NextRegWidth; 2761 Loc = getLoc(); 2762 2763 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2764 NextRegNum, NextRegWidth, 2765 Tokens)) { 2766 return AMDGPU::NoRegister; 2767 } 2768 if (NextRegWidth != 32) { 2769 Error(Loc, "expected a single 32-bit register"); 2770 return AMDGPU::NoRegister; 2771 } 2772 if (NextRegKind != RegKind) { 2773 Error(Loc, "registers in a list must be of the same kind"); 2774 return AMDGPU::NoRegister; 2775 } 2776 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2777 return AMDGPU::NoRegister; 2778 } 2779 2780 if (!skipToken(AsmToken::RBrac, 2781 "expected a comma or a closing square bracket")) { 2782 return AMDGPU::NoRegister; 2783 } 2784 2785 if (isRegularReg(RegKind)) 2786 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2787 2788 return Reg; 2789 } 2790 2791 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2792 unsigned &RegNum, unsigned &RegWidth, 2793 SmallVectorImpl<AsmToken> &Tokens) { 2794 auto Loc = getLoc(); 2795 Reg = AMDGPU::NoRegister; 2796 2797 if (isToken(AsmToken::Identifier)) { 2798 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2799 if (Reg == AMDGPU::NoRegister) 2800 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2801 } else { 2802 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2803 } 2804 2805 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2806 if (Reg == AMDGPU::NoRegister) { 2807 assert(Parser.hasPendingError()); 2808 return false; 2809 } 2810 2811 if (!subtargetHasRegister(*TRI, Reg)) { 2812 if (Reg == AMDGPU::SGPR_NULL) { 2813 Error(Loc, "'null' operand is not supported on this GPU"); 2814 } else { 2815 Error(Loc, "register not available on this GPU"); 2816 } 2817 return false; 2818 } 2819 2820 return true; 2821 } 2822 2823 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2824 unsigned &RegNum, unsigned &RegWidth, 2825 bool RestoreOnFailure /*=false*/) { 2826 Reg = AMDGPU::NoRegister; 2827 2828 SmallVector<AsmToken, 1> Tokens; 2829 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2830 if (RestoreOnFailure) { 2831 while (!Tokens.empty()) { 2832 getLexer().UnLex(Tokens.pop_back_val()); 2833 } 2834 } 2835 return true; 2836 } 2837 return false; 2838 } 2839 2840 Optional<StringRef> 2841 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2842 switch (RegKind) { 2843 case IS_VGPR: 2844 return StringRef(".amdgcn.next_free_vgpr"); 2845 case IS_SGPR: 2846 return StringRef(".amdgcn.next_free_sgpr"); 2847 default: 2848 return None; 2849 } 2850 } 2851 2852 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2853 auto SymbolName = getGprCountSymbolName(RegKind); 2854 assert(SymbolName && "initializing invalid register kind"); 2855 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2856 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2857 } 2858 2859 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2860 unsigned DwordRegIndex, 2861 unsigned RegWidth) { 2862 // Symbols are only defined for GCN targets 2863 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2864 return true; 2865 2866 auto SymbolName = getGprCountSymbolName(RegKind); 2867 if (!SymbolName) 2868 return true; 2869 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2870 2871 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2872 int64_t OldCount; 2873 2874 if (!Sym->isVariable()) 2875 return !Error(getLoc(), 2876 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2877 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2878 return !Error( 2879 getLoc(), 2880 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2881 2882 if (OldCount <= NewMax) 2883 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2884 2885 return true; 2886 } 2887 2888 std::unique_ptr<AMDGPUOperand> 2889 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2890 const auto &Tok = getToken(); 2891 SMLoc StartLoc = Tok.getLoc(); 2892 SMLoc EndLoc = Tok.getEndLoc(); 2893 RegisterKind RegKind; 2894 unsigned Reg, RegNum, RegWidth; 2895 2896 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2897 return nullptr; 2898 } 2899 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2900 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2901 return nullptr; 2902 } else 2903 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2904 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2905 } 2906 2907 OperandMatchResultTy 2908 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2909 // TODO: add syntactic sugar for 1/(2*PI) 2910 2911 assert(!isRegister()); 2912 assert(!isModifier()); 2913 2914 const auto& Tok = getToken(); 2915 const auto& NextTok = peekToken(); 2916 bool IsReal = Tok.is(AsmToken::Real); 2917 SMLoc S = getLoc(); 2918 bool Negate = false; 2919 2920 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2921 lex(); 2922 IsReal = true; 2923 Negate = true; 2924 } 2925 2926 if (IsReal) { 2927 // Floating-point expressions are not supported. 2928 // Can only allow floating-point literals with an 2929 // optional sign. 2930 2931 StringRef Num = getTokenStr(); 2932 lex(); 2933 2934 APFloat RealVal(APFloat::IEEEdouble()); 2935 auto roundMode = APFloat::rmNearestTiesToEven; 2936 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2937 return MatchOperand_ParseFail; 2938 } 2939 if (Negate) 2940 RealVal.changeSign(); 2941 2942 Operands.push_back( 2943 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2944 AMDGPUOperand::ImmTyNone, true)); 2945 2946 return MatchOperand_Success; 2947 2948 } else { 2949 int64_t IntVal; 2950 const MCExpr *Expr; 2951 SMLoc S = getLoc(); 2952 2953 if (HasSP3AbsModifier) { 2954 // This is a workaround for handling expressions 2955 // as arguments of SP3 'abs' modifier, for example: 2956 // |1.0| 2957 // |-1| 2958 // |1+x| 2959 // This syntax is not compatible with syntax of standard 2960 // MC expressions (due to the trailing '|'). 
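      // (In standard MC expressions '|' is the bitwise-or operator, so a
      // full expression parse would swallow the closing bar of the SP3 abs
      // modifier; hence only a primary expression is parsed below.)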
2961 SMLoc EndLoc; 2962 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2963 return MatchOperand_ParseFail; 2964 } else { 2965 if (Parser.parseExpression(Expr)) 2966 return MatchOperand_ParseFail; 2967 } 2968 2969 if (Expr->evaluateAsAbsolute(IntVal)) { 2970 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2971 } else { 2972 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2973 } 2974 2975 return MatchOperand_Success; 2976 } 2977 2978 return MatchOperand_NoMatch; 2979 } 2980 2981 OperandMatchResultTy 2982 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2983 if (!isRegister()) 2984 return MatchOperand_NoMatch; 2985 2986 if (auto R = parseRegister()) { 2987 assert(R->isReg()); 2988 Operands.push_back(std::move(R)); 2989 return MatchOperand_Success; 2990 } 2991 return MatchOperand_ParseFail; 2992 } 2993 2994 OperandMatchResultTy 2995 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2996 auto res = parseReg(Operands); 2997 if (res != MatchOperand_NoMatch) { 2998 return res; 2999 } else if (isModifier()) { 3000 return MatchOperand_NoMatch; 3001 } else { 3002 return parseImm(Operands, HasSP3AbsMod); 3003 } 3004 } 3005 3006 bool 3007 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3008 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3009 const auto &str = Token.getString(); 3010 return str == "abs" || str == "neg" || str == "sext"; 3011 } 3012 return false; 3013 } 3014 3015 bool 3016 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3017 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3018 } 3019 3020 bool 3021 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3022 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3023 } 3024 3025 bool 3026 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3027 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3028 } 3029 3030 // Check if this is an operand modifier or an opcode modifier 3031 // which may look like an expression but it is not. We should 3032 // avoid parsing these modifiers as expressions. Currently 3033 // recognized sequences are: 3034 // |...| 3035 // abs(...) 3036 // neg(...) 3037 // sext(...) 3038 // -reg 3039 // -|...| 3040 // -abs(...) 3041 // name:... 3042 // Note that simple opcode modifiers like 'gds' may be parsed as 3043 // expressions; this is a special case. See getExpressionAsToken. 3044 // 3045 bool 3046 AMDGPUAsmParser::isModifier() { 3047 3048 AsmToken Tok = getToken(); 3049 AsmToken NextToken[2]; 3050 peekTokens(NextToken); 3051 3052 return isOperandModifier(Tok, NextToken[0]) || 3053 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3054 isOpcodeModifierWithVal(Tok, NextToken[0]); 3055 } 3056 3057 // Check if the current token is an SP3 'neg' modifier. 3058 // Currently this modifier is allowed in the following context: 3059 // 3060 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3061 // 2. Before an 'abs' modifier: -abs(...) 3062 // 3. Before an SP3 'abs' modifier: -|...| 3063 // 3064 // In all other cases "-" is handled as a part 3065 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal, it is interpreted as
// integer negation rather than as a floating-point NEG modifier applied to
// that literal. Besides being counter-intuitive, using the floating-point
// NEG modifier here would give integer literals different meanings in
// VOP1/2/C and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are handled likewise for
// uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3139 } 3140 3141 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3142 return MatchOperand_ParseFail; 3143 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3144 return MatchOperand_ParseFail; 3145 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3146 return MatchOperand_ParseFail; 3147 3148 AMDGPUOperand::Modifiers Mods; 3149 Mods.Abs = Abs || SP3Abs; 3150 Mods.Neg = Neg || SP3Neg; 3151 3152 if (Mods.hasFPModifiers()) { 3153 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3154 if (Op.isExpr()) { 3155 Error(Op.getStartLoc(), "expected an absolute expression"); 3156 return MatchOperand_ParseFail; 3157 } 3158 Op.setModifiers(Mods); 3159 } 3160 return MatchOperand_Success; 3161 } 3162 3163 OperandMatchResultTy 3164 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3165 bool AllowImm) { 3166 bool Sext = trySkipId("sext"); 3167 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3168 return MatchOperand_ParseFail; 3169 3170 OperandMatchResultTy Res; 3171 if (AllowImm) { 3172 Res = parseRegOrImm(Operands); 3173 } else { 3174 Res = parseReg(Operands); 3175 } 3176 if (Res != MatchOperand_Success) { 3177 return Sext? MatchOperand_ParseFail : Res; 3178 } 3179 3180 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3181 return MatchOperand_ParseFail; 3182 3183 AMDGPUOperand::Modifiers Mods; 3184 Mods.Sext = Sext; 3185 3186 if (Mods.hasIntModifiers()) { 3187 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3188 if (Op.isExpr()) { 3189 Error(Op.getStartLoc(), "expected an absolute expression"); 3190 return MatchOperand_ParseFail; 3191 } 3192 Op.setModifiers(Mods); 3193 } 3194 3195 return MatchOperand_Success; 3196 } 3197 3198 OperandMatchResultTy 3199 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3200 return parseRegOrImmWithFPInputMods(Operands, false); 3201 } 3202 3203 OperandMatchResultTy 3204 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3205 return parseRegOrImmWithIntInputMods(Operands, false); 3206 } 3207 3208 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3209 auto Loc = getLoc(); 3210 if (trySkipId("off")) { 3211 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3212 AMDGPUOperand::ImmTyOff, false)); 3213 return MatchOperand_Success; 3214 } 3215 3216 if (!isRegister()) 3217 return MatchOperand_NoMatch; 3218 3219 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3220 if (Reg) { 3221 Operands.push_back(std::move(Reg)); 3222 return MatchOperand_Success; 3223 } 3224 3225 return MatchOperand_ParseFail; 3226 3227 } 3228 3229 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3230 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3231 3232 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3233 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3234 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3235 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3236 return Match_InvalidOperand; 3237 3238 if ((TSFlags & SIInstrFlags::VOP3) && 3239 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3240 getForcedEncodingSize() != 64) 3241 return Match_PreferE32; 3242 3243 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3244 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3245 // v_mac_f32/16 allow only dst_sel == DWORD; 3246 auto OpNum = 3247 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3248 const auto &Op = Inst.getOperand(OpNum); 3249 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3250 return Match_InvalidOperand; 3251 } 3252 } 3253 3254 return Match_Success; 3255 } 3256 3257 static ArrayRef<unsigned> getAllVariants() { 3258 static const unsigned Variants[] = { 3259 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3260 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3261 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3262 }; 3263 3264 return makeArrayRef(Variants); 3265 } 3266 3267 // What asm variants we should check 3268 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3269 if (isForcedDPP() && isForcedVOP3()) { 3270 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3271 return makeArrayRef(Variants); 3272 } 3273 if (getForcedEncodingSize() == 32) { 3274 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3275 return makeArrayRef(Variants); 3276 } 3277 3278 if (isForcedVOP3()) { 3279 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3280 return makeArrayRef(Variants); 3281 } 3282 3283 if (isForcedSDWA()) { 3284 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3285 AMDGPUAsmVariants::SDWA9}; 3286 return makeArrayRef(Variants); 3287 } 3288 3289 if (isForcedDPP()) { 3290 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3291 return makeArrayRef(Variants); 3292 } 3293 3294 return getAllVariants(); 3295 } 3296 3297 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3298 if (isForcedDPP() && isForcedVOP3()) 3299 return "e64_dpp"; 3300 3301 if (getForcedEncodingSize() == 32) 3302 return "e32"; 3303 3304 if (isForcedVOP3()) 3305 return "e64"; 3306 3307 if (isForcedSDWA()) 3308 return "sdwa"; 3309 3310 if (isForcedDPP()) 3311 return "dpp"; 3312 3313 return ""; 3314 } 3315 3316 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3317 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3318 const unsigned Num = Desc.getNumImplicitUses(); 3319 for (unsigned i = 0; i < Num; ++i) { 3320 unsigned Reg = Desc.ImplicitUses[i]; 3321 switch (Reg) { 3322 case AMDGPU::FLAT_SCR: 3323 case AMDGPU::VCC: 3324 case AMDGPU::VCC_LO: 3325 case AMDGPU::VCC_HI: 3326 case AMDGPU::M0: 3327 return Reg; 3328 default: 3329 break; 3330 } 3331 } 3332 return AMDGPU::NoRegister; 3333 } 3334 3335 // NB: This code is correct only when used to check constant 3336 // bus limitations because GFX7 support no f16 inline constants. 3337 // Note that there are no cases when a GFX7 opcode violates 3338 // constant bus limitations due to the use of an f16 constant. 
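// A possible reading of the note above: the 16-bit cases below apply GFX8+
// inline-constant rules unconditionally, which could over-approximate on
// GFX7; but since GFX7 has no f16 operands, constant bus counting is never
// affected.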
3339 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3340 unsigned OpIdx) const { 3341 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3342 3343 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3344 return false; 3345 } 3346 3347 const MCOperand &MO = Inst.getOperand(OpIdx); 3348 3349 int64_t Val = MO.getImm(); 3350 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3351 3352 switch (OpSize) { // expected operand size 3353 case 8: 3354 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3355 case 4: 3356 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3357 case 2: { 3358 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3359 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3360 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3362 return AMDGPU::isInlinableIntLiteral(Val); 3363 3364 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3365 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3366 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3367 return AMDGPU::isInlinableIntLiteralV216(Val); 3368 3369 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3370 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3371 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3372 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3373 3374 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3375 } 3376 default: 3377 llvm_unreachable("invalid operand size"); 3378 } 3379 } 3380 3381 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3382 if (!isGFX10Plus()) 3383 return 1; 3384 3385 switch (Opcode) { 3386 // 64-bit shift instructions can use only one scalar value input 3387 case AMDGPU::V_LSHLREV_B64_e64: 3388 case AMDGPU::V_LSHLREV_B64_gfx10: 3389 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3390 case AMDGPU::V_LSHRREV_B64_e64: 3391 case AMDGPU::V_LSHRREV_B64_gfx10: 3392 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3393 case AMDGPU::V_ASHRREV_I64_e64: 3394 case AMDGPU::V_ASHRREV_I64_gfx10: 3395 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3396 case AMDGPU::V_LSHL_B64_e64: 3397 case AMDGPU::V_LSHR_B64_e64: 3398 case AMDGPU::V_ASHR_I64_e64: 3399 return 1; 3400 default: 3401 return 2; 3402 } 3403 } 3404 3405 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3406 const MCOperand &MO = Inst.getOperand(OpIdx); 3407 if (MO.isImm()) { 3408 return !isInlineConstant(Inst, OpIdx); 3409 } else if (MO.isReg()) { 3410 auto Reg = MO.getReg(); 3411 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3412 auto PReg = mc2PseudoReg(Reg); 3413 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3414 } else { 3415 return true; 3416 } 3417 } 3418 3419 bool 3420 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3421 const OperandVector &Operands) { 3422 const unsigned Opcode = Inst.getOpcode(); 3423 const MCInstrDesc &Desc = MII.get(Opcode); 3424 unsigned LastSGPR = AMDGPU::NoRegister; 3425 unsigned ConstantBusUseCount = 0; 3426 unsigned NumLiterals = 0; 3427 unsigned LiteralSize; 3428 3429 if (Desc.TSFlags & 3430 (SIInstrFlags::VOPC | 3431 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3432 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3433 SIInstrFlags::SDWA)) { 3434 // Check special imm operands (used by madmk, etc) 3435 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3436 ++NumLiterals; 3437 LiteralSize = 4; 3438 } 3439 3440 SmallDenseSet<unsigned> SGPRsUsed; 3441 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal; this has already been
          // validated in validateVOPLiteral. That literal may, however, be
          // used by more than one operand. If all these operands have the
          // same size, the literal counts as one scalar value; otherwise it
          // counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3505 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3506 return false; 3507 } 3508 3509 bool 3510 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3511 const OperandVector &Operands) { 3512 const unsigned Opcode = Inst.getOpcode(); 3513 const MCInstrDesc &Desc = MII.get(Opcode); 3514 3515 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3516 if (DstIdx == -1 || 3517 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3518 return true; 3519 } 3520 3521 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3522 3523 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3524 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3525 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3526 3527 assert(DstIdx != -1); 3528 const MCOperand &Dst = Inst.getOperand(DstIdx); 3529 assert(Dst.isReg()); 3530 3531 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3532 3533 for (int SrcIdx : SrcIndices) { 3534 if (SrcIdx == -1) break; 3535 const MCOperand &Src = Inst.getOperand(SrcIdx); 3536 if (Src.isReg()) { 3537 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3538 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3539 Error(getRegLoc(SrcReg, Operands), 3540 "destination must be different than all sources"); 3541 return false; 3542 } 3543 } 3544 } 3545 3546 return true; 3547 } 3548 3549 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3550 3551 const unsigned Opc = Inst.getOpcode(); 3552 const MCInstrDesc &Desc = MII.get(Opc); 3553 3554 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3555 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3556 assert(ClampIdx != -1); 3557 return Inst.getOperand(ClampIdx).getImm() == 0; 3558 } 3559 3560 return true; 3561 } 3562 3563 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3564 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3569 return None; 3570 3571 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3572 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3573 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3574 3575 assert(VDataIdx != -1); 3576 3577 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3578 return None; 3579 3580 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3581 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3582 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3583 if (DMask == 0) 3584 DMask = 1; 3585 3586 bool isPackedD16 = false; 3587 unsigned DataSize = 3588 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3589 if (hasPackedD16()) { 3590 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3591 isPackedD16 = D16Idx >= 0; 3592 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3593 DataSize = (DataSize + 1) / 2; 3594 } 3595 3596 if ((VDataSize / 4) == DataSize + TFESize) 3597 return None; 3598 3599 return StringRef(isPackedD16 3600 ? 
"image data size does not match dmask, d16 and tfe" 3601 : "image data size does not match dmask and tfe"); 3602 } 3603 3604 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3609 return true; 3610 3611 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3612 3613 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3614 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3615 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3616 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3617 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3618 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3619 3620 assert(VAddr0Idx != -1); 3621 assert(SrsrcIdx != -1); 3622 assert(SrsrcIdx > VAddr0Idx); 3623 3624 if (DimIdx == -1) 3625 return true; // intersect_ray 3626 3627 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3628 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3629 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3630 unsigned ActualAddrSize = 3631 IsNSA ? SrsrcIdx - VAddr0Idx 3632 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3633 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3634 3635 unsigned ExpectedAddrSize = 3636 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3637 3638 if (!IsNSA) { 3639 if (ExpectedAddrSize > 8) 3640 ExpectedAddrSize = 16; 3641 3642 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3643 // This provides backward compatibility for assembly created 3644 // before 160b/192b/224b types were directly supported. 3645 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3646 return true; 3647 } 3648 3649 return ActualAddrSize == ExpectedAddrSize; 3650 } 3651 3652 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3653 3654 const unsigned Opc = Inst.getOpcode(); 3655 const MCInstrDesc &Desc = MII.get(Opc); 3656 3657 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3658 return true; 3659 if (!Desc.mayLoad() || !Desc.mayStore()) 3660 return true; // Not atomic 3661 3662 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3663 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3664 3665 // This is an incomplete check because image_atomic_cmpswap 3666 // may only use 0x3 and 0xf while other atomic operations 3667 // may use 0x1 and 0x3. However these limitations are 3668 // verified when we check that dmask matches dst size. 3669 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3670 } 3671 3672 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3673 3674 const unsigned Opc = Inst.getOpcode(); 3675 const MCInstrDesc &Desc = MII.get(Opc); 3676 3677 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3678 return true; 3679 3680 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3681 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3682 3683 // GATHER4 instructions use dmask in a different fashion compared to 3684 // other MIMG instructions. The only useful DMASK values are 3685 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3686 // (red,red,red,red) etc.) The ISA document doesn't mention 3687 // this. 
3688 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3689 } 3690 3691 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3692 const unsigned Opc = Inst.getOpcode(); 3693 const MCInstrDesc &Desc = MII.get(Opc); 3694 3695 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3696 return true; 3697 3698 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3699 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3700 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3701 3702 if (!BaseOpcode->MSAA) 3703 return true; 3704 3705 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3706 assert(DimIdx != -1); 3707 3708 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3709 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3710 3711 return DimInfo->MSAA; 3712 } 3713 3714 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3715 { 3716 switch (Opcode) { 3717 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3718 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3719 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3720 return true; 3721 default: 3722 return false; 3723 } 3724 } 3725 3726 // movrels* opcodes should only allow VGPRS as src0. 3727 // This is specified in .td description for vop1/vop3, 3728 // but sdwa is handled differently. See isSDWAOperand. 3729 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3730 const OperandVector &Operands) { 3731 3732 const unsigned Opc = Inst.getOpcode(); 3733 const MCInstrDesc &Desc = MII.get(Opc); 3734 3735 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3736 return true; 3737 3738 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3739 assert(Src0Idx != -1); 3740 3741 SMLoc ErrLoc; 3742 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3743 if (Src0.isReg()) { 3744 auto Reg = mc2PseudoReg(Src0.getReg()); 3745 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3746 if (!isSGPR(Reg, TRI)) 3747 return true; 3748 ErrLoc = getRegLoc(Reg, Operands); 3749 } else { 3750 ErrLoc = getConstLoc(Operands); 3751 } 3752 3753 Error(ErrLoc, "source operand must be a VGPR"); 3754 return false; 3755 } 3756 3757 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3758 const OperandVector &Operands) { 3759 3760 const unsigned Opc = Inst.getOpcode(); 3761 3762 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3763 return true; 3764 3765 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3766 assert(Src0Idx != -1); 3767 3768 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3769 if (!Src0.isReg()) 3770 return true; 3771 3772 auto Reg = mc2PseudoReg(Src0.getReg()); 3773 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3774 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3775 Error(getRegLoc(Reg, Operands), 3776 "source operand must be either a VGPR or an inline constant"); 3777 return false; 3778 } 3779 3780 return true; 3781 } 3782 3783 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3784 const OperandVector &Operands) { 3785 const unsigned Opc = Inst.getOpcode(); 3786 const MCInstrDesc &Desc = MII.get(Opc); 3787 3788 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3789 return true; 3790 3791 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3792 if (Src2Idx == -1) 3793 return true; 3794 3795 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3796 if (!Src2.isReg()) 3797 return true; 3798 3799 MCRegister Src2Reg = Src2.getReg(); 3800 MCRegister DstReg = Inst.getOperand(0).getReg(); 3801 if (Src2Reg == DstReg) 3802 return 
true; 3803 3804 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3805 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3806 return true; 3807 3808 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3809 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3810 "source 2 operand must not partially overlap with dst"); 3811 return false; 3812 } 3813 3814 return true; 3815 } 3816 3817 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3818 switch (Inst.getOpcode()) { 3819 default: 3820 return true; 3821 case V_DIV_SCALE_F32_gfx6_gfx7: 3822 case V_DIV_SCALE_F32_vi: 3823 case V_DIV_SCALE_F32_gfx10: 3824 case V_DIV_SCALE_F64_gfx6_gfx7: 3825 case V_DIV_SCALE_F64_vi: 3826 case V_DIV_SCALE_F64_gfx10: 3827 break; 3828 } 3829 3830 // TODO: Check that src0 = src1 or src2. 3831 3832 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3833 AMDGPU::OpName::src1_modifiers, 3834 AMDGPU::OpName::src2_modifiers}) { 3835 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3836 .getImm() & 3837 SISrcMods::ABS) { 3838 return false; 3839 } 3840 } 3841 3842 return true; 3843 } 3844 3845 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3846 3847 const unsigned Opc = Inst.getOpcode(); 3848 const MCInstrDesc &Desc = MII.get(Opc); 3849 3850 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3851 return true; 3852 3853 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3854 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3855 if (isCI() || isSI()) 3856 return false; 3857 } 3858 3859 return true; 3860 } 3861 3862 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3863 const unsigned Opc = Inst.getOpcode(); 3864 const MCInstrDesc &Desc = MII.get(Opc); 3865 3866 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3867 return true; 3868 3869 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3870 if (DimIdx < 0) 3871 return true; 3872 3873 long Imm = Inst.getOperand(DimIdx).getImm(); 3874 if (Imm < 0 || Imm >= 8) 3875 return false; 3876 3877 return true; 3878 } 3879 3880 static bool IsRevOpcode(const unsigned Opcode) 3881 { 3882 switch (Opcode) { 3883 case AMDGPU::V_SUBREV_F32_e32: 3884 case AMDGPU::V_SUBREV_F32_e64: 3885 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3886 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3887 case AMDGPU::V_SUBREV_F32_e32_vi: 3888 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3889 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3890 case AMDGPU::V_SUBREV_F32_e64_vi: 3891 3892 case AMDGPU::V_SUBREV_CO_U32_e32: 3893 case AMDGPU::V_SUBREV_CO_U32_e64: 3894 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3895 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3896 3897 case AMDGPU::V_SUBBREV_U32_e32: 3898 case AMDGPU::V_SUBBREV_U32_e64: 3899 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3900 case AMDGPU::V_SUBBREV_U32_e32_vi: 3901 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3902 case AMDGPU::V_SUBBREV_U32_e64_vi: 3903 3904 case AMDGPU::V_SUBREV_U32_e32: 3905 case AMDGPU::V_SUBREV_U32_e64: 3906 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3907 case AMDGPU::V_SUBREV_U32_e32_vi: 3908 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3909 case AMDGPU::V_SUBREV_U32_e64_vi: 3910 3911 case AMDGPU::V_SUBREV_F16_e32: 3912 case AMDGPU::V_SUBREV_F16_e64: 3913 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3914 case AMDGPU::V_SUBREV_F16_e32_vi: 3915 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3916 case AMDGPU::V_SUBREV_F16_e64_vi: 3917 3918 case AMDGPU::V_SUBREV_U16_e32: 3919 case AMDGPU::V_SUBREV_U16_e64: 3920 case AMDGPU::V_SUBREV_U16_e32_vi: 3921 case AMDGPU::V_SUBREV_U16_e64_vi: 3922 3923 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3924 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3925 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3926 3927 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3928 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3929 3930 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3931 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3932 3933 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3934 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3935 3936 case AMDGPU::V_LSHRREV_B32_e32: 3937 case AMDGPU::V_LSHRREV_B32_e64: 3938 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3939 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3940 case AMDGPU::V_LSHRREV_B32_e32_vi: 3941 case AMDGPU::V_LSHRREV_B32_e64_vi: 3942 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3943 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3944 3945 case AMDGPU::V_ASHRREV_I32_e32: 3946 case AMDGPU::V_ASHRREV_I32_e64: 3947 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3948 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3949 case AMDGPU::V_ASHRREV_I32_e32_vi: 3950 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3951 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3952 case AMDGPU::V_ASHRREV_I32_e64_vi: 3953 3954 case AMDGPU::V_LSHLREV_B32_e32: 3955 case AMDGPU::V_LSHLREV_B32_e64: 3956 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3957 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3958 case AMDGPU::V_LSHLREV_B32_e32_vi: 3959 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3960 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3961 case AMDGPU::V_LSHLREV_B32_e64_vi: 3962 3963 case AMDGPU::V_LSHLREV_B16_e32: 3964 case AMDGPU::V_LSHLREV_B16_e64: 3965 case AMDGPU::V_LSHLREV_B16_e32_vi: 3966 case AMDGPU::V_LSHLREV_B16_e64_vi: 3967 case AMDGPU::V_LSHLREV_B16_gfx10: 3968 3969 case AMDGPU::V_LSHRREV_B16_e32: 3970 case AMDGPU::V_LSHRREV_B16_e64: 3971 case AMDGPU::V_LSHRREV_B16_e32_vi: 3972 case AMDGPU::V_LSHRREV_B16_e64_vi: 3973 case AMDGPU::V_LSHRREV_B16_gfx10: 3974 3975 case AMDGPU::V_ASHRREV_I16_e32: 3976 case AMDGPU::V_ASHRREV_I16_e64: 3977 case AMDGPU::V_ASHRREV_I16_e32_vi: 3978 case AMDGPU::V_ASHRREV_I16_e64_vi: 3979 case AMDGPU::V_ASHRREV_I16_gfx10: 3980 3981 case AMDGPU::V_LSHLREV_B64_e64: 3982 case AMDGPU::V_LSHLREV_B64_gfx10: 3983 case AMDGPU::V_LSHLREV_B64_vi: 3984 3985 case AMDGPU::V_LSHRREV_B64_e64: 3986 case AMDGPU::V_LSHRREV_B64_gfx10: 3987 case AMDGPU::V_LSHRREV_B64_vi: 3988 3989 case AMDGPU::V_ASHRREV_I64_e64: 3990 case AMDGPU::V_ASHRREV_I64_gfx10: 3991 case AMDGPU::V_ASHRREV_I64_vi: 3992 3993 case AMDGPU::V_PK_LSHLREV_B16: 3994 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3995 case AMDGPU::V_PK_LSHLREV_B16_vi: 3996 3997 case AMDGPU::V_PK_LSHRREV_B16: 3998 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3999 case AMDGPU::V_PK_LSHRREV_B16_vi: 4000 case AMDGPU::V_PK_ASHRREV_I16: 4001 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4002 case AMDGPU::V_PK_ASHRREV_I16_vi: 4003 return true; 4004 default: 4005 return false; 4006 } 4007 } 4008 4009 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4010 4011 using namespace SIInstrFlags; 4012 const unsigned Opcode = Inst.getOpcode(); 4013 const MCInstrDesc &Desc = MII.get(Opcode); 4014 4015 // lds_direct register is defined so that it can be used 4016 // with 9-bit operands only. Ignore encodings which do not accept these. 
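  // For example, lds_direct is valid as src0 of a VOP1/VOP2 instruction such
  // as v_mov_b32 on targets that support it, while uses as src1/src2, with
  // SDWA, or with *rev opcodes are diagnosed below.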
4017 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4018 if ((Desc.TSFlags & Enc) == 0) 4019 return None; 4020 4021 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4022 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4023 if (SrcIdx == -1) 4024 break; 4025 const auto &Src = Inst.getOperand(SrcIdx); 4026 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4027 4028 if (isGFX90A() || isGFX11Plus()) 4029 return StringRef("lds_direct is not supported on this GPU"); 4030 4031 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4032 return StringRef("lds_direct cannot be used with this instruction"); 4033 4034 if (SrcName != OpName::src0) 4035 return StringRef("lds_direct may be used as src0 only"); 4036 } 4037 } 4038 4039 return None; 4040 } 4041 4042 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4043 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4044 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4045 if (Op.isFlatOffset()) 4046 return Op.getStartLoc(); 4047 } 4048 return getLoc(); 4049 } 4050 4051 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4052 const OperandVector &Operands) { 4053 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4054 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4055 return true; 4056 4057 auto Opcode = Inst.getOpcode(); 4058 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4059 assert(OpNum != -1); 4060 4061 const auto &Op = Inst.getOperand(OpNum); 4062 if (!hasFlatOffsets() && Op.getImm() != 0) { 4063 Error(getFlatOffsetLoc(Operands), 4064 "flat offset modifier is not supported on this GPU"); 4065 return false; 4066 } 4067 4068 // For FLAT segment the offset must be positive; 4069 // MSB is ignored and forced to zero. 4070 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4071 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4072 if (!isIntN(OffsetSize, Op.getImm())) { 4073 Error(getFlatOffsetLoc(Operands), 4074 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4075 return false; 4076 } 4077 } else { 4078 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4079 if (!isUIntN(OffsetSize, Op.getImm())) { 4080 Error(getFlatOffsetLoc(Operands), 4081 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4082 return false; 4083 } 4084 } 4085 4086 return true; 4087 } 4088 4089 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4090 // Start with second operand because SMEM Offset cannot be dst or src0. 
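  // (Operands[0] is the instruction mnemonic and Operands[1] is the dst or
  // src0 operand, so the offset can appear no earlier than Operands[2].)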
4091 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4092 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4093 if (Op.isSMEMOffset()) 4094 return Op.getStartLoc(); 4095 } 4096 return getLoc(); 4097 } 4098 4099 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4100 const OperandVector &Operands) { 4101 if (isCI() || isSI()) 4102 return true; 4103 4104 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4105 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4106 return true; 4107 4108 auto Opcode = Inst.getOpcode(); 4109 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4110 if (OpNum == -1) 4111 return true; 4112 4113 const auto &Op = Inst.getOperand(OpNum); 4114 if (!Op.isImm()) 4115 return true; 4116 4117 uint64_t Offset = Op.getImm(); 4118 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4119 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4120 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4121 return true; 4122 4123 Error(getSMEMOffsetLoc(Operands), 4124 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4125 "expected a 21-bit signed offset"); 4126 4127 return false; 4128 } 4129 4130 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4131 unsigned Opcode = Inst.getOpcode(); 4132 const MCInstrDesc &Desc = MII.get(Opcode); 4133 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4134 return true; 4135 4136 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4137 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4138 4139 const int OpIndices[] = { Src0Idx, Src1Idx }; 4140 4141 unsigned NumExprs = 0; 4142 unsigned NumLiterals = 0; 4143 uint32_t LiteralValue; 4144 4145 for (int OpIdx : OpIndices) { 4146 if (OpIdx == -1) break; 4147 4148 const MCOperand &MO = Inst.getOperand(OpIdx); 4149 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4150 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4151 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4152 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4153 if (NumLiterals == 0 || LiteralValue != Value) { 4154 LiteralValue = Value; 4155 ++NumLiterals; 4156 } 4157 } else if (MO.isExpr()) { 4158 ++NumExprs; 4159 } 4160 } 4161 } 4162 4163 return NumLiterals + NumExprs <= 1; 4164 } 4165 4166 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4167 const unsigned Opc = Inst.getOpcode(); 4168 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4169 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4170 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4171 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4172 4173 if (OpSel & ~3) 4174 return false; 4175 } 4176 4177 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4178 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4179 if (OpSelIdx != -1) { 4180 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4181 return false; 4182 } 4183 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4184 if (OpSelHiIdx != -1) { 4185 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4186 return false; 4187 } 4188 } 4189 4190 return true; 4191 } 4192 4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4194 const OperandVector &Operands) { 4195 const unsigned Opc = Inst.getOpcode(); 4196 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4197 if (DppCtrlIdx < 0) 4198 return true; 4199 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4200 4201 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4202 // DPP64 is supported for row_newbcast only. 4203 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4204 if (Src0Idx >= 0 && 4205 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4206 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4207 Error(S, "64 bit dpp only supports row_newbcast"); 4208 return false; 4209 } 4210 } 4211 4212 return true; 4213 } 4214 4215 // Check if VCC register matches wavefront size 4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4217 auto FB = getFeatureBits(); 4218 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4219 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4220 } 4221 4222 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4224 const OperandVector &Operands) { 4225 unsigned Opcode = Inst.getOpcode(); 4226 const MCInstrDesc &Desc = MII.get(Opcode); 4227 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4228 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4229 ImmIdx == -1) 4230 return true; 4231 4232 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4233 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4234 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4235 4236 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4237 4238 unsigned NumExprs = 0; 4239 unsigned NumLiterals = 0; 4240 uint32_t LiteralValue; 4241 4242 for (int OpIdx : OpIndices) { 4243 if (OpIdx == -1) 4244 continue; 4245 4246 const MCOperand &MO = Inst.getOperand(OpIdx); 4247 if (!MO.isImm() && !MO.isExpr()) 4248 continue; 4249 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4250 continue; 4251 4252 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4253 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4254 Error(getConstLoc(Operands), 4255 "inline constants are not allowed for this operand"); 4256 return false; 4257 } 4258 4259 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4260 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4261 if (NumLiterals == 0 || LiteralValue != Value) { 4262 LiteralValue = Value; 4263 ++NumLiterals; 4264 } 4265 } else if (MO.isExpr()) { 4266 ++NumExprs; 4267 } 4268 } 4269 NumLiterals += NumExprs; 4270 4271 if (!NumLiterals) 4272 return true; 4273 4274 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4275 Error(getLitLoc(Operands), "literal operands are not supported"); 4276 return false; 4277 } 4278 4279 if (NumLiterals > 1) { 4280 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4281 return false; 4282 } 4283 4284 return true; 4285 } 4286 4287 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4288 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4289 const MCRegisterInfo *MRI) { 4290 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4291 if (OpIdx < 0) 4292 return -1; 4293 4294 const MCOperand &Op = Inst.getOperand(OpIdx); 4295 if (!Op.isReg()) 4296 return -1; 4297 4298 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4299 auto Reg = Sub ? Sub : Op.getReg(); 4300 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4301 return AGPR32.contains(Reg) ? 
1 : 0; 4302 } 4303 4304 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4305 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4306 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4307 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4308 SIInstrFlags::DS)) == 0) 4309 return true; 4310 4311 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4312 : AMDGPU::OpName::vdata; 4313 4314 const MCRegisterInfo *MRI = getMRI(); 4315 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4316 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4317 4318 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4319 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4320 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4321 return false; 4322 } 4323 4324 auto FB = getFeatureBits(); 4325 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4326 if (DataAreg < 0 || DstAreg < 0) 4327 return true; 4328 return DstAreg == DataAreg; 4329 } 4330 4331 return DstAreg < 1 && DataAreg < 1; 4332 } 4333 4334 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4335 auto FB = getFeatureBits(); 4336 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4337 return true; 4338 4339 const MCRegisterInfo *MRI = getMRI(); 4340 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4341 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4342 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4343 const MCOperand &Op = Inst.getOperand(I); 4344 if (!Op.isReg()) 4345 continue; 4346 4347 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4348 if (!Sub) 4349 continue; 4350 4351 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4352 return false; 4353 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4354 return false; 4355 } 4356 4357 return true; 4358 } 4359 4360 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4363 if (Op.isBLGP()) 4364 return Op.getStartLoc(); 4365 } 4366 return SMLoc(); 4367 } 4368 4369 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4370 const OperandVector &Operands) { 4371 unsigned Opc = Inst.getOpcode(); 4372 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4373 if (BlgpIdx == -1) 4374 return true; 4375 SMLoc BLGPLoc = getBLGPLoc(Operands); 4376 if (!BLGPLoc.isValid()) 4377 return true; 4378 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4379 auto FB = getFeatureBits(); 4380 bool UsesNeg = false; 4381 if (FB[AMDGPU::FeatureGFX940Insts]) { 4382 switch (Opc) { 4383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4387 UsesNeg = true; 4388 } 4389 } 4390 4391 if (IsNeg == UsesNeg) 4392 return true; 4393 4394 Error(BLGPLoc, 4395 UsesNeg ? "invalid modifier: blgp is not supported" 4396 : "invalid modifier: neg is not supported"); 4397 4398 return false; 4399 } 4400 4401 // gfx90a has an undocumented limitation: 4402 // DS_GWS opcodes must use even aligned registers. 
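// For example, a ds_gws_init whose data operand is an odd-numbered VGPR or
// AGPR (v1, a3, ...) is rejected below with "vgpr must be even aligned".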
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4404 const OperandVector &Operands) { 4405 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4406 return true; 4407 4408 int Opc = Inst.getOpcode(); 4409 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4410 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4411 return true; 4412 4413 const MCRegisterInfo *MRI = getMRI(); 4414 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4415 int Data0Pos = 4416 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4417 assert(Data0Pos != -1); 4418 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4419 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4420 if (RegIdx & 1) { 4421 SMLoc RegLoc = getRegLoc(Reg, Operands); 4422 Error(RegLoc, "vgpr must be even aligned"); 4423 return false; 4424 } 4425 4426 return true; 4427 } 4428 4429 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4430 const OperandVector &Operands, 4431 const SMLoc &IDLoc) { 4432 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4433 AMDGPU::OpName::cpol); 4434 if (CPolPos == -1) 4435 return true; 4436 4437 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4438 4439 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4440 if (TSFlags & SIInstrFlags::SMRD) { 4441 if (CPol && (isSI() || isCI())) { 4442 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4443 Error(S, "cache policy is not supported for SMRD instructions"); 4444 return false; 4445 } 4446 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4447 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4448 return false; 4449 } 4450 } 4451 4452 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4454 StringRef CStr(S.getPointer()); 4455 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4456 Error(S, "scc is not supported on this GPU"); 4457 return false; 4458 } 4459 4460 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4461 return true; 4462 4463 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4464 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4465 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4466 : "instruction must use glc"); 4467 return false; 4468 } 4469 } else { 4470 if (CPol & CPol::GLC) { 4471 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4472 StringRef CStr(S.getPointer()); 4473 S = SMLoc::getFromPointer( 4474 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4475 Error(S, isGFX940() ? "instruction must not use sc0" 4476 : "instruction must not use glc"); 4477 return false; 4478 } 4479 } 4480 4481 return true; 4482 } 4483 4484 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4485 const OperandVector &Operands, 4486 const SMLoc &IDLoc) { 4487 if (isGFX940()) 4488 return true; 4489 4490 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4491 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4492 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4493 return true; 4494 // This is FLAT LDS DMA. 4495 4496 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4497 StringRef CStr(S.getPointer()); 4498 if (!CStr.startswith("lds")) { 4499 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4500 // The LDS version should have an 'lds' modifier, but it follows the optional 4501 // operands, so its absence is ignored by the matcher.
4502 Error(IDLoc, "invalid operands for instruction"); 4503 return false; 4504 } 4505 4506 return true; 4507 } 4508 4509 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4510 const SMLoc &IDLoc, 4511 const OperandVector &Operands) { 4512 if (auto ErrMsg = validateLdsDirect(Inst)) { 4513 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4514 return false; 4515 } 4516 if (!validateSOPLiteral(Inst)) { 4517 Error(getLitLoc(Operands), 4518 "only one literal operand is allowed"); 4519 return false; 4520 } 4521 if (!validateVOPLiteral(Inst, Operands)) { 4522 return false; 4523 } 4524 if (!validateConstantBusLimitations(Inst, Operands)) { 4525 return false; 4526 } 4527 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4528 return false; 4529 } 4530 if (!validateIntClampSupported(Inst)) { 4531 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4532 "integer clamping is not supported on this GPU"); 4533 return false; 4534 } 4535 if (!validateOpSel(Inst)) { 4536 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4537 "invalid op_sel operand"); 4538 return false; 4539 } 4540 if (!validateDPP(Inst, Operands)) { 4541 return false; 4542 } 4543 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4544 if (!validateMIMGD16(Inst)) { 4545 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4546 "d16 modifier is not supported on this GPU"); 4547 return false; 4548 } 4549 if (!validateMIMGDim(Inst)) { 4550 Error(IDLoc, "dim modifier is required on this GPU"); 4551 return false; 4552 } 4553 if (!validateMIMGMSAA(Inst)) { 4554 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4555 "invalid dim; must be MSAA type"); 4556 return false; 4557 } 4558 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4559 Error(IDLoc, *ErrMsg); 4560 return false; 4561 } 4562 if (!validateMIMGAddrSize(Inst)) { 4563 Error(IDLoc, 4564 "image address size does not match dim and a16"); 4565 return false; 4566 } 4567 if (!validateMIMGAtomicDMask(Inst)) { 4568 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4569 "invalid atomic image dmask"); 4570 return false; 4571 } 4572 if (!validateMIMGGatherDMask(Inst)) { 4573 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4574 "invalid image_gather dmask: only one bit must be set"); 4575 return false; 4576 } 4577 if (!validateMovrels(Inst, Operands)) { 4578 return false; 4579 } 4580 if (!validateFlatOffset(Inst, Operands)) { 4581 return false; 4582 } 4583 if (!validateSMEMOffset(Inst, Operands)) { 4584 return false; 4585 } 4586 if (!validateMAIAccWrite(Inst, Operands)) { 4587 return false; 4588 } 4589 if (!validateMFMA(Inst, Operands)) { 4590 return false; 4591 } 4592 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4593 return false; 4594 } 4595 4596 if (!validateAGPRLdSt(Inst)) { 4597 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4598 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4599 : "invalid register class: agpr loads and stores not supported on this GPU" 4600 ); 4601 return false; 4602 } 4603 if (!validateVGPRAlign(Inst)) { 4604 Error(IDLoc, 4605 "invalid register class: vgpr tuples must be 64 bit aligned"); 4606 return false; 4607 } 4608 if (!validateGWS(Inst, Operands)) { 4609 return false; 4610 } 4611 4612 if (!validateBLGP(Inst, Operands)) { 4613 return false; 4614 } 4615 4616 if (!validateDivScale(Inst)) { 4617 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4618 return false; 4619 } 4620 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4621 return false; 4622 } 4623 4624 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4625 return false; 4626 } 4627 4628 return true; 4629 } 4630 4631 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4632 const FeatureBitset &FBS, 4633 unsigned VariantID = 0); 4634 4635 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4636 const FeatureBitset &AvailableFeatures, 4637 unsigned VariantID); 4638 4639 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4640 const FeatureBitset &FBS) { 4641 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4642 } 4643 4644 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4645 const FeatureBitset &FBS, 4646 ArrayRef<unsigned> Variants) { 4647 for (auto Variant : Variants) { 4648 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4649 return true; 4650 } 4651 4652 return false; 4653 } 4654 4655 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4656 const SMLoc &IDLoc) { 4657 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4658 4659 // Check if requested instruction variant is supported. 4660 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4661 return false; 4662 4663 // This instruction is not supported. 4664 // Clear any other pending errors because they are no longer relevant. 4665 getParser().clearPendingErrors(); 4666 4667 // Requested instruction variant is not supported. 4668 // Check if any other variants are supported. 4669 StringRef VariantName = getMatchedVariantName(); 4670 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4671 return Error(IDLoc, 4672 Twine(VariantName, 4673 " variant of this instruction is not supported")); 4674 } 4675 4676 // Finally check if this instruction is supported on any other GPU. 4677 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4678 return Error(IDLoc, "instruction not supported on this GPU"); 4679 } 4680 4681 // Instruction not supported on any GPU. Probably a typo. 4682 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4683 return Error(IDLoc, "invalid instruction" + Suggestion); 4684 } 4685 4686 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4687 OperandVector &Operands, 4688 MCStreamer &Out, 4689 uint64_t &ErrorInfo, 4690 bool MatchingInlineAsm) { 4691 MCInst Inst; 4692 unsigned Result = Match_Success; 4693 for (auto Variant : getMatchedVariants()) { 4694 uint64_t EI; 4695 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4696 Variant); 4697 // We order match statuses from least to most specific. 
We use most specific 4698 // status as resulting 4699 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4700 if ((R == Match_Success) || 4701 (R == Match_PreferE32) || 4702 (R == Match_MissingFeature && Result != Match_PreferE32) || 4703 (R == Match_InvalidOperand && Result != Match_MissingFeature 4704 && Result != Match_PreferE32) || 4705 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4706 && Result != Match_MissingFeature 4707 && Result != Match_PreferE32)) { 4708 Result = R; 4709 ErrorInfo = EI; 4710 } 4711 if (R == Match_Success) 4712 break; 4713 } 4714 4715 if (Result == Match_Success) { 4716 if (!validateInstruction(Inst, IDLoc, Operands)) { 4717 return true; 4718 } 4719 Inst.setLoc(IDLoc); 4720 Out.emitInstruction(Inst, getSTI()); 4721 return false; 4722 } 4723 4724 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4725 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4726 return true; 4727 } 4728 4729 switch (Result) { 4730 default: break; 4731 case Match_MissingFeature: 4732 // It has been verified that the specified instruction 4733 // mnemonic is valid. A match was found but it requires 4734 // features which are not supported on this GPU. 4735 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4736 4737 case Match_InvalidOperand: { 4738 SMLoc ErrorLoc = IDLoc; 4739 if (ErrorInfo != ~0ULL) { 4740 if (ErrorInfo >= Operands.size()) { 4741 return Error(IDLoc, "too few operands for instruction"); 4742 } 4743 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4744 if (ErrorLoc == SMLoc()) 4745 ErrorLoc = IDLoc; 4746 } 4747 return Error(ErrorLoc, "invalid operand for instruction"); 4748 } 4749 4750 case Match_PreferE32: 4751 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4752 "should be encoded as e32"); 4753 case Match_MnemonicFail: 4754 llvm_unreachable("Invalid instructions should have been handled already"); 4755 } 4756 llvm_unreachable("Implement any new match types added!"); 4757 } 4758 4759 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4760 int64_t Tmp = -1; 4761 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4762 return true; 4763 } 4764 if (getParser().parseAbsoluteExpression(Tmp)) { 4765 return true; 4766 } 4767 Ret = static_cast<uint32_t>(Tmp); 4768 return false; 4769 } 4770 4771 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4772 uint32_t &Minor) { 4773 if (ParseAsAbsoluteExpression(Major)) 4774 return TokError("invalid major version"); 4775 4776 if (!trySkipToken(AsmToken::Comma)) 4777 return TokError("minor version number required, comma expected"); 4778 4779 if (ParseAsAbsoluteExpression(Minor)) 4780 return TokError("invalid minor version"); 4781 4782 return false; 4783 } 4784 4785 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4786 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4787 return TokError("directive only supported for amdgcn architecture"); 4788 4789 std::string TargetIDDirective; 4790 SMLoc TargetStart = getTok().getLoc(); 4791 if (getParser().parseEscapedString(TargetIDDirective)) 4792 return true; 4793 4794 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4795 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4796 return getParser().Error(TargetRange.Start, 4797 (Twine(".amdgcn_target directive's target id ") + 4798 Twine(TargetIDDirective) + 4799 Twine(" does not match the specified target id ") + 4800 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4801 4802 return false; 4803 } 4804 4805 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4806 return Error(Range.Start, "value out of range", Range); 4807 } 4808 4809 bool AMDGPUAsmParser::calculateGPRBlocks( 4810 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4811 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4812 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4813 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4814 // TODO(scott.linder): These calculations are duplicated from 4815 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4816 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4817 4818 unsigned NumVGPRs = NextFreeVGPR; 4819 unsigned NumSGPRs = NextFreeSGPR; 4820 4821 if (Version.Major >= 10) 4822 NumSGPRs = 0; 4823 else { 4824 unsigned MaxAddressableNumSGPRs = 4825 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4826 4827 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4828 NumSGPRs > MaxAddressableNumSGPRs) 4829 return OutOfRangeError(SGPRRange); 4830 4831 NumSGPRs += 4832 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4833 4834 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4835 NumSGPRs > MaxAddressableNumSGPRs) 4836 return OutOfRangeError(SGPRRange); 4837 4838 if (Features.test(FeatureSGPRInitBug)) 4839 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4840 } 4841 4842 VGPRBlocks = 4843 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4844 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4845 4846 return false; 4847 } 4848 4849 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4850 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4851 return TokError("directive only supported for amdgcn architecture"); 4852 4853 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4854 return TokError("directive only supported for amdhsa OS"); 4855 4856 StringRef KernelName; 4857 if (getParser().parseIdentifier(KernelName)) 4858 return true; 4859 4860 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4861 4862 StringSet<> Seen; 4863 4864 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4865 4866 SMRange VGPRRange; 4867 uint64_t NextFreeVGPR = 0; 4868 uint64_t AccumOffset = 0; 4869 uint64_t SharedVGPRCount = 0; 4870 SMRange SGPRRange; 4871 uint64_t NextFreeSGPR = 0; 4872 4873 // Count the number of user SGPRs implied from the enabled feature bits. 4874 unsigned ImpliedUserSGPRCount = 0; 4875 4876 // Track if the asm explicitly contains the directive for the user SGPR 4877 // count. 
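  // An explicit .amdhsa_user_sgpr_count may be larger than the implied count,
  // but a smaller value is rejected after the directive loop.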
4878 Optional<unsigned> ExplicitUserSGPRCount; 4879 bool ReserveVCC = true; 4880 bool ReserveFlatScr = true; 4881 Optional<bool> EnableWavefrontSize32; 4882 4883 while (true) { 4884 while (trySkipToken(AsmToken::EndOfStatement)); 4885 4886 StringRef ID; 4887 SMRange IDRange = getTok().getLocRange(); 4888 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4889 return true; 4890 4891 if (ID == ".end_amdhsa_kernel") 4892 break; 4893 4894 if (Seen.find(ID) != Seen.end()) 4895 return TokError(".amdhsa_ directives cannot be repeated"); 4896 Seen.insert(ID); 4897 4898 SMLoc ValStart = getLoc(); 4899 int64_t IVal; 4900 if (getParser().parseAbsoluteExpression(IVal)) 4901 return true; 4902 SMLoc ValEnd = getLoc(); 4903 SMRange ValRange = SMRange(ValStart, ValEnd); 4904 4905 if (IVal < 0) 4906 return OutOfRangeError(ValRange); 4907 4908 uint64_t Val = IVal; 4909 4910 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4911 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4912 return OutOfRangeError(RANGE); \ 4913 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4914 4915 if (ID == ".amdhsa_group_segment_fixed_size") { 4916 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4917 return OutOfRangeError(ValRange); 4918 KD.group_segment_fixed_size = Val; 4919 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4920 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4921 return OutOfRangeError(ValRange); 4922 KD.private_segment_fixed_size = Val; 4923 } else if (ID == ".amdhsa_kernarg_size") { 4924 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4925 return OutOfRangeError(ValRange); 4926 KD.kernarg_size = Val; 4927 } else if (ID == ".amdhsa_user_sgpr_count") { 4928 ExplicitUserSGPRCount = Val; 4929 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4930 if (hasArchitectedFlatScratch()) 4931 return Error(IDRange.Start, 4932 "directive is not supported with architected flat scratch", 4933 IDRange); 4934 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4935 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4936 Val, ValRange); 4937 if (Val) 4938 ImpliedUserSGPRCount += 4; 4939 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4940 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4941 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4942 ValRange); 4943 if (Val) 4944 ImpliedUserSGPRCount += 2; 4945 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4946 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4947 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4948 ValRange); 4949 if (Val) 4950 ImpliedUserSGPRCount += 2; 4951 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4952 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4953 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4954 Val, ValRange); 4955 if (Val) 4956 ImpliedUserSGPRCount += 2; 4957 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4958 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4959 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4960 ValRange); 4961 if (Val) 4962 ImpliedUserSGPRCount += 2; 4963 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4964 if (hasArchitectedFlatScratch()) 4965 return Error(IDRange.Start, 4966 "directive is not supported with architected flat scratch", 4967 IDRange); 4968 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4969 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4970 ValRange); 4971 if (Val) 4972 ImpliedUserSGPRCount += 2; 4973 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4974 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4975 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4976 Val, ValRange); 4977 if (Val) 4978 ImpliedUserSGPRCount += 1; 4979 } else if (ID == ".amdhsa_wavefront_size32") { 4980 if (IVersion.Major < 10) 4981 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4982 EnableWavefrontSize32 = Val; 4983 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4984 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4985 Val, ValRange); 4986 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4987 if (hasArchitectedFlatScratch()) 4988 return Error(IDRange.Start, 4989 "directive is not supported with architected flat scratch", 4990 IDRange); 4991 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4992 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4993 } else if (ID == ".amdhsa_enable_private_segment") { 4994 if (!hasArchitectedFlatScratch()) 4995 return Error( 4996 IDRange.Start, 4997 "directive is not supported without architected flat scratch", 4998 IDRange); 4999 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5000 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5001 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5002 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5003 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5004 ValRange); 5005 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5006 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5007 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5008 ValRange); 5009 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5011 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5012 ValRange); 5013 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5015 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5016 ValRange); 5017 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5019 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5020 ValRange); 5021 } else if (ID == ".amdhsa_next_free_vgpr") { 5022 VGPRRange = ValRange; 5023 NextFreeVGPR = Val; 5024 } else if (ID == ".amdhsa_next_free_sgpr") { 5025 SGPRRange = ValRange; 5026 NextFreeSGPR = Val; 5027 } else if (ID == ".amdhsa_accum_offset") { 5028 if (!isGFX90A()) 5029 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5030 AccumOffset = Val; 5031 } else if (ID == ".amdhsa_reserve_vcc") { 5032 if (!isUInt<1>(Val)) 5033 return OutOfRangeError(ValRange); 5034 ReserveVCC = Val; 5035 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5036 if (IVersion.Major < 7) 5037 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5038 if (hasArchitectedFlatScratch()) 5039 return Error(IDRange.Start, 5040 "directive is not supported with architected flat scratch", 5041 IDRange); 5042 if (!isUInt<1>(Val)) 5043 return OutOfRangeError(ValRange); 5044 ReserveFlatScr = Val; 5045 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5046 if (IVersion.Major < 8) 5047 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5048 if (!isUInt<1>(Val)) 5049 return OutOfRangeError(ValRange); 5050 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5051 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5052 IDRange); 5053 } else if (ID == ".amdhsa_float_round_mode_32") { 5054 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5055 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5056 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5057 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5058 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5059 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5060 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5061 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5062 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5063 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5064 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5065 ValRange); 5066 } else if (ID == ".amdhsa_dx10_clamp") { 5067 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5068 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5069 } else if (ID == ".amdhsa_ieee_mode") { 5070 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5071 Val, ValRange); 5072 } else if (ID == ".amdhsa_fp16_overflow") { 5073 if (IVersion.Major < 9) 5074 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5076 ValRange); 5077 } else if (ID == ".amdhsa_tg_split") { 5078 if (!isGFX90A()) 5079 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5081 ValRange); 5082 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5083 if (IVersion.Major < 10) 5084 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5086 ValRange); 5087 } else if (ID == ".amdhsa_memory_ordered") { 5088 if (IVersion.Major < 10) 5089 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5091 ValRange); 5092 } else if (ID == ".amdhsa_forward_progress") { 5093 if (IVersion.Major < 10) 5094 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5096 ValRange); 5097 } else if (ID == ".amdhsa_shared_vgpr_count") { 5098 if (IVersion.Major < 10) 5099 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5100 SharedVGPRCount = Val; 5101 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5102 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5103 ValRange); 5104 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5105 PARSE_BITS_ENTRY( 5106 KD.compute_pgm_rsrc2, 5107 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5108 ValRange); 5109 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5110 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5111 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5112 Val, ValRange); 5113 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5114 PARSE_BITS_ENTRY( 5115 KD.compute_pgm_rsrc2, 5116 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5117 ValRange); 5118 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5119 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5120 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5121 Val, ValRange); 5122 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5123 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5124 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5125 Val, ValRange); 5126 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5127 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5128 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5129 Val, ValRange); 5130 } else if (ID == ".amdhsa_exception_int_div_zero") { 5131 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5132 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5133 Val, ValRange); 5134 } else { 5135 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5136 } 5137 5138 #undef PARSE_BITS_ENTRY 5139 } 5140 5141 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5142 return TokError(".amdhsa_next_free_vgpr directive is required"); 5143 5144 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5145 return TokError(".amdhsa_next_free_sgpr directive is required"); 5146 5147 unsigned VGPRBlocks; 5148 unsigned SGPRBlocks; 5149 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5150 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5151 EnableWavefrontSize32, NextFreeVGPR, 5152 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5153 SGPRBlocks)) 5154 return true; 5155 5156 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5157 VGPRBlocks)) 5158 return OutOfRangeError(VGPRRange); 5159 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5160 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5161 5162 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5163 SGPRBlocks)) 5164 return OutOfRangeError(SGPRRange); 5165 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5166 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5167 SGPRBlocks); 5168 5169 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5170 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5171 "enabled user SGPRs"); 5172 5173 unsigned UserSGPRCount = 5174 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5175 5176 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5177 return TokError("too many user SGPRs enabled"); 5178 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5179 UserSGPRCount); 5180 5181 if (isGFX90A()) { 5182 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5183 return TokError(".amdhsa_accum_offset directive is required"); 5184 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5185 return TokError("accum_offset should be in range [4..256] in " 5186 "increments of 4"); 5187 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5188 return TokError("accum_offset exceeds total VGPR allocation"); 5189 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5190 (AccumOffset / 4 - 1)); 5191 } 5192 5193 if (IVersion.Major == 10) { 5194 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5195 if (SharedVGPRCount && EnableWavefrontSize32) { 5196 return TokError("shared_vgpr_count directive not valid on " 5197 "wavefront size 32"); 5198 } 5199 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5200 return TokError("shared_vgpr_count*2 + " 5201 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5202 "exceed 63"); 5203 } 5204 } 5205 5206 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5207 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5208 ReserveFlatScr); 5209 return false; 5210 } 5211 5212 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5213 uint32_t Major; 5214 uint32_t Minor; 5215 5216 if (ParseDirectiveMajorMinor(Major, Minor)) 5217 return true; 5218 5219 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5220 return false; 5221 } 5222 5223 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5224 uint32_t Major; 5225 uint32_t Minor; 5226 uint32_t Stepping; 5227 StringRef VendorName; 5228 StringRef ArchName; 5229 5230 // If this directive has no
arguments, then use the ISA version for the 5231 // targeted GPU. 5232 if (isToken(AsmToken::EndOfStatement)) { 5233 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5234 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5235 ISA.Stepping, 5236 "AMD", "AMDGPU"); 5237 return false; 5238 } 5239 5240 if (ParseDirectiveMajorMinor(Major, Minor)) 5241 return true; 5242 5243 if (!trySkipToken(AsmToken::Comma)) 5244 return TokError("stepping version number required, comma expected"); 5245 5246 if (ParseAsAbsoluteExpression(Stepping)) 5247 return TokError("invalid stepping version"); 5248 5249 if (!trySkipToken(AsmToken::Comma)) 5250 return TokError("vendor name required, comma expected"); 5251 5252 if (!parseString(VendorName, "invalid vendor name")) 5253 return true; 5254 5255 if (!trySkipToken(AsmToken::Comma)) 5256 return TokError("arch name required, comma expected"); 5257 5258 if (!parseString(ArchName, "invalid arch name")) 5259 return true; 5260 5261 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5262 VendorName, ArchName); 5263 return false; 5264 } 5265 5266 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5267 amd_kernel_code_t &Header) { 5268 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5269 // assembly for backwards compatibility. 5270 if (ID == "max_scratch_backing_memory_byte_size") { 5271 Parser.eatToEndOfStatement(); 5272 return false; 5273 } 5274 5275 SmallString<40> ErrStr; 5276 raw_svector_ostream Err(ErrStr); 5277 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5278 return TokError(Err.str()); 5279 } 5280 Lex(); 5281 5282 if (ID == "enable_wavefront_size32") { 5283 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5284 if (!isGFX10Plus()) 5285 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5286 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5287 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5288 } else { 5289 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5290 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5291 } 5292 } 5293 5294 if (ID == "wavefront_size") { 5295 if (Header.wavefront_size == 5) { 5296 if (!isGFX10Plus()) 5297 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5298 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5299 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5300 } else if (Header.wavefront_size == 6) { 5301 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5302 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5303 } 5304 } 5305 5306 if (ID == "enable_wgp_mode") { 5307 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5308 !isGFX10Plus()) 5309 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5310 } 5311 5312 if (ID == "enable_mem_ordered") { 5313 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5314 !isGFX10Plus()) 5315 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5316 } 5317 5318 if (ID == "enable_fwd_progress") { 5319 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5320 !isGFX10Plus()) 5321 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5322 } 5323 5324 return false; 5325 } 5326 5327 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5328 amd_kernel_code_t Header; 5329 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5330 5331 while (true) { 
5332 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5333 // will set the current token to EndOfStatement. 5334 while(trySkipToken(AsmToken::EndOfStatement)); 5335 5336 StringRef ID; 5337 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5338 return true; 5339 5340 if (ID == ".end_amd_kernel_code_t") 5341 break; 5342 5343 if (ParseAMDKernelCodeTValue(ID, Header)) 5344 return true; 5345 } 5346 5347 getTargetStreamer().EmitAMDKernelCodeT(Header); 5348 5349 return false; 5350 } 5351 5352 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5353 StringRef KernelName; 5354 if (!parseId(KernelName, "expected symbol name")) 5355 return true; 5356 5357 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5358 ELF::STT_AMDGPU_HSA_KERNEL); 5359 5360 KernelScope.initialize(getContext()); 5361 return false; 5362 } 5363 5364 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5365 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5366 return Error(getLoc(), 5367 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5368 "architectures"); 5369 } 5370 5371 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5372 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5373 return Error(getParser().getTok().getLoc(), "target id must match options"); 5374 5375 getTargetStreamer().EmitISAVersion(); 5376 Lex(); 5377 5378 return false; 5379 } 5380 5381 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5382 const char *AssemblerDirectiveBegin; 5383 const char *AssemblerDirectiveEnd; 5384 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5385 isHsaAbiVersion3AndAbove(&getSTI()) 5386 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5387 HSAMD::V3::AssemblerDirectiveEnd) 5388 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5389 HSAMD::AssemblerDirectiveEnd); 5390 5391 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5392 return Error(getLoc(), 5393 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5394 "not available on non-amdhsa OSes")).str()); 5395 } 5396 5397 std::string HSAMetadataString; 5398 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5399 HSAMetadataString)) 5400 return true; 5401 5402 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5403 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5404 return Error(getLoc(), "invalid HSA metadata"); 5405 } else { 5406 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5407 return Error(getLoc(), "invalid HSA metadata"); 5408 } 5409 5410 return false; 5411 } 5412 5413 /// Common code to parse out a block of text (typically YAML) between start and 5414 /// end directives. 
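/// For example, HSA metadata is collected between .amdgpu_metadata and
/// .end_amdgpu_metadata on code object V3 and above; the enclosed text is
/// accumulated into CollectString (illustrative directive names).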
5415 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5416 const char *AssemblerDirectiveEnd, 5417 std::string &CollectString) { 5418 5419 raw_string_ostream CollectStream(CollectString); 5420 5421 getLexer().setSkipSpace(false); 5422 5423 bool FoundEnd = false; 5424 while (!isToken(AsmToken::Eof)) { 5425 while (isToken(AsmToken::Space)) { 5426 CollectStream << getTokenStr(); 5427 Lex(); 5428 } 5429 5430 if (trySkipId(AssemblerDirectiveEnd)) { 5431 FoundEnd = true; 5432 break; 5433 } 5434 5435 CollectStream << Parser.parseStringToEndOfStatement() 5436 << getContext().getAsmInfo()->getSeparatorString(); 5437 5438 Parser.eatToEndOfStatement(); 5439 } 5440 5441 getLexer().setSkipSpace(true); 5442 5443 if (isToken(AsmToken::Eof) && !FoundEnd) { 5444 return TokError(Twine("expected directive ") + 5445 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5446 } 5447 5448 CollectStream.flush(); 5449 return false; 5450 } 5451 5452 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5453 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5454 std::string String; 5455 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5456 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5457 return true; 5458 5459 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5460 if (!PALMetadata->setFromString(String)) 5461 return Error(getLoc(), "invalid PAL metadata"); 5462 return false; 5463 } 5464 5465 /// Parse the assembler directive for old linear-format PAL metadata. 5466 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5467 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5468 return Error(getLoc(), 5469 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5470 "not available on non-amdpal OSes")).str()); 5471 } 5472 5473 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5474 PALMetadata->setLegacy(); 5475 for (;;) { 5476 uint32_t Key, Value; 5477 if (ParseAsAbsoluteExpression(Key)) { 5478 return TokError(Twine("invalid value in ") + 5479 Twine(PALMD::AssemblerDirective)); 5480 } 5481 if (!trySkipToken(AsmToken::Comma)) { 5482 return TokError(Twine("expected an even number of values in ") + 5483 Twine(PALMD::AssemblerDirective)); 5484 } 5485 if (ParseAsAbsoluteExpression(Value)) { 5486 return TokError(Twine("invalid value in ") + 5487 Twine(PALMD::AssemblerDirective)); 5488 } 5489 PALMetadata->setRegister(Key, Value); 5490 if (!trySkipToken(AsmToken::Comma)) 5491 break; 5492 } 5493 return false; 5494 } 5495 5496 /// ParseDirectiveAMDGPULDS 5497 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5498 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5499 if (getParser().checkForValidSection()) 5500 return true; 5501 5502 StringRef Name; 5503 SMLoc NameLoc = getLoc(); 5504 if (getParser().parseIdentifier(Name)) 5505 return TokError("expected identifier in directive"); 5506 5507 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5508 if (parseToken(AsmToken::Comma, "expected ','")) 5509 return true; 5510 5511 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5512 5513 int64_t Size; 5514 SMLoc SizeLoc = getLoc(); 5515 if (getParser().parseAbsoluteExpression(Size)) 5516 return true; 5517 if (Size < 0) 5518 return Error(SizeLoc, "size must be non-negative"); 5519 if (Size > LocalMemorySize) 5520 return Error(SizeLoc, "size is too large"); 5521 5522 int64_t Alignment = 4; 5523 if (trySkipToken(AsmToken::Comma)) { 5524 SMLoc AlignLoc = getLoc(); 5525 if 
(getParser().parseAbsoluteExpression(Alignment)) 5526 return true; 5527 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5528 return Error(AlignLoc, "alignment must be a power of two"); 5529 5530 // Alignment larger than the size of LDS is possible in theory, as long 5531 // as the linker manages to place to symbol at address 0, but we do want 5532 // to make sure the alignment fits nicely into a 32-bit integer. 5533 if (Alignment >= 1u << 31) 5534 return Error(AlignLoc, "alignment is too large"); 5535 } 5536 5537 if (parseEOL()) 5538 return true; 5539 5540 Symbol->redefineIfPossible(); 5541 if (!Symbol->isUndefined()) 5542 return Error(NameLoc, "invalid symbol redefinition"); 5543 5544 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5545 return false; 5546 } 5547 5548 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5549 StringRef IDVal = DirectiveID.getString(); 5550 5551 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5552 if (IDVal == ".amdhsa_kernel") 5553 return ParseDirectiveAMDHSAKernel(); 5554 5555 // TODO: Restructure/combine with PAL metadata directive. 5556 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5557 return ParseDirectiveHSAMetadata(); 5558 } else { 5559 if (IDVal == ".hsa_code_object_version") 5560 return ParseDirectiveHSACodeObjectVersion(); 5561 5562 if (IDVal == ".hsa_code_object_isa") 5563 return ParseDirectiveHSACodeObjectISA(); 5564 5565 if (IDVal == ".amd_kernel_code_t") 5566 return ParseDirectiveAMDKernelCodeT(); 5567 5568 if (IDVal == ".amdgpu_hsa_kernel") 5569 return ParseDirectiveAMDGPUHsaKernel(); 5570 5571 if (IDVal == ".amd_amdgpu_isa") 5572 return ParseDirectiveISAVersion(); 5573 5574 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5575 return ParseDirectiveHSAMetadata(); 5576 } 5577 5578 if (IDVal == ".amdgcn_target") 5579 return ParseDirectiveAMDGCNTarget(); 5580 5581 if (IDVal == ".amdgpu_lds") 5582 return ParseDirectiveAMDGPULDS(); 5583 5584 if (IDVal == PALMD::AssemblerDirectiveBegin) 5585 return ParseDirectivePALMetadataBegin(); 5586 5587 if (IDVal == PALMD::AssemblerDirective) 5588 return ParseDirectivePALMetadata(); 5589 5590 return true; 5591 } 5592 5593 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5594 unsigned RegNo) { 5595 5596 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5597 return isGFX9Plus(); 5598 5599 // GFX10 has 2 more SGPRs 104 and 105. 5600 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5601 return hasSGPR104_SGPR105(); 5602 5603 switch (RegNo) { 5604 case AMDGPU::SRC_SHARED_BASE: 5605 case AMDGPU::SRC_SHARED_LIMIT: 5606 case AMDGPU::SRC_PRIVATE_BASE: 5607 case AMDGPU::SRC_PRIVATE_LIMIT: 5608 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5609 return isGFX9Plus(); 5610 case AMDGPU::TBA: 5611 case AMDGPU::TBA_LO: 5612 case AMDGPU::TBA_HI: 5613 case AMDGPU::TMA: 5614 case AMDGPU::TMA_LO: 5615 case AMDGPU::TMA_HI: 5616 return !isGFX9Plus(); 5617 case AMDGPU::XNACK_MASK: 5618 case AMDGPU::XNACK_MASK_LO: 5619 case AMDGPU::XNACK_MASK_HI: 5620 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5621 case AMDGPU::SGPR_NULL: 5622 return isGFX10Plus(); 5623 default: 5624 break; 5625 } 5626 5627 if (isCI()) 5628 return true; 5629 5630 if (isSI() || isGFX10Plus()) { 5631 // No flat_scr on SI. 5632 // On GFX10 flat scratch is not a valid register operand and can only be 5633 // accessed with s_setreg/s_getreg. 
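// For example, "s_mov_b32 s0, flat_scratch_lo" assembles on VI/GFX9 but is
// rejected here for SI and GFX10+ targets (illustrative).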
5634 switch (RegNo) {
5635 case AMDGPU::FLAT_SCR:
5636 case AMDGPU::FLAT_SCR_LO:
5637 case AMDGPU::FLAT_SCR_HI:
5638 return false;
5639 default:
5640 return true;
5641 }
5642 }
5643
5644 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5645 // SI/CI have.
5646 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5647 return hasSGPR102_SGPR103();
5648
5649 return true;
5650 }
5651
5652 OperandMatchResultTy
5653 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5654 OperandMode Mode) {
5655 // Try to parse with a custom parser
5656 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5657
5658 // If we successfully parsed the operand or if there was an error parsing,
5659 // we are done.
5660 //
5661 // If we are parsing after we reach EndOfStatement then this means we
5662 // are appending default values to the Operands list. This is only done
5663 // by a custom parser, so we shouldn't continue on to the generic parsing.
5664 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5665 isToken(AsmToken::EndOfStatement))
5666 return ResTy;
5667
5668 SMLoc RBraceLoc;
5669 SMLoc LBraceLoc = getLoc();
5670 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5671 unsigned Prefix = Operands.size();
5672
5673 for (;;) {
5674 auto Loc = getLoc();
5675 ResTy = parseReg(Operands);
5676 if (ResTy == MatchOperand_NoMatch)
5677 Error(Loc, "expected a register");
5678 if (ResTy != MatchOperand_Success)
5679 return MatchOperand_ParseFail;
5680
5681 RBraceLoc = getLoc();
5682 if (trySkipToken(AsmToken::RBrac))
5683 break;
5684
5685 if (!skipToken(AsmToken::Comma,
5686 "expected a comma or a closing square bracket")) {
5687 return MatchOperand_ParseFail;
5688 }
5689 }
5690
5691 if (Operands.size() - Prefix > 1) {
5692 Operands.insert(Operands.begin() + Prefix,
5693 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5694 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5695 }
5696
5697 return MatchOperand_Success;
5698 }
5699
5700 return parseRegOrImm(Operands);
5701 }
5702
5703 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5704 // Clear any forced encodings from the previous instruction.
5705 setForcedEncodingSize(0);
5706 setForcedDPP(false);
5707 setForcedSDWA(false);
5708
5709 if (Name.endswith("_e64_dpp")) {
5710 setForcedDPP(true);
5711 setForcedEncodingSize(64);
5712 return Name.substr(0, Name.size() - 8);
5713 } else if (Name.endswith("_e64")) {
5714 setForcedEncodingSize(64);
5715 return Name.substr(0, Name.size() - 4);
5716 } else if (Name.endswith("_e32")) {
5717 setForcedEncodingSize(32);
5718 return Name.substr(0, Name.size() - 4);
5719 } else if (Name.endswith("_dpp")) {
5720 setForcedDPP(true);
5721 return Name.substr(0, Name.size() - 4);
5722 } else if (Name.endswith("_sdwa")) {
5723 setForcedSDWA(true);
5724 return Name.substr(0, Name.size() - 5);
5725 }
5726 return Name;
5727 }
5728
5729 static void applyMnemonicAliases(StringRef &Mnemonic,
5730 const FeatureBitset &Features,
5731 unsigned VariantID);
5732
5733 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5734 StringRef Name,
5735 SMLoc NameLoc, OperandVector &Operands) {
5736 // Add the instruction mnemonic
5737 Name = parseMnemonicSuffix(Name);
5738
5739 // If the target architecture uses MnemonicAlias, call it here to parse
5740 // operands correctly.
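// (applyMnemonicAliases is generated by TableGen from the target's
// MnemonicAlias definitions; only its declaration appears above.)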
5741 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5742 5743 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5744 5745 bool IsMIMG = Name.startswith("image_"); 5746 5747 while (!trySkipToken(AsmToken::EndOfStatement)) { 5748 OperandMode Mode = OperandMode_Default; 5749 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5750 Mode = OperandMode_NSA; 5751 CPolSeen = 0; 5752 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5753 5754 if (Res != MatchOperand_Success) { 5755 checkUnsupportedInstruction(Name, NameLoc); 5756 if (!Parser.hasPendingError()) { 5757 // FIXME: use real operand location rather than the current location. 5758 StringRef Msg = 5759 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5760 "not a valid operand."; 5761 Error(getLoc(), Msg); 5762 } 5763 while (!trySkipToken(AsmToken::EndOfStatement)) { 5764 lex(); 5765 } 5766 return true; 5767 } 5768 5769 // Eat the comma or space if there is one. 5770 trySkipToken(AsmToken::Comma); 5771 } 5772 5773 return false; 5774 } 5775 5776 //===----------------------------------------------------------------------===// 5777 // Utility functions 5778 //===----------------------------------------------------------------------===// 5779 5780 OperandMatchResultTy 5781 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5782 5783 if (!trySkipId(Prefix, AsmToken::Colon)) 5784 return MatchOperand_NoMatch; 5785 5786 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5787 } 5788 5789 OperandMatchResultTy 5790 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5791 AMDGPUOperand::ImmTy ImmTy, 5792 bool (*ConvertResult)(int64_t&)) { 5793 SMLoc S = getLoc(); 5794 int64_t Value = 0; 5795 5796 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5797 if (Res != MatchOperand_Success) 5798 return Res; 5799 5800 if (ConvertResult && !ConvertResult(Value)) { 5801 Error(S, "invalid " + StringRef(Prefix) + " value."); 5802 } 5803 5804 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5805 return MatchOperand_Success; 5806 } 5807 5808 OperandMatchResultTy 5809 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5810 OperandVector &Operands, 5811 AMDGPUOperand::ImmTy ImmTy, 5812 bool (*ConvertResult)(int64_t&)) { 5813 SMLoc S = getLoc(); 5814 if (!trySkipId(Prefix, AsmToken::Colon)) 5815 return MatchOperand_NoMatch; 5816 5817 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5818 return MatchOperand_ParseFail; 5819 5820 unsigned Val = 0; 5821 const unsigned MaxSize = 4; 5822 5823 // FIXME: How to verify the number of elements matches the number of src 5824 // operands? 
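// The accepted form is a short list of 0/1 flags, e.g. "op_sel:[0,1]" or
// "neg_lo:[1,0,0,1]" (illustrative prefixes); at most four elements are
// allowed and each is folded into a bit of Val below.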
5825 for (int I = 0; ; ++I) { 5826 int64_t Op; 5827 SMLoc Loc = getLoc(); 5828 if (!parseExpr(Op)) 5829 return MatchOperand_ParseFail; 5830 5831 if (Op != 0 && Op != 1) { 5832 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5833 return MatchOperand_ParseFail; 5834 } 5835 5836 Val |= (Op << I); 5837 5838 if (trySkipToken(AsmToken::RBrac)) 5839 break; 5840 5841 if (I + 1 == MaxSize) { 5842 Error(getLoc(), "expected a closing square bracket"); 5843 return MatchOperand_ParseFail; 5844 } 5845 5846 if (!skipToken(AsmToken::Comma, "expected a comma")) 5847 return MatchOperand_ParseFail; 5848 } 5849 5850 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5851 return MatchOperand_Success; 5852 } 5853 5854 OperandMatchResultTy 5855 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5856 AMDGPUOperand::ImmTy ImmTy) { 5857 int64_t Bit; 5858 SMLoc S = getLoc(); 5859 5860 if (trySkipId(Name)) { 5861 Bit = 1; 5862 } else if (trySkipId("no", Name)) { 5863 Bit = 0; 5864 } else { 5865 return MatchOperand_NoMatch; 5866 } 5867 5868 if (Name == "r128" && !hasMIMG_R128()) { 5869 Error(S, "r128 modifier is not supported on this GPU"); 5870 return MatchOperand_ParseFail; 5871 } 5872 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5873 Error(S, "a16 modifier is not supported on this GPU"); 5874 return MatchOperand_ParseFail; 5875 } 5876 5877 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5878 ImmTy = AMDGPUOperand::ImmTyR128A16; 5879 5880 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5881 return MatchOperand_Success; 5882 } 5883 5884 OperandMatchResultTy 5885 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5886 unsigned CPolOn = 0; 5887 unsigned CPolOff = 0; 5888 SMLoc S = getLoc(); 5889 5890 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5891 if (isGFX940() && !Mnemo.startswith("s_")) { 5892 if (trySkipId("sc0")) 5893 CPolOn = AMDGPU::CPol::SC0; 5894 else if (trySkipId("nosc0")) 5895 CPolOff = AMDGPU::CPol::SC0; 5896 else if (trySkipId("nt")) 5897 CPolOn = AMDGPU::CPol::NT; 5898 else if (trySkipId("nont")) 5899 CPolOff = AMDGPU::CPol::NT; 5900 else if (trySkipId("sc1")) 5901 CPolOn = AMDGPU::CPol::SC1; 5902 else if (trySkipId("nosc1")) 5903 CPolOff = AMDGPU::CPol::SC1; 5904 else 5905 return MatchOperand_NoMatch; 5906 } 5907 else if (trySkipId("glc")) 5908 CPolOn = AMDGPU::CPol::GLC; 5909 else if (trySkipId("noglc")) 5910 CPolOff = AMDGPU::CPol::GLC; 5911 else if (trySkipId("slc")) 5912 CPolOn = AMDGPU::CPol::SLC; 5913 else if (trySkipId("noslc")) 5914 CPolOff = AMDGPU::CPol::SLC; 5915 else if (trySkipId("dlc")) 5916 CPolOn = AMDGPU::CPol::DLC; 5917 else if (trySkipId("nodlc")) 5918 CPolOff = AMDGPU::CPol::DLC; 5919 else if (trySkipId("scc")) 5920 CPolOn = AMDGPU::CPol::SCC; 5921 else if (trySkipId("noscc")) 5922 CPolOff = AMDGPU::CPol::SCC; 5923 else 5924 return MatchOperand_NoMatch; 5925 5926 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5927 Error(S, "dlc modifier is not supported on this GPU"); 5928 return MatchOperand_ParseFail; 5929 } 5930 5931 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5932 Error(S, "scc modifier is not supported on this GPU"); 5933 return MatchOperand_ParseFail; 5934 } 5935 5936 if (CPolSeen & (CPolOn | CPolOff)) { 5937 Error(S, "duplicate cache policy modifier"); 5938 return MatchOperand_ParseFail; 5939 } 5940 5941 CPolSeen |= (CPolOn | CPolOff); 5942 5943 for (unsigned I = 1; I != Operands.size(); ++I) { 5944 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5945 if (Op.isCPol()) { 5946 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5947 return MatchOperand_Success; 5948 } 5949 } 5950 5951 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5952 AMDGPUOperand::ImmTyCPol)); 5953 5954 return MatchOperand_Success; 5955 } 5956 5957 static void addOptionalImmOperand( 5958 MCInst& Inst, const OperandVector& Operands, 5959 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5960 AMDGPUOperand::ImmTy ImmT, 5961 int64_t Default = 0) { 5962 auto i = OptionalIdx.find(ImmT); 5963 if (i != OptionalIdx.end()) { 5964 unsigned Idx = i->second; 5965 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5966 } else { 5967 Inst.addOperand(MCOperand::createImm(Default)); 5968 } 5969 } 5970 5971 OperandMatchResultTy 5972 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5973 StringRef &Value, 5974 SMLoc &StringLoc) { 5975 if (!trySkipId(Prefix, AsmToken::Colon)) 5976 return MatchOperand_NoMatch; 5977 5978 StringLoc = getLoc(); 5979 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5980 : MatchOperand_ParseFail; 5981 } 5982 5983 //===----------------------------------------------------------------------===// 5984 // MTBUF format 5985 //===----------------------------------------------------------------------===// 5986 5987 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5988 int64_t MaxVal, 5989 int64_t &Fmt) { 5990 int64_t Val; 5991 SMLoc Loc = getLoc(); 5992 5993 auto Res = parseIntWithPrefix(Pref, Val); 5994 if (Res == MatchOperand_ParseFail) 5995 return false; 5996 if (Res == MatchOperand_NoMatch) 5997 return true; 5998 5999 if (Val < 0 || Val > MaxVal) { 6000 Error(Loc, Twine("out of range ", StringRef(Pref))); 6001 return false; 6002 } 6003 6004 Fmt = Val; 6005 return true; 6006 } 6007 6008 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6009 // values to live in a joint format operand in the MCInst encoding. 6010 OperandMatchResultTy 6011 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6012 using namespace llvm::AMDGPU::MTBUFFormat; 6013 6014 int64_t Dfmt = DFMT_UNDEF; 6015 int64_t Nfmt = NFMT_UNDEF; 6016 6017 // dfmt and nfmt can appear in either order, and each is optional. 6018 for (int I = 0; I < 2; ++I) { 6019 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6020 return MatchOperand_ParseFail; 6021 6022 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6023 return MatchOperand_ParseFail; 6024 } 6025 // Skip optional comma between dfmt/nfmt 6026 // but guard against 2 commas following each other. 6027 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6028 !peekToken().is(AsmToken::Comma)) { 6029 trySkipToken(AsmToken::Comma); 6030 } 6031 } 6032 6033 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6034 return MatchOperand_NoMatch; 6035 6036 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6037 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6038 6039 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6040 return MatchOperand_Success; 6041 } 6042 6043 OperandMatchResultTy 6044 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6045 using namespace llvm::AMDGPU::MTBUFFormat; 6046 6047 int64_t Fmt = UFMT_UNDEF; 6048 6049 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6050 return MatchOperand_ParseFail; 6051 6052 if (Fmt == UFMT_UNDEF) 6053 return MatchOperand_NoMatch; 6054 6055 Format = Fmt; 6056 return MatchOperand_Success; 6057 } 6058 6059 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6060 int64_t &Nfmt, 6061 StringRef FormatStr, 6062 SMLoc Loc) { 6063 using namespace llvm::AMDGPU::MTBUFFormat; 6064 int64_t Format; 6065 6066 Format = getDfmt(FormatStr); 6067 if (Format != DFMT_UNDEF) { 6068 Dfmt = Format; 6069 return true; 6070 } 6071 6072 Format = getNfmt(FormatStr, getSTI()); 6073 if (Format != NFMT_UNDEF) { 6074 Nfmt = Format; 6075 return true; 6076 } 6077 6078 Error(Loc, "unsupported format"); 6079 return false; 6080 } 6081 6082 OperandMatchResultTy 6083 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6084 SMLoc FormatLoc, 6085 int64_t &Format) { 6086 using namespace llvm::AMDGPU::MTBUFFormat; 6087 6088 int64_t Dfmt = DFMT_UNDEF; 6089 int64_t Nfmt = NFMT_UNDEF; 6090 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6091 return MatchOperand_ParseFail; 6092 6093 if (trySkipToken(AsmToken::Comma)) { 6094 StringRef Str; 6095 SMLoc Loc = getLoc(); 6096 if (!parseId(Str, "expected a format string") || 6097 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6098 return MatchOperand_ParseFail; 6099 } 6100 if (Dfmt == DFMT_UNDEF) { 6101 Error(Loc, "duplicate numeric format"); 6102 return MatchOperand_ParseFail; 6103 } else if (Nfmt == NFMT_UNDEF) { 6104 Error(Loc, "duplicate data format"); 6105 return MatchOperand_ParseFail; 6106 } 6107 } 6108 6109 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6110 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6111 6112 if (isGFX10Plus()) { 6113 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6114 if (Ufmt == UFMT_UNDEF) { 6115 Error(FormatLoc, "unsupported format"); 6116 return MatchOperand_ParseFail; 6117 } 6118 Format = Ufmt; 6119 } else { 6120 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6121 } 6122 6123 return MatchOperand_Success; 6124 } 6125 6126 OperandMatchResultTy 6127 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6128 SMLoc Loc, 6129 int64_t &Format) { 6130 using namespace llvm::AMDGPU::MTBUFFormat; 6131 6132 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6133 if (Id == UFMT_UNDEF) 6134 return MatchOperand_NoMatch; 6135 6136 if (!isGFX10Plus()) { 6137 Error(Loc, "unified format is not supported on this GPU"); 6138 return MatchOperand_ParseFail; 6139 } 6140 6141 Format = Id; 6142 return MatchOperand_Success; 6143 } 6144 6145 OperandMatchResultTy 6146 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6147 using namespace llvm::AMDGPU::MTBUFFormat; 6148 SMLoc Loc = getLoc(); 6149 6150 if (!parseExpr(Format)) 6151 return MatchOperand_ParseFail; 6152 if (!isValidFormatEncoding(Format, getSTI())) { 6153 Error(Loc, "out of range format"); 6154 return MatchOperand_ParseFail; 6155 } 6156 6157 return MatchOperand_Success; 6158 } 6159 6160 OperandMatchResultTy 6161 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6162 using namespace llvm::AMDGPU::MTBUFFormat; 6163 6164 if (!trySkipId("format", AsmToken::Colon)) 6165 return MatchOperand_NoMatch; 6166 6167 if (trySkipToken(AsmToken::LBrac)) { 6168 StringRef FormatStr; 6169 SMLoc Loc = getLoc(); 6170 if (!parseId(FormatStr, "expected a format string")) 6171 return MatchOperand_ParseFail; 6172 6173 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6174 if (Res == MatchOperand_NoMatch) 6175 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6176 if (Res != MatchOperand_Success) 6177 return Res; 6178 6179 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6180 return MatchOperand_ParseFail; 6181 6182 return MatchOperand_Success; 6183 } 6184 6185 return parseNumericFormat(Format); 6186 } 6187 6188 OperandMatchResultTy 6189 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6190 using namespace llvm::AMDGPU::MTBUFFormat; 6191 6192 int64_t Format = getDefaultFormatEncoding(getSTI()); 6193 OperandMatchResultTy Res; 6194 SMLoc Loc = getLoc(); 6195 6196 // Parse legacy format syntax. 6197 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6198 if (Res == MatchOperand_ParseFail) 6199 return Res; 6200 6201 bool FormatFound = (Res == MatchOperand_Success); 6202 6203 Operands.push_back( 6204 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6205 6206 if (FormatFound) 6207 trySkipToken(AsmToken::Comma); 6208 6209 if (isToken(AsmToken::EndOfStatement)) { 6210 // We are expecting an soffset operand, 6211 // but let matcher handle the error. 6212 return MatchOperand_Success; 6213 } 6214 6215 // Parse soffset. 
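// Note that the format may also follow soffset, e.g. (GFX10, illustrative):
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
// in which case it is parsed below and patched into the FORMAT operand
// that was already added.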
6216 Res = parseRegOrImm(Operands); 6217 if (Res != MatchOperand_Success) 6218 return Res; 6219 6220 trySkipToken(AsmToken::Comma); 6221 6222 if (!FormatFound) { 6223 Res = parseSymbolicOrNumericFormat(Format); 6224 if (Res == MatchOperand_ParseFail) 6225 return Res; 6226 if (Res == MatchOperand_Success) { 6227 auto Size = Operands.size(); 6228 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6229 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6230 Op.setImm(Format); 6231 } 6232 return MatchOperand_Success; 6233 } 6234 6235 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6236 Error(getLoc(), "duplicate format"); 6237 return MatchOperand_ParseFail; 6238 } 6239 return MatchOperand_Success; 6240 } 6241 6242 //===----------------------------------------------------------------------===// 6243 // ds 6244 //===----------------------------------------------------------------------===// 6245 6246 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6247 const OperandVector &Operands) { 6248 OptionalImmIndexMap OptionalIdx; 6249 6250 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6251 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6252 6253 // Add the register arguments 6254 if (Op.isReg()) { 6255 Op.addRegOperands(Inst, 1); 6256 continue; 6257 } 6258 6259 // Handle optional arguments 6260 OptionalIdx[Op.getImmTy()] = i; 6261 } 6262 6263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6264 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6266 6267 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6268 } 6269 6270 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6271 bool IsGdsHardcoded) { 6272 OptionalImmIndexMap OptionalIdx; 6273 6274 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6275 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6276 6277 // Add the register arguments 6278 if (Op.isReg()) { 6279 Op.addRegOperands(Inst, 1); 6280 continue; 6281 } 6282 6283 if (Op.isToken() && Op.getToken() == "gds") { 6284 IsGdsHardcoded = true; 6285 continue; 6286 } 6287 6288 // Handle optional arguments 6289 OptionalIdx[Op.getImmTy()] = i; 6290 } 6291 6292 AMDGPUOperand::ImmTy OffsetType = 6293 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6294 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6295 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6296 AMDGPUOperand::ImmTyOffset; 6297 6298 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6299 6300 if (!IsGdsHardcoded) { 6301 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6302 } 6303 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6304 } 6305 6306 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6307 OptionalImmIndexMap OptionalIdx; 6308 6309 unsigned OperandIdx[4]; 6310 unsigned EnMask = 0; 6311 int SrcIdx = 0; 6312 6313 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6314 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6315 6316 // Add the register arguments 6317 if (Op.isReg()) { 6318 assert(SrcIdx < 4); 6319 OperandIdx[SrcIdx] = Inst.size(); 6320 Op.addRegOperands(Inst, 1); 6321 ++SrcIdx; 6322 continue; 6323 } 6324 6325 if (Op.isOff()) { 6326 assert(SrcIdx < 4); 6327 OperandIdx[SrcIdx] = Inst.size(); 6328 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6329 ++SrcIdx; 6330 continue; 6331 } 6332 6333 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6334 Op.addImmOperands(Inst, 1); 6335 continue; 6336 } 6337 6338 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6339 continue; 6340 6341 // Handle optional arguments 6342 OptionalIdx[Op.getImmTy()] = i; 6343 } 6344 6345 assert(SrcIdx == 4); 6346 6347 bool Compr = false; 6348 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6349 Compr = true; 6350 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6351 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6352 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6353 } 6354 6355 for (auto i = 0; i < SrcIdx; ++i) { 6356 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6357 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6358 } 6359 } 6360 6361 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6363 6364 Inst.addOperand(MCOperand::createImm(EnMask)); 6365 } 6366 6367 //===----------------------------------------------------------------------===// 6368 // s_waitcnt 6369 //===----------------------------------------------------------------------===// 6370 6371 static bool 6372 encodeCnt( 6373 const AMDGPU::IsaVersion ISA, 6374 int64_t &IntVal, 6375 int64_t CntVal, 6376 bool Saturate, 6377 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6378 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6379 { 6380 bool Failed = false; 6381 6382 IntVal = encode(ISA, IntVal, CntVal); 6383 if (CntVal != decode(ISA, IntVal)) { 6384 if (Saturate) { 6385 IntVal = encode(ISA, IntVal, -1); 6386 } else { 6387 Failed = true; 6388 } 6389 } 6390 return Failed; 6391 } 6392 6393 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6394 6395 SMLoc CntLoc = getLoc(); 6396 StringRef CntName = getTokenStr(); 6397 6398 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6399 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6400 return false; 6401 6402 int64_t CntVal; 6403 SMLoc ValLoc = getLoc(); 6404 if (!parseExpr(CntVal)) 6405 return false; 6406 6407 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6408 6409 bool Failed = true; 6410 bool Sat = CntName.endswith("_sat"); 6411 6412 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6413 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6414 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6415 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6416 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6417 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6418 } else { 6419 Error(CntLoc, "invalid counter name " + CntName); 6420 return false; 6421 } 6422 6423 if (Failed) { 6424 Error(ValLoc, "too large value for " + CntName); 6425 return false; 6426 } 6427 6428 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6429 return false; 6430 6431 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6432 if (isToken(AsmToken::EndOfStatement)) { 6433 Error(getLoc(), "expected a counter name"); 6434 return false; 6435 } 6436 } 6437 6438 return true; 6439 } 6440 6441 OperandMatchResultTy 6442 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6443 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6444 int64_t Waitcnt = getWaitcntBitMask(ISA); 6445 SMLoc S = getLoc(); 6446 6447 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6448 while (!isToken(AsmToken::EndOfStatement)) { 6449 if (!parseCnt(Waitcnt)) 6450 return MatchOperand_ParseFail; 6451 } 6452 } else { 6453 if (!parseExpr(Waitcnt)) 6454 return MatchOperand_ParseFail; 6455 } 6456 6457 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6458 return MatchOperand_Success; 6459 } 6460 6461 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6462 SMLoc FieldLoc = getLoc(); 6463 StringRef FieldName = getTokenStr(); 6464 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6465 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6466 return false; 6467 6468 SMLoc ValueLoc = getLoc(); 6469 StringRef ValueName = getTokenStr(); 6470 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6471 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6472 return false; 6473 6474 unsigned Shift; 6475 if (FieldName == "instid0") { 6476 Shift = 0; 6477 } else if (FieldName == "instskip") { 6478 Shift = 4; 6479 } else if (FieldName == "instid1") { 6480 Shift = 7; 6481 } else { 6482 Error(FieldLoc, "invalid field name " + FieldName); 6483 return false; 6484 } 6485 6486 int Value; 6487 if (Shift == 4) { 6488 // Parse values for instskip. 6489 Value = StringSwitch<int>(ValueName) 6490 .Case("SAME", 0) 6491 .Case("NEXT", 1) 6492 .Case("SKIP_1", 2) 6493 .Case("SKIP_2", 3) 6494 .Case("SKIP_3", 4) 6495 .Case("SKIP_4", 5) 6496 .Default(-1); 6497 } else { 6498 // Parse values for instid0 and instid1. 6499 Value = StringSwitch<int>(ValueName) 6500 .Case("NO_DEP", 0) 6501 .Case("VALU_DEP_1", 1) 6502 .Case("VALU_DEP_2", 2) 6503 .Case("VALU_DEP_3", 3) 6504 .Case("VALU_DEP_4", 4) 6505 .Case("TRANS32_DEP_1", 5) 6506 .Case("TRANS32_DEP_2", 6) 6507 .Case("TRANS32_DEP_3", 7) 6508 .Case("FMA_ACCUM_CYCLE_1", 8) 6509 .Case("SALU_CYCLE_1", 9) 6510 .Case("SALU_CYCLE_2", 10) 6511 .Case("SALU_CYCLE_3", 11) 6512 .Default(-1); 6513 } 6514 if (Value < 0) { 6515 Error(ValueLoc, "invalid value name " + ValueName); 6516 return false; 6517 } 6518 6519 Delay |= Value << Shift; 6520 return true; 6521 } 6522 6523 OperandMatchResultTy 6524 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6525 int64_t Delay = 0; 6526 SMLoc S = getLoc(); 6527 6528 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6529 do { 6530 if (!parseDelay(Delay)) 6531 return MatchOperand_ParseFail; 6532 } while (trySkipToken(AsmToken::Pipe)); 6533 } else { 6534 if (!parseExpr(Delay)) 6535 return MatchOperand_ParseFail; 6536 } 6537 6538 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6539 return MatchOperand_Success; 6540 } 6541 6542 bool 6543 AMDGPUOperand::isSWaitCnt() const { 6544 return isImm(); 6545 } 6546 6547 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6548 6549 //===----------------------------------------------------------------------===// 6550 // DepCtr 6551 //===----------------------------------------------------------------------===// 6552 6553 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6554 StringRef DepCtrName) { 6555 switch (ErrorId) { 6556 case OPR_ID_UNKNOWN: 6557 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6558 return; 6559 case OPR_ID_UNSUPPORTED: 6560 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6561 return; 6562 case OPR_ID_DUPLICATE: 6563 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6564 return; 6565 case OPR_VAL_INVALID: 6566 Error(Loc, Twine("invalid value for ", DepCtrName)); 6567 return; 6568 default: 6569 assert(false); 6570 } 6571 } 6572 6573 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6574 6575 using namespace llvm::AMDGPU::DepCtr; 6576 6577 SMLoc DepCtrLoc = getLoc(); 6578 StringRef DepCtrName = getTokenStr(); 6579 6580 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6581 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6582 return false; 6583 6584 int64_t ExprVal; 6585 if (!parseExpr(ExprVal)) 6586 return false; 6587 6588 unsigned PrevOprMask = UsedOprMask; 6589 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6590 6591 if (CntVal < 0) { 6592 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6593 return false; 6594 } 6595 6596 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6597 return false; 6598 6599 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6600 if (isToken(AsmToken::EndOfStatement)) { 6601 Error(getLoc(), "expected a counter name"); 6602 return false; 6603 } 6604 } 6605 6606 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6607 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6608 return true; 6609 } 6610 6611 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6612 using namespace llvm::AMDGPU::DepCtr; 6613 6614 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6615 SMLoc Loc = getLoc(); 6616 6617 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6618 unsigned UsedOprMask = 0; 6619 while (!isToken(AsmToken::EndOfStatement)) { 6620 if (!parseDepCtr(DepCtr, UsedOprMask)) 6621 return MatchOperand_ParseFail; 6622 } 6623 } else { 6624 if (!parseExpr(DepCtr)) 6625 return MatchOperand_ParseFail; 6626 } 6627 6628 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6629 return MatchOperand_Success; 6630 } 6631 6632 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6633 6634 //===----------------------------------------------------------------------===// 6635 // hwreg 6636 //===----------------------------------------------------------------------===// 6637 6638 bool 6639 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6640 OperandInfoTy &Offset, 6641 OperandInfoTy &Width) { 6642 using namespace llvm::AMDGPU::Hwreg; 6643 6644 // The register may be specified by name or using a numeric code 6645 HwReg.Loc = getLoc(); 6646 if (isToken(AsmToken::Identifier) && 6647 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6648 HwReg.IsSymbolic = true; 6649 lex(); // skip register name 6650 } else if (!parseExpr(HwReg.Id, "a register name")) { 6651 return false; 6652 } 6653 6654 if (trySkipToken(AsmToken::RParen)) 6655 return true; 6656 6657 // parse optional params 6658 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6659 return false; 6660 6661 Offset.Loc = getLoc(); 6662 if (!parseExpr(Offset.Id)) 6663 return false; 6664 6665 if (!skipToken(AsmToken::Comma, "expected a comma")) 6666 return false; 6667 6668 Width.Loc = getLoc(); 6669 return parseExpr(Width.Id) && 6670 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6671 } 6672 6673 bool 6674 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6675 const OperandInfoTy &Offset, 6676 const OperandInfoTy &Width) { 6677 6678 using namespace llvm::AMDGPU::Hwreg; 6679 6680 if (HwReg.IsSymbolic) { 6681 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6682 Error(HwReg.Loc, 6683 "specified hardware register is not supported on this GPU"); 6684 return false; 6685 } 6686 } else { 6687 if (!isValidHwreg(HwReg.Id)) { 6688 Error(HwReg.Loc, 6689 "invalid code of hardware register: only 6-bit values are legal"); 6690 return false; 6691 } 6692 } 6693 if (!isValidHwregOffset(Offset.Id)) { 6694 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6695 return false; 6696 } 6697 if (!isValidHwregWidth(Width.Id)) { 6698 Error(Width.Loc, 6699 "invalid bitfield width: only values from 1 to 32 are legal"); 6700 return false; 6701 } 6702 return true; 6703 } 6704 6705 OperandMatchResultTy 6706 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6707 using namespace llvm::AMDGPU::Hwreg; 6708 6709 int64_t ImmVal = 0; 6710 SMLoc Loc = getLoc(); 6711 6712 if (trySkipId("hwreg", AsmToken::LParen)) { 6713 
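// Symbolic form, e.g. "s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 32)"
// (illustrative); a raw 16-bit immediate is handled further below.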
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6714 OperandInfoTy Offset(OFFSET_DEFAULT_); 6715 OperandInfoTy Width(WIDTH_DEFAULT_); 6716 if (parseHwregBody(HwReg, Offset, Width) && 6717 validateHwreg(HwReg, Offset, Width)) { 6718 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6719 } else { 6720 return MatchOperand_ParseFail; 6721 } 6722 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6723 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6724 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6725 return MatchOperand_ParseFail; 6726 } 6727 } else { 6728 return MatchOperand_ParseFail; 6729 } 6730 6731 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6732 return MatchOperand_Success; 6733 } 6734 6735 bool AMDGPUOperand::isHwreg() const { 6736 return isImmTy(ImmTyHwreg); 6737 } 6738 6739 //===----------------------------------------------------------------------===// 6740 // sendmsg 6741 //===----------------------------------------------------------------------===// 6742 6743 bool 6744 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6745 OperandInfoTy &Op, 6746 OperandInfoTy &Stream) { 6747 using namespace llvm::AMDGPU::SendMsg; 6748 6749 Msg.Loc = getLoc(); 6750 if (isToken(AsmToken::Identifier) && 6751 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6752 Msg.IsSymbolic = true; 6753 lex(); // skip message name 6754 } else if (!parseExpr(Msg.Id, "a message name")) { 6755 return false; 6756 } 6757 6758 if (trySkipToken(AsmToken::Comma)) { 6759 Op.IsDefined = true; 6760 Op.Loc = getLoc(); 6761 if (isToken(AsmToken::Identifier) && 6762 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6763 lex(); // skip operation name 6764 } else if (!parseExpr(Op.Id, "an operation name")) { 6765 return false; 6766 } 6767 6768 if (trySkipToken(AsmToken::Comma)) { 6769 Stream.IsDefined = true; 6770 Stream.Loc = getLoc(); 6771 if (!parseExpr(Stream.Id)) 6772 return false; 6773 } 6774 } 6775 6776 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6777 } 6778 6779 bool 6780 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6781 const OperandInfoTy &Op, 6782 const OperandInfoTy &Stream) { 6783 using namespace llvm::AMDGPU::SendMsg; 6784 6785 // Validation strictness depends on whether message is specified 6786 // in a symbolic or in a numeric form. In the latter case 6787 // only encoding possibility is checked. 
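// For example, sendmsg(MSG_GS, GS_OP_EMIT, 0) is checked against the
// message/operation tables, while a numeric form such as sendmsg(2, 0)
// only has to be encodable (illustrative).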
6788 bool Strict = Msg.IsSymbolic; 6789 6790 if (Strict) { 6791 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6792 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6793 return false; 6794 } 6795 } else { 6796 if (!isValidMsgId(Msg.Id, getSTI())) { 6797 Error(Msg.Loc, "invalid message id"); 6798 return false; 6799 } 6800 } 6801 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6802 if (Op.IsDefined) { 6803 Error(Op.Loc, "message does not support operations"); 6804 } else { 6805 Error(Msg.Loc, "missing message operation"); 6806 } 6807 return false; 6808 } 6809 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6810 Error(Op.Loc, "invalid operation id"); 6811 return false; 6812 } 6813 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6814 Stream.IsDefined) { 6815 Error(Stream.Loc, "message operation does not support streams"); 6816 return false; 6817 } 6818 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6819 Error(Stream.Loc, "invalid message stream id"); 6820 return false; 6821 } 6822 return true; 6823 } 6824 6825 OperandMatchResultTy 6826 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6827 using namespace llvm::AMDGPU::SendMsg; 6828 6829 int64_t ImmVal = 0; 6830 SMLoc Loc = getLoc(); 6831 6832 if (trySkipId("sendmsg", AsmToken::LParen)) { 6833 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6834 OperandInfoTy Op(OP_NONE_); 6835 OperandInfoTy Stream(STREAM_ID_NONE_); 6836 if (parseSendMsgBody(Msg, Op, Stream) && 6837 validateSendMsg(Msg, Op, Stream)) { 6838 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6839 } else { 6840 return MatchOperand_ParseFail; 6841 } 6842 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6843 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6844 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6845 return MatchOperand_ParseFail; 6846 } 6847 } else { 6848 return MatchOperand_ParseFail; 6849 } 6850 6851 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6852 return MatchOperand_Success; 6853 } 6854 6855 bool AMDGPUOperand::isSendMsg() const { 6856 return isImmTy(ImmTySendMsg); 6857 } 6858 6859 //===----------------------------------------------------------------------===// 6860 // v_interp 6861 //===----------------------------------------------------------------------===// 6862 6863 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6864 StringRef Str; 6865 SMLoc S = getLoc(); 6866 6867 if (!parseId(Str)) 6868 return MatchOperand_NoMatch; 6869 6870 int Slot = StringSwitch<int>(Str) 6871 .Case("p10", 0) 6872 .Case("p20", 1) 6873 .Case("p0", 2) 6874 .Default(-1); 6875 6876 if (Slot == -1) { 6877 Error(S, "invalid interpolation slot"); 6878 return MatchOperand_ParseFail; 6879 } 6880 6881 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6882 AMDGPUOperand::ImmTyInterpSlot)); 6883 return MatchOperand_Success; 6884 } 6885 6886 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6887 StringRef Str; 6888 SMLoc S = getLoc(); 6889 6890 if (!parseId(Str)) 6891 return MatchOperand_NoMatch; 6892 6893 if (!Str.startswith("attr")) { 6894 Error(S, "invalid interpolation attribute"); 6895 return MatchOperand_ParseFail; 6896 } 6897 6898 StringRef Chan = Str.take_back(2); 6899 int AttrChan = StringSwitch<int>(Chan) 6900 .Case(".x", 0) 6901 .Case(".y", 1) 6902 .Case(".z", 2) 6903 .Case(".w", 3) 6904 .Default(-1); 6905 if (AttrChan == -1) { 6906 Error(S, "invalid or missing interpolation attribute channel"); 
6907 return MatchOperand_ParseFail; 6908 } 6909 6910 Str = Str.drop_back(2).drop_front(4); 6911 6912 uint8_t Attr; 6913 if (Str.getAsInteger(10, Attr)) { 6914 Error(S, "invalid or missing interpolation attribute number"); 6915 return MatchOperand_ParseFail; 6916 } 6917 6918 if (Attr > 63) { 6919 Error(S, "out of bounds interpolation attribute number"); 6920 return MatchOperand_ParseFail; 6921 } 6922 6923 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6924 6925 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6926 AMDGPUOperand::ImmTyInterpAttr)); 6927 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6928 AMDGPUOperand::ImmTyAttrChan)); 6929 return MatchOperand_Success; 6930 } 6931 6932 //===----------------------------------------------------------------------===// 6933 // exp 6934 //===----------------------------------------------------------------------===// 6935 6936 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6937 using namespace llvm::AMDGPU::Exp; 6938 6939 StringRef Str; 6940 SMLoc S = getLoc(); 6941 6942 if (!parseId(Str)) 6943 return MatchOperand_NoMatch; 6944 6945 unsigned Id = getTgtId(Str); 6946 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6947 Error(S, (Id == ET_INVALID) ? 6948 "invalid exp target" : 6949 "exp target is not supported on this GPU"); 6950 return MatchOperand_ParseFail; 6951 } 6952 6953 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6954 AMDGPUOperand::ImmTyExpTgt)); 6955 return MatchOperand_Success; 6956 } 6957 6958 //===----------------------------------------------------------------------===// 6959 // parser helpers 6960 //===----------------------------------------------------------------------===// 6961 6962 bool 6963 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6964 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6965 } 6966 6967 bool 6968 AMDGPUAsmParser::isId(const StringRef Id) const { 6969 return isId(getToken(), Id); 6970 } 6971 6972 bool 6973 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6974 return getTokenKind() == Kind; 6975 } 6976 6977 bool 6978 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6979 if (isId(Id)) { 6980 lex(); 6981 return true; 6982 } 6983 return false; 6984 } 6985 6986 bool 6987 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6988 if (isToken(AsmToken::Identifier)) { 6989 StringRef Tok = getTokenStr(); 6990 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6991 lex(); 6992 return true; 6993 } 6994 } 6995 return false; 6996 } 6997 6998 bool 6999 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7000 if (isId(Id) && peekToken().is(Kind)) { 7001 lex(); 7002 lex(); 7003 return true; 7004 } 7005 return false; 7006 } 7007 7008 bool 7009 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7010 if (isToken(Kind)) { 7011 lex(); 7012 return true; 7013 } 7014 return false; 7015 } 7016 7017 bool 7018 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7019 const StringRef ErrMsg) { 7020 if (!trySkipToken(Kind)) { 7021 Error(getLoc(), ErrMsg); 7022 return false; 7023 } 7024 return true; 7025 } 7026 7027 bool 7028 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7029 SMLoc S = getLoc(); 7030 7031 const MCExpr *Expr; 7032 if (Parser.parseExpression(Expr)) 7033 return false; 7034 7035 if (Expr->evaluateAsAbsolute(Imm)) 7036 return true; 7037 7038 if (Expected.empty()) { 7039 Error(S, "expected 
absolute expression"); 7040 } else { 7041 Error(S, Twine("expected ", Expected) + 7042 Twine(" or an absolute expression")); 7043 } 7044 return false; 7045 } 7046 7047 bool 7048 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7049 SMLoc S = getLoc(); 7050 7051 const MCExpr *Expr; 7052 if (Parser.parseExpression(Expr)) 7053 return false; 7054 7055 int64_t IntVal; 7056 if (Expr->evaluateAsAbsolute(IntVal)) { 7057 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7058 } else { 7059 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7060 } 7061 return true; 7062 } 7063 7064 bool 7065 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7066 if (isToken(AsmToken::String)) { 7067 Val = getToken().getStringContents(); 7068 lex(); 7069 return true; 7070 } else { 7071 Error(getLoc(), ErrMsg); 7072 return false; 7073 } 7074 } 7075 7076 bool 7077 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7078 if (isToken(AsmToken::Identifier)) { 7079 Val = getTokenStr(); 7080 lex(); 7081 return true; 7082 } else { 7083 if (!ErrMsg.empty()) 7084 Error(getLoc(), ErrMsg); 7085 return false; 7086 } 7087 } 7088 7089 AsmToken 7090 AMDGPUAsmParser::getToken() const { 7091 return Parser.getTok(); 7092 } 7093 7094 AsmToken 7095 AMDGPUAsmParser::peekToken() { 7096 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7097 } 7098 7099 void 7100 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7101 auto TokCount = getLexer().peekTokens(Tokens); 7102 7103 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7104 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7105 } 7106 7107 AsmToken::TokenKind 7108 AMDGPUAsmParser::getTokenKind() const { 7109 return getLexer().getKind(); 7110 } 7111 7112 SMLoc 7113 AMDGPUAsmParser::getLoc() const { 7114 return getToken().getLoc(); 7115 } 7116 7117 StringRef 7118 AMDGPUAsmParser::getTokenStr() const { 7119 return getToken().getString(); 7120 } 7121 7122 void 7123 AMDGPUAsmParser::lex() { 7124 Parser.Lex(); 7125 } 7126 7127 SMLoc 7128 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7129 const OperandVector &Operands) const { 7130 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7131 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7132 if (Test(Op)) 7133 return Op.getStartLoc(); 7134 } 7135 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7136 } 7137 7138 SMLoc 7139 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7140 const OperandVector &Operands) const { 7141 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7142 return getOperandLoc(Test, Operands); 7143 } 7144 7145 SMLoc 7146 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7147 const OperandVector &Operands) const { 7148 auto Test = [=](const AMDGPUOperand& Op) { 7149 return Op.isRegKind() && Op.getReg() == Reg; 7150 }; 7151 return getOperandLoc(Test, Operands); 7152 } 7153 7154 SMLoc 7155 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7156 auto Test = [](const AMDGPUOperand& Op) { 7157 return Op.IsImmKindLiteral() || Op.isExpr(); 7158 }; 7159 return getOperandLoc(Test, Operands); 7160 } 7161 7162 SMLoc 7163 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7164 auto Test = [](const AMDGPUOperand& Op) { 7165 return Op.isImmKindConst(); 7166 }; 7167 return getOperandLoc(Test, Operands); 7168 } 7169 7170 //===----------------------------------------------------------------------===// 7171 // swizzle 7172 
//===----------------------------------------------------------------------===// 7173 7174 LLVM_READNONE 7175 static unsigned 7176 encodeBitmaskPerm(const unsigned AndMask, 7177 const unsigned OrMask, 7178 const unsigned XorMask) { 7179 using namespace llvm::AMDGPU::Swizzle; 7180 7181 return BITMASK_PERM_ENC | 7182 (AndMask << BITMASK_AND_SHIFT) | 7183 (OrMask << BITMASK_OR_SHIFT) | 7184 (XorMask << BITMASK_XOR_SHIFT); 7185 } 7186 7187 bool 7188 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7189 const unsigned MinVal, 7190 const unsigned MaxVal, 7191 const StringRef ErrMsg, 7192 SMLoc &Loc) { 7193 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7194 return false; 7195 } 7196 Loc = getLoc(); 7197 if (!parseExpr(Op)) { 7198 return false; 7199 } 7200 if (Op < MinVal || Op > MaxVal) { 7201 Error(Loc, ErrMsg); 7202 return false; 7203 } 7204 7205 return true; 7206 } 7207 7208 bool 7209 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7210 const unsigned MinVal, 7211 const unsigned MaxVal, 7212 const StringRef ErrMsg) { 7213 SMLoc Loc; 7214 for (unsigned i = 0; i < OpNum; ++i) { 7215 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7216 return false; 7217 } 7218 7219 return true; 7220 } 7221 7222 bool 7223 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7224 using namespace llvm::AMDGPU::Swizzle; 7225 7226 int64_t Lane[LANE_NUM]; 7227 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7228 "expected a 2-bit lane id")) { 7229 Imm = QUAD_PERM_ENC; 7230 for (unsigned I = 0; I < LANE_NUM; ++I) { 7231 Imm |= Lane[I] << (LANE_SHIFT * I); 7232 } 7233 return true; 7234 } 7235 return false; 7236 } 7237 7238 bool 7239 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7240 using namespace llvm::AMDGPU::Swizzle; 7241 7242 SMLoc Loc; 7243 int64_t GroupSize; 7244 int64_t LaneIdx; 7245 7246 if (!parseSwizzleOperand(GroupSize, 7247 2, 32, 7248 "group size must be in the interval [2,32]", 7249 Loc)) { 7250 return false; 7251 } 7252 if (!isPowerOf2_64(GroupSize)) { 7253 Error(Loc, "group size must be a power of two"); 7254 return false; 7255 } 7256 if (parseSwizzleOperand(LaneIdx, 7257 0, GroupSize - 1, 7258 "lane id must be in the interval [0,group size - 1]", 7259 Loc)) { 7260 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7261 return true; 7262 } 7263 return false; 7264 } 7265 7266 bool 7267 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7268 using namespace llvm::AMDGPU::Swizzle; 7269 7270 SMLoc Loc; 7271 int64_t GroupSize; 7272 7273 if (!parseSwizzleOperand(GroupSize, 7274 2, 32, 7275 "group size must be in the interval [2,32]", 7276 Loc)) { 7277 return false; 7278 } 7279 if (!isPowerOf2_64(GroupSize)) { 7280 Error(Loc, "group size must be a power of two"); 7281 return false; 7282 } 7283 7284 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7285 return true; 7286 } 7287 7288 bool 7289 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7290 using namespace llvm::AMDGPU::Swizzle; 7291 7292 SMLoc Loc; 7293 int64_t GroupSize; 7294 7295 if (!parseSwizzleOperand(GroupSize, 7296 1, 16, 7297 "group size must be in the interval [1,16]", 7298 Loc)) { 7299 return false; 7300 } 7301 if (!isPowerOf2_64(GroupSize)) { 7302 Error(Loc, "group size must be a power of two"); 7303 return false; 7304 } 7305 7306 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7307 return true; 7308 } 7309 7310 bool 7311 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7312 using namespace llvm::AMDGPU::Swizzle; 7313 7314 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7315 return false; 7316 } 7317 7318 StringRef Ctl; 7319 SMLoc StrLoc = getLoc(); 7320 if (!parseString(Ctl)) { 7321 return false; 7322 } 7323 if (Ctl.size() != BITMASK_WIDTH) { 7324 Error(StrLoc, "expected a 5-character mask"); 7325 return false; 7326 } 7327 7328 unsigned AndMask = 0; 7329 unsigned OrMask = 0; 7330 unsigned XorMask = 0; 7331 7332 for (size_t i = 0; i < Ctl.size(); ++i) { 7333 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7334 switch(Ctl[i]) { 7335 default: 7336 Error(StrLoc, "invalid mask"); 7337 return false; 7338 case '0': 7339 break; 7340 case '1': 7341 OrMask |= Mask; 7342 break; 7343 case 'p': 7344 AndMask |= Mask; 7345 break; 7346 case 'i': 7347 AndMask |= Mask; 7348 XorMask |= Mask; 7349 break; 7350 } 7351 } 7352 7353 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7354 return true; 7355 } 7356 7357 bool 7358 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7359 7360 SMLoc OffsetLoc = getLoc(); 7361 7362 if (!parseExpr(Imm, "a swizzle macro")) { 7363 return false; 7364 } 7365 if (!isUInt<16>(Imm)) { 7366 Error(OffsetLoc, "expected a 16-bit offset"); 7367 return false; 7368 } 7369 return true; 7370 } 7371 7372 bool 7373 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7374 using namespace llvm::AMDGPU::Swizzle; 7375 7376 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7377 7378 SMLoc ModeLoc = getLoc(); 7379 bool Ok = false; 7380 7381 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7382 Ok = parseSwizzleQuadPerm(Imm); 7383 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7384 Ok = parseSwizzleBitmaskPerm(Imm); 7385 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7386 Ok = parseSwizzleBroadcast(Imm); 7387 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7388 Ok = parseSwizzleSwap(Imm); 7389 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7390 Ok = parseSwizzleReverse(Imm); 7391 } else { 7392 Error(ModeLoc, "expected a swizzle mode"); 7393 } 7394 7395 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7396 } 7397 7398 return false; 7399 } 7400 7401 OperandMatchResultTy 7402 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7403 SMLoc S = getLoc(); 7404 int64_t Imm = 0; 7405 7406 if (trySkipId("offset")) { 7407 7408 bool Ok = false; 7409 if (skipToken(AsmToken::Colon, "expected a colon")) { 7410 if (trySkipId("swizzle")) { 7411 Ok = parseSwizzleMacro(Imm); 7412 } else { 7413 Ok = parseSwizzleOffset(Imm); 7414 } 7415 } 7416 7417 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7418 7419 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7420 } else { 7421 // Swizzle "offset" operand is optional. 7422 // If it is omitted, try parsing other optional operands. 
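    // parseOptionalOpr returns MatchOperand_NoMatch if no other optional
    // operand is present either.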
7423 return parseOptionalOpr(Operands); 7424 } 7425 } 7426 7427 bool 7428 AMDGPUOperand::isSwizzle() const { 7429 return isImmTy(ImmTySwizzle); 7430 } 7431 7432 //===----------------------------------------------------------------------===// 7433 // VGPR Index Mode 7434 //===----------------------------------------------------------------------===// 7435 7436 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7437 7438 using namespace llvm::AMDGPU::VGPRIndexMode; 7439 7440 if (trySkipToken(AsmToken::RParen)) { 7441 return OFF; 7442 } 7443 7444 int64_t Imm = 0; 7445 7446 while (true) { 7447 unsigned Mode = 0; 7448 SMLoc S = getLoc(); 7449 7450 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7451 if (trySkipId(IdSymbolic[ModeId])) { 7452 Mode = 1 << ModeId; 7453 break; 7454 } 7455 } 7456 7457 if (Mode == 0) { 7458 Error(S, (Imm == 0)? 7459 "expected a VGPR index mode or a closing parenthesis" : 7460 "expected a VGPR index mode"); 7461 return UNDEF; 7462 } 7463 7464 if (Imm & Mode) { 7465 Error(S, "duplicate VGPR index mode"); 7466 return UNDEF; 7467 } 7468 Imm |= Mode; 7469 7470 if (trySkipToken(AsmToken::RParen)) 7471 break; 7472 if (!skipToken(AsmToken::Comma, 7473 "expected a comma or a closing parenthesis")) 7474 return UNDEF; 7475 } 7476 7477 return Imm; 7478 } 7479 7480 OperandMatchResultTy 7481 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7482 7483 using namespace llvm::AMDGPU::VGPRIndexMode; 7484 7485 int64_t Imm = 0; 7486 SMLoc S = getLoc(); 7487 7488 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7489 Imm = parseGPRIdxMacro(); 7490 if (Imm == UNDEF) 7491 return MatchOperand_ParseFail; 7492 } else { 7493 if (getParser().parseAbsoluteExpression(Imm)) 7494 return MatchOperand_ParseFail; 7495 if (Imm < 0 || !isUInt<4>(Imm)) { 7496 Error(S, "invalid immediate: only 4-bit values are legal"); 7497 return MatchOperand_ParseFail; 7498 } 7499 } 7500 7501 Operands.push_back( 7502 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7503 return MatchOperand_Success; 7504 } 7505 7506 bool AMDGPUOperand::isGPRIdxMode() const { 7507 return isImmTy(ImmTyGprIdxMode); 7508 } 7509 7510 //===----------------------------------------------------------------------===// 7511 // sopp branch targets 7512 //===----------------------------------------------------------------------===// 7513 7514 OperandMatchResultTy 7515 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7516 7517 // Make sure we are not parsing something 7518 // that looks like a label or an expression but is not. 7519 // This will improve error messages. 7520 if (isRegister() || isModifier()) 7521 return MatchOperand_NoMatch; 7522 7523 if (!parseExpr(Operands)) 7524 return MatchOperand_ParseFail; 7525 7526 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7527 assert(Opr.isImm() || Opr.isExpr()); 7528 SMLoc Loc = Opr.getStartLoc(); 7529 7530 // Currently we do not support arbitrary expressions as branch targets. 7531 // Only labels and absolute expressions are accepted. 
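  // Note: the checks below may report an error but still return
  // MatchOperand_Success; the operand itself has already been added to
  // Operands by parseExpr above.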
7532 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7533 Error(Loc, "expected an absolute expression or a label"); 7534 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7535 Error(Loc, "expected a 16-bit signed jump offset"); 7536 } 7537 7538 return MatchOperand_Success; 7539 } 7540 7541 //===----------------------------------------------------------------------===// 7542 // Boolean holding registers 7543 //===----------------------------------------------------------------------===// 7544 7545 OperandMatchResultTy 7546 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7547 return parseReg(Operands); 7548 } 7549 7550 //===----------------------------------------------------------------------===// 7551 // mubuf 7552 //===----------------------------------------------------------------------===// 7553 7554 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7555 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7556 } 7557 7558 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7559 const OperandVector &Operands, 7560 bool IsAtomic, 7561 bool IsLds) { 7562 OptionalImmIndexMap OptionalIdx; 7563 unsigned FirstOperandIdx = 1; 7564 bool IsAtomicReturn = false; 7565 7566 if (IsAtomic) { 7567 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7568 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7569 if (!Op.isCPol()) 7570 continue; 7571 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7572 break; 7573 } 7574 7575 if (!IsAtomicReturn) { 7576 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7577 if (NewOpc != -1) 7578 Inst.setOpcode(NewOpc); 7579 } 7580 7581 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7582 SIInstrFlags::IsAtomicRet; 7583 } 7584 7585 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7587 7588 // Add the register arguments 7589 if (Op.isReg()) { 7590 Op.addRegOperands(Inst, 1); 7591 // Insert a tied src for atomic return dst. 7592 // This cannot be postponed as subsequent calls to 7593 // addImmOperands rely on correct number of MC operands. 7594 if (IsAtomicReturn && i == FirstOperandIdx) 7595 Op.addRegOperands(Inst, 1); 7596 continue; 7597 } 7598 7599 // Handle the case where soffset is an immediate 7600 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7601 Op.addImmOperands(Inst, 1); 7602 continue; 7603 } 7604 7605 // Handle tokens like 'offen' which are sometimes hard-coded into the 7606 // asm string. There are no MCInst operands for these. 
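    // Such tokens were already matched against the asm string, so they can
    // simply be skipped here.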
7607 if (Op.isToken()) { 7608 continue; 7609 } 7610 assert(Op.isImm()); 7611 7612 // Handle optional arguments 7613 OptionalIdx[Op.getImmTy()] = i; 7614 } 7615 7616 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7618 7619 if (!IsLds) { // tfe is not legal with lds opcodes 7620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7621 } 7622 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7623 } 7624 7625 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7626 OptionalImmIndexMap OptionalIdx; 7627 7628 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7629 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7630 7631 // Add the register arguments 7632 if (Op.isReg()) { 7633 Op.addRegOperands(Inst, 1); 7634 continue; 7635 } 7636 7637 // Handle the case where soffset is an immediate 7638 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7639 Op.addImmOperands(Inst, 1); 7640 continue; 7641 } 7642 7643 // Handle tokens like 'offen' which are sometimes hard-coded into the 7644 // asm string. There are no MCInst operands for these. 7645 if (Op.isToken()) { 7646 continue; 7647 } 7648 assert(Op.isImm()); 7649 7650 // Handle optional arguments 7651 OptionalIdx[Op.getImmTy()] = i; 7652 } 7653 7654 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7655 AMDGPUOperand::ImmTyOffset); 7656 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7658 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7660 } 7661 7662 //===----------------------------------------------------------------------===// 7663 // mimg 7664 //===----------------------------------------------------------------------===// 7665 7666 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7667 bool IsAtomic) { 7668 unsigned I = 1; 7669 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7670 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7671 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7672 } 7673 7674 if (IsAtomic) { 7675 // Add src, same as dst 7676 assert(Desc.getNumDefs() == 1); 7677 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7678 } 7679 7680 OptionalImmIndexMap OptionalIdx; 7681 7682 for (unsigned E = Operands.size(); I != E; ++I) { 7683 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7684 7685 // Add the register arguments 7686 if (Op.isReg()) { 7687 Op.addRegOperands(Inst, 1); 7688 } else if (Op.isImmModifier()) { 7689 OptionalIdx[Op.getImmTy()] = I; 7690 } else if (!Op.isToken()) { 7691 llvm_unreachable("unexpected operand type"); 7692 } 7693 } 7694 7695 bool IsGFX10Plus = isGFX10Plus(); 7696 7697 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7698 if (IsGFX10Plus) 7699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7701 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7702 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7703 if (IsGFX10Plus) 7704 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7705 
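  // Not all MIMG opcodes accept tfe, so only add it when the instruction
  // description defines the operand.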
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7706 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7707 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7708 if (!IsGFX10Plus) 7709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7710 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7711 } 7712 7713 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7714 cvtMIMG(Inst, Operands, true); 7715 } 7716 7717 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7718 OptionalImmIndexMap OptionalIdx; 7719 bool IsAtomicReturn = false; 7720 7721 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7722 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7723 if (!Op.isCPol()) 7724 continue; 7725 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7726 break; 7727 } 7728 7729 if (!IsAtomicReturn) { 7730 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7731 if (NewOpc != -1) 7732 Inst.setOpcode(NewOpc); 7733 } 7734 7735 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7736 SIInstrFlags::IsAtomicRet; 7737 7738 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7739 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7740 7741 // Add the register arguments 7742 if (Op.isReg()) { 7743 Op.addRegOperands(Inst, 1); 7744 if (IsAtomicReturn && i == 1) 7745 Op.addRegOperands(Inst, 1); 7746 continue; 7747 } 7748 7749 // Handle the case where soffset is an immediate 7750 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7751 Op.addImmOperands(Inst, 1); 7752 continue; 7753 } 7754 7755 // Handle tokens like 'offen' which are sometimes hard-coded into the 7756 // asm string. There are no MCInst operands for these. 7757 if (Op.isToken()) { 7758 continue; 7759 } 7760 assert(Op.isImm()); 7761 7762 // Handle optional arguments 7763 OptionalIdx[Op.getImmTy()] = i; 7764 } 7765 7766 if ((int)Inst.getNumOperands() <= 7767 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7768 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7770 } 7771 7772 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7773 const OperandVector &Operands) { 7774 for (unsigned I = 1; I < Operands.size(); ++I) { 7775 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7776 if (Operand.isReg()) 7777 Operand.addRegOperands(Inst, 1); 7778 } 7779 7780 Inst.addOperand(MCOperand::createImm(1)); // a16 7781 } 7782 7783 //===----------------------------------------------------------------------===// 7784 // smrd 7785 //===----------------------------------------------------------------------===// 7786 7787 bool AMDGPUOperand::isSMRDOffset8() const { 7788 return isImm() && isUInt<8>(getImm()); 7789 } 7790 7791 bool AMDGPUOperand::isSMEMOffset() const { 7792 return isImmTy(ImmTyNone) || 7793 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7794 } 7795 7796 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7797 // 32-bit literals are only supported on CI and we only want to use them 7798 // when the offset is > 8-bits. 
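  // That is, accept an immediate that does not fit in 8 bits but does fit
  // in 32 bits.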
7799 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7800 } 7801 7802 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7803 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7804 } 7805 7806 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7807 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7808 } 7809 7810 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7811 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7812 } 7813 7814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7815 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7816 } 7817 7818 //===----------------------------------------------------------------------===// 7819 // vop3 7820 //===----------------------------------------------------------------------===// 7821 7822 static bool ConvertOmodMul(int64_t &Mul) { 7823 if (Mul != 1 && Mul != 2 && Mul != 4) 7824 return false; 7825 7826 Mul >>= 1; 7827 return true; 7828 } 7829 7830 static bool ConvertOmodDiv(int64_t &Div) { 7831 if (Div == 1) { 7832 Div = 0; 7833 return true; 7834 } 7835 7836 if (Div == 2) { 7837 Div = 3; 7838 return true; 7839 } 7840 7841 return false; 7842 } 7843 7844 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7845 // This is intentional and ensures compatibility with sp3. 7846 // See bug 35397 for details. 7847 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7848 if (BoundCtrl == 0 || BoundCtrl == 1) { 7849 BoundCtrl = 1; 7850 return true; 7851 } 7852 return false; 7853 } 7854 7855 // Note: the order in this table matches the order of operands in AsmString. 7856 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7857 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7858 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7859 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7860 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7861 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7862 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7863 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7864 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7865 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7866 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7867 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7868 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7869 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7870 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7871 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7872 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7873 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7874 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7875 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7876 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7877 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7878 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7879 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7880 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7881 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7882 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7883 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7884 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7885 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7886 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7887 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7888 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7889 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7890 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7891 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7892 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7893 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7894 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7895 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7896 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7897 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7898 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7899 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7900 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7901 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7902 }; 7903 7904 void AMDGPUAsmParser::onBeginOfFile() { 7905 if (!getParser().getStreamer().getTargetStreamer() || 7906 getSTI().getTargetTriple().getArch() == Triple::r600) 7907 return; 7908 7909 if (!getTargetStreamer().getTargetID()) 7910 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7911 7912 if (isHsaAbiVersion3AndAbove(&getSTI())) 7913 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7914 } 7915 7916 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7917 7918 OperandMatchResultTy res = parseOptionalOpr(Operands); 7919 7920 // This is a hack to enable hardcoded mandatory operands which follow 7921 // optional operands. 7922 // 7923 // Current design assumes that all operands after the first optional operand 7924 // are also optional. However implementation of some instructions violates 7925 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7926 // 7927 // To alleviate this problem, we have to (implicitly) parse extra operands 7928 // to make sure autogenerated parser of custom operands never hit hardcoded 7929 // mandatory operands. 
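  // Keep parsing optional operands, up to MAX_OPR_LOOKAHEAD of them, stopping
  // at the end of the statement or as soon as parseOptionalOpr does not
  // return MatchOperand_Success.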
7930 7931 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7932 if (res != MatchOperand_Success || 7933 isToken(AsmToken::EndOfStatement)) 7934 break; 7935 7936 trySkipToken(AsmToken::Comma); 7937 res = parseOptionalOpr(Operands); 7938 } 7939 7940 return res; 7941 } 7942 7943 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7944 OperandMatchResultTy res; 7945 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7946 // try to parse any optional operand here 7947 if (Op.IsBit) { 7948 res = parseNamedBit(Op.Name, Operands, Op.Type); 7949 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7950 res = parseOModOperand(Operands); 7951 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7952 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7953 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7954 res = parseSDWASel(Operands, Op.Name, Op.Type); 7955 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7956 res = parseSDWADstUnused(Operands); 7957 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7958 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7959 Op.Type == AMDGPUOperand::ImmTyNegLo || 7960 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7961 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7962 Op.ConvertResult); 7963 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7964 res = parseDim(Operands); 7965 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7966 res = parseCPol(Operands); 7967 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7968 res = parseDPP8(Operands); 7969 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7970 res = parseDPPCtrl(Operands); 7971 } else { 7972 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7973 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7974 res = parseOperandArrayWithPrefix("neg", Operands, 7975 AMDGPUOperand::ImmTyBLGP, 7976 nullptr); 7977 } 7978 } 7979 if (res != MatchOperand_NoMatch) { 7980 return res; 7981 } 7982 } 7983 return MatchOperand_NoMatch; 7984 } 7985 7986 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7987 StringRef Name = getTokenStr(); 7988 if (Name == "mul") { 7989 return parseIntWithPrefix("mul", Operands, 7990 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7991 } 7992 7993 if (Name == "div") { 7994 return parseIntWithPrefix("div", Operands, 7995 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7996 } 7997 7998 return MatchOperand_NoMatch; 7999 } 8000 8001 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8002 cvtVOP3P(Inst, Operands); 8003 8004 int Opc = Inst.getOpcode(); 8005 8006 int SrcNum; 8007 const int Ops[] = { AMDGPU::OpName::src0, 8008 AMDGPU::OpName::src1, 8009 AMDGPU::OpName::src2 }; 8010 for (SrcNum = 0; 8011 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8012 ++SrcNum); 8013 assert(SrcNum > 0); 8014 8015 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8016 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8017 8018 if ((OpSel & (1 << SrcNum)) != 0) { 8019 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8020 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8021 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8022 } 8023 } 8024 8025 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8026 // 1. This operand is input modifiers 8027 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8028 // 2. 
This is not last operand 8029 && Desc.NumOperands > (OpNum + 1) 8030 // 3. Next operand is register class 8031 && Desc.OpInfo[OpNum + 1].RegClass != -1 8032 // 4. Next register is not tied to any other operand 8033 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8034 } 8035 8036 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8037 { 8038 OptionalImmIndexMap OptionalIdx; 8039 unsigned Opc = Inst.getOpcode(); 8040 8041 unsigned I = 1; 8042 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8043 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8044 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8045 } 8046 8047 for (unsigned E = Operands.size(); I != E; ++I) { 8048 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8049 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8050 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8051 } else if (Op.isInterpSlot() || 8052 Op.isInterpAttr() || 8053 Op.isAttrChan()) { 8054 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8055 } else if (Op.isImmModifier()) { 8056 OptionalIdx[Op.getImmTy()] = I; 8057 } else { 8058 llvm_unreachable("unhandled operand type"); 8059 } 8060 } 8061 8062 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8063 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8064 } 8065 8066 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8068 } 8069 8070 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8072 } 8073 } 8074 8075 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8076 { 8077 OptionalImmIndexMap OptionalIdx; 8078 unsigned Opc = Inst.getOpcode(); 8079 8080 unsigned I = 1; 8081 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8082 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8083 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8084 } 8085 8086 for (unsigned E = Operands.size(); I != E; ++I) { 8087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8088 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8089 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8090 } else if (Op.isImmModifier()) { 8091 OptionalIdx[Op.getImmTy()] = I; 8092 } else { 8093 llvm_unreachable("unhandled operand type"); 8094 } 8095 } 8096 8097 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8098 8099 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8100 if (OpSelIdx != -1) 8101 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8102 8103 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8104 8105 if (OpSelIdx == -1) 8106 return; 8107 8108 const int Ops[] = { AMDGPU::OpName::src0, 8109 AMDGPU::OpName::src1, 8110 AMDGPU::OpName::src2 }; 8111 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8112 AMDGPU::OpName::src1_modifiers, 8113 AMDGPU::OpName::src2_modifiers }; 8114 8115 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8116 8117 for (int J = 0; J < 3; ++J) { 8118 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8119 if (OpIdx == -1) 8120 break; 8121 8122 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8123 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8124 8125 if ((OpSel & (1 << J)) != 0) 8126 ModVal |= 
SISrcMods::OP_SEL_0; 8127 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8128 (OpSel & (1 << 3)) != 0) 8129 ModVal |= SISrcMods::DST_OP_SEL; 8130 8131 Inst.getOperand(ModIdx).setImm(ModVal); 8132 } 8133 } 8134 8135 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8136 OptionalImmIndexMap &OptionalIdx) { 8137 unsigned Opc = Inst.getOpcode(); 8138 8139 unsigned I = 1; 8140 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8141 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8142 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8143 } 8144 8145 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8146 // This instruction has src modifiers 8147 for (unsigned E = Operands.size(); I != E; ++I) { 8148 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8149 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8150 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8151 } else if (Op.isImmModifier()) { 8152 OptionalIdx[Op.getImmTy()] = I; 8153 } else if (Op.isRegOrImm()) { 8154 Op.addRegOrImmOperands(Inst, 1); 8155 } else { 8156 llvm_unreachable("unhandled operand type"); 8157 } 8158 } 8159 } else { 8160 // No src modifiers 8161 for (unsigned E = Operands.size(); I != E; ++I) { 8162 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8163 if (Op.isMod()) { 8164 OptionalIdx[Op.getImmTy()] = I; 8165 } else { 8166 Op.addRegOrImmOperands(Inst, 1); 8167 } 8168 } 8169 } 8170 8171 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8172 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8173 } 8174 8175 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8176 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8177 } 8178 8179 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8180 // it has src2 register operand that is tied to dst operand 8181 // we don't allow modifiers for this operand in assembler so src2_modifiers 8182 // should be 0. 8183 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8184 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8185 Opc == AMDGPU::V_MAC_F32_e64_vi || 8186 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8187 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8188 Opc == AMDGPU::V_MAC_F16_e64_vi || 8189 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8190 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8191 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8192 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8193 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8194 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8195 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8196 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8197 auto it = Inst.begin(); 8198 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8199 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8200 ++it; 8201 // Copy the operand to ensure it's not invalidated when Inst grows. 
8202 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8203 } 8204 } 8205 8206 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8207 OptionalImmIndexMap OptionalIdx; 8208 cvtVOP3(Inst, Operands, OptionalIdx); 8209 } 8210 8211 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8212 OptionalImmIndexMap &OptIdx) { 8213 const int Opc = Inst.getOpcode(); 8214 const MCInstrDesc &Desc = MII.get(Opc); 8215 8216 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8217 8218 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8219 assert(!IsPacked); 8220 Inst.addOperand(Inst.getOperand(0)); 8221 } 8222 8223 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8224 // instruction, and then figure out where to actually put the modifiers 8225 8226 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8227 if (OpSelIdx != -1) { 8228 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8229 } 8230 8231 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8232 if (OpSelHiIdx != -1) { 8233 int DefaultVal = IsPacked ? -1 : 0; 8234 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8235 DefaultVal); 8236 } 8237 8238 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8239 if (NegLoIdx != -1) { 8240 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8241 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8242 } 8243 8244 const int Ops[] = { AMDGPU::OpName::src0, 8245 AMDGPU::OpName::src1, 8246 AMDGPU::OpName::src2 }; 8247 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8248 AMDGPU::OpName::src1_modifiers, 8249 AMDGPU::OpName::src2_modifiers }; 8250 8251 unsigned OpSel = 0; 8252 unsigned OpSelHi = 0; 8253 unsigned NegLo = 0; 8254 unsigned NegHi = 0; 8255 8256 if (OpSelIdx != -1) 8257 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8258 8259 if (OpSelHiIdx != -1) 8260 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8261 8262 if (NegLoIdx != -1) { 8263 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8264 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8265 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8266 } 8267 8268 for (int J = 0; J < 3; ++J) { 8269 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8270 if (OpIdx == -1) 8271 break; 8272 8273 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8274 8275 if (ModIdx == -1) 8276 continue; 8277 8278 uint32_t ModVal = 0; 8279 8280 if ((OpSel & (1 << J)) != 0) 8281 ModVal |= SISrcMods::OP_SEL_0; 8282 8283 if ((OpSelHi & (1 << J)) != 0) 8284 ModVal |= SISrcMods::OP_SEL_1; 8285 8286 if ((NegLo & (1 << J)) != 0) 8287 ModVal |= SISrcMods::NEG; 8288 8289 if ((NegHi & (1 << J)) != 0) 8290 ModVal |= SISrcMods::NEG_HI; 8291 8292 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8293 } 8294 } 8295 8296 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8297 OptionalImmIndexMap OptIdx; 8298 cvtVOP3(Inst, Operands, OptIdx); 8299 cvtVOP3P(Inst, Operands, OptIdx); 8300 } 8301 8302 //===----------------------------------------------------------------------===// 8303 // dpp 8304 //===----------------------------------------------------------------------===// 8305 8306 bool AMDGPUOperand::isDPP8() const { 8307 return isImmTy(ImmTyDPP8); 8308 } 8309 8310 bool AMDGPUOperand::isDPPCtrl() const { 8311 using namespace AMDGPU::DPP; 8312 8313 
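  // A dpp_ctrl operand is a 9-bit immediate of type ImmTyDppCtrl whose value
  // must fall into one of the supported DPP control ranges checked below.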
bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8314 if (result) { 8315 int64_t Imm = getImm(); 8316 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8317 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8318 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8319 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8320 (Imm == DppCtrl::WAVE_SHL1) || 8321 (Imm == DppCtrl::WAVE_ROL1) || 8322 (Imm == DppCtrl::WAVE_SHR1) || 8323 (Imm == DppCtrl::WAVE_ROR1) || 8324 (Imm == DppCtrl::ROW_MIRROR) || 8325 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8326 (Imm == DppCtrl::BCAST15) || 8327 (Imm == DppCtrl::BCAST31) || 8328 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8329 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8330 } 8331 return false; 8332 } 8333 8334 //===----------------------------------------------------------------------===// 8335 // mAI 8336 //===----------------------------------------------------------------------===// 8337 8338 bool AMDGPUOperand::isBLGP() const { 8339 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8340 } 8341 8342 bool AMDGPUOperand::isCBSZ() const { 8343 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8344 } 8345 8346 bool AMDGPUOperand::isABID() const { 8347 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8348 } 8349 8350 bool AMDGPUOperand::isS16Imm() const { 8351 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8352 } 8353 8354 bool AMDGPUOperand::isU16Imm() const { 8355 return isImm() && isUInt<16>(getImm()); 8356 } 8357 8358 //===----------------------------------------------------------------------===// 8359 // dim 8360 //===----------------------------------------------------------------------===// 8361 8362 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8363 // We want to allow "dim:1D" etc., 8364 // but the initial 1 is tokenized as an integer. 
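  // Glue the integer back onto the following identifier, but only if the two
  // tokens are adjacent (i.e. there is no whitespace between them).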
8365 std::string Token; 8366 if (isToken(AsmToken::Integer)) { 8367 SMLoc Loc = getToken().getEndLoc(); 8368 Token = std::string(getTokenStr()); 8369 lex(); 8370 if (getLoc() != Loc) 8371 return false; 8372 } 8373 8374 StringRef Suffix; 8375 if (!parseId(Suffix)) 8376 return false; 8377 Token += Suffix; 8378 8379 StringRef DimId = Token; 8380 if (DimId.startswith("SQ_RSRC_IMG_")) 8381 DimId = DimId.drop_front(12); 8382 8383 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8384 if (!DimInfo) 8385 return false; 8386 8387 Encoding = DimInfo->Encoding; 8388 return true; 8389 } 8390 8391 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8392 if (!isGFX10Plus()) 8393 return MatchOperand_NoMatch; 8394 8395 SMLoc S = getLoc(); 8396 8397 if (!trySkipId("dim", AsmToken::Colon)) 8398 return MatchOperand_NoMatch; 8399 8400 unsigned Encoding; 8401 SMLoc Loc = getLoc(); 8402 if (!parseDimId(Encoding)) { 8403 Error(Loc, "invalid dim value"); 8404 return MatchOperand_ParseFail; 8405 } 8406 8407 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8408 AMDGPUOperand::ImmTyDim)); 8409 return MatchOperand_Success; 8410 } 8411 8412 //===----------------------------------------------------------------------===// 8413 // dpp 8414 //===----------------------------------------------------------------------===// 8415 8416 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8417 SMLoc S = getLoc(); 8418 8419 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8420 return MatchOperand_NoMatch; 8421 8422 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8423 8424 int64_t Sels[8]; 8425 8426 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8427 return MatchOperand_ParseFail; 8428 8429 for (size_t i = 0; i < 8; ++i) { 8430 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8431 return MatchOperand_ParseFail; 8432 8433 SMLoc Loc = getLoc(); 8434 if (getParser().parseAbsoluteExpression(Sels[i])) 8435 return MatchOperand_ParseFail; 8436 if (0 > Sels[i] || 7 < Sels[i]) { 8437 Error(Loc, "expected a 3-bit value"); 8438 return MatchOperand_ParseFail; 8439 } 8440 } 8441 8442 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8443 return MatchOperand_ParseFail; 8444 8445 unsigned DPP8 = 0; 8446 for (size_t i = 0; i < 8; ++i) 8447 DPP8 |= (Sels[i] << (i * 3)); 8448 8449 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8450 return MatchOperand_Success; 8451 } 8452 8453 bool 8454 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8455 const OperandVector &Operands) { 8456 if (Ctrl == "row_newbcast") 8457 return isGFX90A(); 8458 8459 if (Ctrl == "row_share" || 8460 Ctrl == "row_xmask") 8461 return isGFX10Plus(); 8462 8463 if (Ctrl == "wave_shl" || 8464 Ctrl == "wave_shr" || 8465 Ctrl == "wave_rol" || 8466 Ctrl == "wave_ror" || 8467 Ctrl == "row_bcast") 8468 return isVI() || isGFX9(); 8469 8470 return Ctrl == "row_mirror" || 8471 Ctrl == "row_half_mirror" || 8472 Ctrl == "quad_perm" || 8473 Ctrl == "row_shl" || 8474 Ctrl == "row_shr" || 8475 Ctrl == "row_ror"; 8476 } 8477 8478 int64_t 8479 AMDGPUAsmParser::parseDPPCtrlPerm() { 8480 // quad_perm:[%d,%d,%d,%d] 8481 8482 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8483 return -1; 8484 8485 int64_t Val = 0; 8486 for (int i = 0; i < 4; ++i) { 8487 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8488 return -1; 8489 8490 int64_t Temp; 8491 SMLoc Loc = getLoc(); 8492 if 
(getParser().parseAbsoluteExpression(Temp)) 8493 return -1; 8494 if (Temp < 0 || Temp > 3) { 8495 Error(Loc, "expected a 2-bit value"); 8496 return -1; 8497 } 8498 8499 Val += (Temp << i * 2); 8500 } 8501 8502 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8503 return -1; 8504 8505 return Val; 8506 } 8507 8508 int64_t 8509 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8510 using namespace AMDGPU::DPP; 8511 8512 // sel:%d 8513 8514 int64_t Val; 8515 SMLoc Loc = getLoc(); 8516 8517 if (getParser().parseAbsoluteExpression(Val)) 8518 return -1; 8519 8520 struct DppCtrlCheck { 8521 int64_t Ctrl; 8522 int Lo; 8523 int Hi; 8524 }; 8525 8526 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8527 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8528 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8529 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8530 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8531 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8532 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8533 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8534 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8535 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8536 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8537 .Default({-1, 0, 0}); 8538 8539 bool Valid; 8540 if (Check.Ctrl == -1) { 8541 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8542 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8543 } else { 8544 Valid = Check.Lo <= Val && Val <= Check.Hi; 8545 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8546 } 8547 8548 if (!Valid) { 8549 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8550 return -1; 8551 } 8552 8553 return Val; 8554 } 8555 8556 OperandMatchResultTy 8557 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8558 using namespace AMDGPU::DPP; 8559 8560 if (!isToken(AsmToken::Identifier) || 8561 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8562 return MatchOperand_NoMatch; 8563 8564 SMLoc S = getLoc(); 8565 int64_t Val = -1; 8566 StringRef Ctrl; 8567 8568 parseId(Ctrl); 8569 8570 if (Ctrl == "row_mirror") { 8571 Val = DppCtrl::ROW_MIRROR; 8572 } else if (Ctrl == "row_half_mirror") { 8573 Val = DppCtrl::ROW_HALF_MIRROR; 8574 } else { 8575 if (skipToken(AsmToken::Colon, "expected a colon")) { 8576 if (Ctrl == "quad_perm") { 8577 Val = parseDPPCtrlPerm(); 8578 } else { 8579 Val = parseDPPCtrlSel(Ctrl); 8580 } 8581 } 8582 } 8583 8584 if (Val == -1) 8585 return MatchOperand_ParseFail; 8586 8587 Operands.push_back( 8588 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8589 return MatchOperand_Success; 8590 } 8591 8592 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8593 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8594 } 8595 8596 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8597 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8598 } 8599 8600 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8601 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8602 } 8603 8604 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8605 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8606 } 8607 8608 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8609 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8610 } 8611 8612 // Add dummy $old operand 8613 void 
AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8614 const OperandVector &Operands, 8615 bool IsDPP8) { 8616 Inst.addOperand(MCOperand::createReg(0)); 8617 cvtVOP3DPP(Inst, Operands, IsDPP8); 8618 } 8619 8620 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8621 OptionalImmIndexMap OptionalIdx; 8622 unsigned Opc = Inst.getOpcode(); 8623 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8624 unsigned I = 1; 8625 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8626 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8627 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8628 } 8629 8630 int Fi = 0; 8631 for (unsigned E = Operands.size(); I != E; ++I) { 8632 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8633 MCOI::TIED_TO); 8634 if (TiedTo != -1) { 8635 assert((unsigned)TiedTo < Inst.getNumOperands()); 8636 // handle tied old or src2 for MAC instructions 8637 Inst.addOperand(Inst.getOperand(TiedTo)); 8638 } 8639 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8640 // Add the register arguments 8641 if (IsDPP8 && Op.isFI()) { 8642 Fi = Op.getImm(); 8643 } else if (HasModifiers && 8644 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8645 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8646 } else if (Op.isReg()) { 8647 Op.addRegOperands(Inst, 1); 8648 } else if (Op.isImm() && 8649 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8650 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8651 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8652 Op.addImmOperands(Inst, 1); 8653 } else if (Op.isImm()) { 8654 OptionalIdx[Op.getImmTy()] = I; 8655 } else { 8656 llvm_unreachable("unhandled operand type"); 8657 } 8658 } 8659 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8661 } 8662 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8664 } 8665 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8666 cvtVOP3P(Inst, Operands, OptionalIdx); 8667 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8669 } 8670 8671 if (IsDPP8) { 8672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8673 using namespace llvm::AMDGPU::DPP; 8674 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8675 } else { 8676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8680 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8681 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8682 } 8683 } 8684 } 8685 8686 // Add dummy $old operand 8687 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8688 const OperandVector &Operands, 8689 bool IsDPP8) { 8690 Inst.addOperand(MCOperand::createReg(0)); 8691 cvtDPP(Inst, Operands, IsDPP8); 8692 } 8693 8694 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8695 OptionalImmIndexMap OptionalIdx; 8696 8697 unsigned Opc = Inst.getOpcode(); 8698 bool HasModifiers = 8699 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8700 unsigned I = 1; 8701 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8702 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8703 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8704 } 8705 8706 int Fi = 0; 8707 for (unsigned E = Operands.size(); I != E; ++I) { 8708 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8709 MCOI::TIED_TO); 8710 if (TiedTo != -1) { 8711 assert((unsigned)TiedTo < Inst.getNumOperands()); 8712 // handle tied old or src2 for MAC instructions 8713 Inst.addOperand(Inst.getOperand(TiedTo)); 8714 } 8715 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8716 // Add the register arguments 8717 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8718 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8719 // Skip it. 8720 continue; 8721 } 8722 8723 if (IsDPP8) { 8724 if (Op.isDPP8()) { 8725 Op.addImmOperands(Inst, 1); 8726 } else if (HasModifiers && 8727 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8728 Op.addRegWithFPInputModsOperands(Inst, 2); 8729 } else if (Op.isFI()) { 8730 Fi = Op.getImm(); 8731 } else if (Op.isReg()) { 8732 Op.addRegOperands(Inst, 1); 8733 } else { 8734 llvm_unreachable("Invalid operand type"); 8735 } 8736 } else { 8737 if (HasModifiers && 8738 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8739 Op.addRegWithFPInputModsOperands(Inst, 2); 8740 } else if (Op.isReg()) { 8741 Op.addRegOperands(Inst, 1); 8742 } else if (Op.isDPPCtrl()) { 8743 Op.addImmOperands(Inst, 1); 8744 } else if (Op.isImm()) { 8745 // Handle optional arguments 8746 OptionalIdx[Op.getImmTy()] = I; 8747 } else { 8748 llvm_unreachable("Invalid operand type"); 8749 } 8750 } 8751 } 8752 8753 if (IsDPP8) { 8754 using namespace llvm::AMDGPU::DPP; 8755 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8756 } else { 8757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8758 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8759 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8760 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8761 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8762 } 8763 } 8764 } 8765 8766 //===----------------------------------------------------------------------===// 8767 // sdwa 8768 //===----------------------------------------------------------------------===// 8769 8770 OperandMatchResultTy 8771 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8772 AMDGPUOperand::ImmTy Type) { 8773 using namespace llvm::AMDGPU::SDWA; 8774 8775 SMLoc S = getLoc(); 8776 StringRef Value; 8777 OperandMatchResultTy res; 8778 8779 SMLoc StringLoc; 8780 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8781 if (res != MatchOperand_Success) { 8782 return res; 8783 } 8784 8785 int64_t Int; 8786 Int = StringSwitch<int64_t>(Value) 8787 .Case("BYTE_0", SdwaSel::BYTE_0) 8788 .Case("BYTE_1", SdwaSel::BYTE_1) 8789 .Case("BYTE_2", SdwaSel::BYTE_2) 8790 .Case("BYTE_3", SdwaSel::BYTE_3) 8791 .Case("WORD_0", SdwaSel::WORD_0) 8792 .Case("WORD_1", SdwaSel::WORD_1) 8793 .Case("DWORD", SdwaSel::DWORD) 8794 .Default(0xffffffff); 8795 8796 if (Int == 0xffffffff) { 8797 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8798 return MatchOperand_ParseFail; 8799 } 8800 8801 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8802 return MatchOperand_Success; 8803 } 8804 8805 OperandMatchResultTy 8806 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8807 using namespace llvm::AMDGPU::SDWA; 8808 8809 SMLoc S = getLoc(); 8810 StringRef Value; 8811 OperandMatchResultTy res; 8812 8813 SMLoc StringLoc; 8814 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8815 if (res != MatchOperand_Success) { 8816 return res; 8817 } 8818 8819 int64_t Int; 8820 Int = StringSwitch<int64_t>(Value) 8821 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8822 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8823 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8824 .Default(0xffffffff); 8825 8826 if (Int == 0xffffffff) { 8827 Error(StringLoc, "invalid dst_unused value"); 8828 return MatchOperand_ParseFail; 8829 } 8830 8831 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8832 return MatchOperand_Success; 8833 } 8834 8835 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8836 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8837 } 8838 8839 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8840 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8841 } 8842 8843 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8844 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8845 } 8846 8847 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8848 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8849 } 8850 8851 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8852 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8853 } 8854 8855 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8856 uint64_t BasicInstType, 8857 bool SkipDstVcc, 8858 bool 
SkipSrcVcc) { 8859 using namespace llvm::AMDGPU::SDWA; 8860 8861 OptionalImmIndexMap OptionalIdx; 8862 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8863 bool SkippedVcc = false; 8864 8865 unsigned I = 1; 8866 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8867 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8868 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8869 } 8870 8871 for (unsigned E = Operands.size(); I != E; ++I) { 8872 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8873 if (SkipVcc && !SkippedVcc && Op.isReg() && 8874 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8875 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8876 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8877 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8878 // Skip VCC only if we didn't skip it on previous iteration. 8879 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8880 if (BasicInstType == SIInstrFlags::VOP2 && 8881 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8882 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8883 SkippedVcc = true; 8884 continue; 8885 } else if (BasicInstType == SIInstrFlags::VOPC && 8886 Inst.getNumOperands() == 0) { 8887 SkippedVcc = true; 8888 continue; 8889 } 8890 } 8891 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8892 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8893 } else if (Op.isImm()) { 8894 // Handle optional arguments 8895 OptionalIdx[Op.getImmTy()] = I; 8896 } else { 8897 llvm_unreachable("Invalid operand type"); 8898 } 8899 SkippedVcc = false; 8900 } 8901 8902 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8903 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8904 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8905 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8906 switch (BasicInstType) { 8907 case SIInstrFlags::VOP1: 8908 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8909 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8910 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8911 } 8912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8914 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8915 break; 8916 8917 case SIInstrFlags::VOP2: 8918 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8919 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8920 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8921 } 8922 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8923 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8924 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8925 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8926 break; 8927 8928 case SIInstrFlags::VOPC: 8929 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8931 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8932 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8933 break; 8934 8935 default: 8936 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 8937 } 8938 } 8939 8940 // special case v_mac_{f16, f32}: 8941 // it has src2 register operand that is tied to dst operand 8942 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8943 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8944 auto it = Inst.begin(); 8945 std::advance( 8946 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8947 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8948 } 8949 } 8950 8951 //===----------------------------------------------------------------------===// 8952 // mAI 8953 //===----------------------------------------------------------------------===// 8954 8955 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8956 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8957 } 8958 8959 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8960 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8961 } 8962 8963 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8964 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8965 } 8966 8967 /// Force static initialization. 8968 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8969 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8970 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8971 } 8972 8973 #define GET_REGISTER_MATCHER 8974 #define GET_MATCHER_IMPLEMENTATION 8975 #define GET_MNEMONIC_SPELL_CHECKER 8976 #define GET_MNEMONIC_CHECKER 8977 #include "AMDGPUGenAsmMatcher.inc" 8978 8979 // This function should be defined after auto-generated include so that we have 8980 // MatchClassKind enum defined 8981 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8982 unsigned Kind) { 8983 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8984 // But MatchInstructionImpl() expects to meet token and fails to validate 8985 // operand. This method checks if we are given immediate operand but expect to 8986 // get corresponding token. 8987 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8988 switch (Kind) { 8989 case MCK_addr64: 8990 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8991 case MCK_gds: 8992 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8993 case MCK_lds: 8994 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8995 case MCK_idxen: 8996 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8997 case MCK_offen: 8998 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8999 case MCK_SSrcB32: 9000 // When operands have expression values, they will return true for isToken, 9001 // because it is not possible to distinguish between a token and an 9002 // expression at parse time. MatchInstructionImpl() will always try to 9003 // match an operand as a token, when isToken returns true, and when the 9004 // name of the expression is not a valid token, the match will fail, 9005 // so we need to handle it here. 9006 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 9007 case MCK_SSrcF32: 9008 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 9009 case MCK_SoppBrTarget: 9010 return Operand.isSoppBrTarget() ? 
Match_Success : Match_InvalidOperand; 9011 case MCK_VReg32OrOff: 9012 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 9013 case MCK_InterpSlot: 9014 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 9015 case MCK_Attr: 9016 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 9017 case MCK_AttrChan: 9018 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 9019 case MCK_ImmSMEMOffset: 9020 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 9021 case MCK_SReg_64: 9022 case MCK_SReg_64_XEXEC: 9023 // Null is defined as a 32-bit register but 9024 // it should also be enabled with 64-bit operands. 9025 // The following code enables it for SReg_64 operands 9026 // used as source and destination. Remaining source 9027 // operands are handled in isInlinableImm. 9028 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 9029 default: 9030 return Match_InvalidOperand; 9031 } 9032 } 9033 9034 //===----------------------------------------------------------------------===// 9035 // endpgm 9036 //===----------------------------------------------------------------------===// 9037 9038 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 9039 SMLoc S = getLoc(); 9040 int64_t Imm = 0; 9041 9042 if (!parseExpr(Imm)) { 9043 // The operand is optional, if not present default to 0 9044 Imm = 0; 9045 } 9046 9047 if (!isUInt<16>(Imm)) { 9048 Error(S, "expected a 16-bit value"); 9049 return MatchOperand_ParseFail; 9050 } 9051 9052 Operands.push_back( 9053 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 9054 return MatchOperand_Success; 9055 } 9056 9057 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 9058 9059 //===----------------------------------------------------------------------===// 9060 // LDSDIR 9061 //===----------------------------------------------------------------------===// 9062 9063 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { 9064 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); 9065 } 9066 9067 bool AMDGPUOperand::isWaitVDST() const { 9068 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 9069 } 9070 9071 //===----------------------------------------------------------------------===// 9072 // VINTERP 9073 //===----------------------------------------------------------------------===// 9074 9075 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { 9076 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); 9077 } 9078 9079 bool AMDGPUOperand::isWaitEXP() const { 9080 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 9081 } 9082