1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/MC/MCParser/MCAsmLexer.h" 30 #include "llvm/MC/MCParser/MCAsmParser.h" 31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 32 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/AMDGPUMetadata.h" 36 #include "llvm/Support/AMDHSAKernelDescriptor.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/MachineValueType.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/Support/TargetParser.h" 41 42 using namespace llvm; 43 using namespace llvm::AMDGPU; 44 using namespace llvm::amdhsa; 45 46 namespace { 47 48 class AMDGPUAsmParser; 49 50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 51 52 //===----------------------------------------------------------------------===// 53 // Operand 54 //===----------------------------------------------------------------------===// 55 56 class AMDGPUOperand : public MCParsedAsmOperand { 57 enum KindTy { 58 Token, 59 Immediate, 60 Register, 61 Expression 62 } Kind; 63 64 SMLoc StartLoc, EndLoc; 65 const AMDGPUAsmParser *AsmParser; 66 67 public: 68 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 69 : Kind(Kind_), AsmParser(AsmParser_) {} 70 71 using Ptr = std::unique_ptr<AMDGPUOperand>; 72 73 struct Modifiers { 74 bool Abs = false; 75 bool Neg = false; 76 bool Sext = false; 77 78 bool hasFPModifiers() const { return Abs || Neg; } 79 bool hasIntModifiers() const { return Sext; } 80 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 81 82 int64_t getFPModifiersOperand() const { 83 int64_t Operand = 0; 84 Operand |= Abs ? SISrcMods::ABS : 0u; 85 Operand |= Neg ? SISrcMods::NEG : 0u; 86 return Operand; 87 } 88 89 int64_t getIntModifiersOperand() const { 90 int64_t Operand = 0; 91 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 92 return Operand; 93 } 94 95 int64_t getModifiersOperand() const { 96 assert(!(hasFPModifiers() && hasIntModifiers()) 97 && "fp and int modifiers should not be used simultaneously"); 98 if (hasFPModifiers()) { 99 return getFPModifiersOperand(); 100 } else if (hasIntModifiers()) { 101 return getIntModifiersOperand(); 102 } else { 103 return 0; 104 } 105 } 106 107 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 108 }; 109 110 enum ImmTy { 111 ImmTyNone, 112 ImmTyGDS, 113 ImmTyLDS, 114 ImmTyOffen, 115 ImmTyIdxen, 116 ImmTyAddr64, 117 ImmTyOffset, 118 ImmTyInstOffset, 119 ImmTyOffset0, 120 ImmTyOffset1, 121 ImmTyCPol, 122 ImmTySWZ, 123 ImmTyTFE, 124 ImmTyD16, 125 ImmTyClampSI, 126 ImmTyOModSI, 127 ImmTySdwaDstSel, 128 ImmTySdwaSrc0Sel, 129 ImmTySdwaSrc1Sel, 130 ImmTySdwaDstUnused, 131 ImmTyDMask, 132 ImmTyDim, 133 ImmTyUNorm, 134 ImmTyDA, 135 ImmTyR128A16, 136 ImmTyA16, 137 ImmTyLWE, 138 ImmTyExpTgt, 139 ImmTyExpCompr, 140 ImmTyExpVM, 141 ImmTyFORMAT, 142 ImmTyHwreg, 143 ImmTyOff, 144 ImmTySendMsg, 145 ImmTyInterpSlot, 146 ImmTyInterpAttr, 147 ImmTyAttrChan, 148 ImmTyOpSel, 149 ImmTyOpSelHi, 150 ImmTyNegLo, 151 ImmTyNegHi, 152 ImmTyDPP8, 153 ImmTyDppCtrl, 154 ImmTyDppRowMask, 155 ImmTyDppBankMask, 156 ImmTyDppBoundCtrl, 157 ImmTyDppFi, 158 ImmTySwizzle, 159 ImmTyGprIdxMode, 160 ImmTyHigh, 161 ImmTyBLGP, 162 ImmTyCBSZ, 163 ImmTyABID, 164 ImmTyEndpgm, 165 ImmTyWaitVDST, 166 ImmTyWaitEXP, 167 }; 168 169 enum ImmKindTy { 170 ImmKindTyNone, 171 ImmKindTyLiteral, 172 ImmKindTyConst, 173 }; 174 175 private: 176 struct TokOp { 177 const char *Data; 178 unsigned Length; 179 }; 180 181 struct ImmOp { 182 int64_t Val; 183 ImmTy Type; 184 bool IsFPImm; 185 mutable ImmKindTy Kind; 186 Modifiers Mods; 187 }; 188 189 struct RegOp { 190 unsigned RegNo; 191 Modifiers Mods; 192 }; 193 194 union { 195 TokOp Tok; 196 ImmOp Imm; 197 RegOp Reg; 198 const MCExpr *Expr; 199 }; 200 201 public: 202 bool isToken() const override { 203 if (Kind == Token) 204 return true; 205 206 // When parsing operands, we can't always tell if something was meant to be 207 // a token, like 'gds', or an expression that references a global variable. 208 // In this case, we assume the string is an expression, and if we need to 209 // interpret is a token, then we treat the symbol name as the token. 
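    // If so, getToken() below resolves the operand through
    // getExpressionAsToken(), which returns the referenced symbol's name.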
210 return isSymbolRefExpr(); 211 } 212 213 bool isSymbolRefExpr() const { 214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 215 } 216 217 bool isImm() const override { 218 return Kind == Immediate; 219 } 220 221 void setImmKindNone() const { 222 assert(isImm()); 223 Imm.Kind = ImmKindTyNone; 224 } 225 226 void setImmKindLiteral() const { 227 assert(isImm()); 228 Imm.Kind = ImmKindTyLiteral; 229 } 230 231 void setImmKindConst() const { 232 assert(isImm()); 233 Imm.Kind = ImmKindTyConst; 234 } 235 236 bool IsImmKindLiteral() const { 237 return isImm() && Imm.Kind == ImmKindTyLiteral; 238 } 239 240 bool isImmKindConst() const { 241 return isImm() && Imm.Kind == ImmKindTyConst; 242 } 243 244 bool isInlinableImm(MVT type) const; 245 bool isLiteralImm(MVT type) const; 246 247 bool isRegKind() const { 248 return Kind == Register; 249 } 250 251 bool isReg() const override { 252 return isRegKind() && !hasModifiers(); 253 } 254 255 bool isRegOrInline(unsigned RCID, MVT type) const { 256 return isRegClass(RCID) || isInlinableImm(type); 257 } 258 259 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 260 return isRegOrInline(RCID, type) || isLiteralImm(type); 261 } 262 263 bool isRegOrImmWithInt16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 265 } 266 267 bool isRegOrImmWithInt32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 269 } 270 271 bool isRegOrInlineImmWithInt16InputMods() const { 272 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 273 } 274 275 bool isRegOrInlineImmWithInt32InputMods() const { 276 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 277 } 278 279 bool isRegOrImmWithInt64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 281 } 282 283 bool isRegOrImmWithFP16InputMods() const { 284 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 285 } 286 287 bool isRegOrImmWithFP32InputMods() const { 288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 289 } 290 291 bool isRegOrImmWithFP64InputMods() const { 292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 293 } 294 295 bool isRegOrInlineImmWithFP16InputMods() const { 296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 297 } 298 299 bool isRegOrInlineImmWithFP32InputMods() const { 300 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 301 } 302 303 304 bool isVReg() const { 305 return isRegClass(AMDGPU::VGPR_32RegClassID) || 306 isRegClass(AMDGPU::VReg_64RegClassID) || 307 isRegClass(AMDGPU::VReg_96RegClassID) || 308 isRegClass(AMDGPU::VReg_128RegClassID) || 309 isRegClass(AMDGPU::VReg_160RegClassID) || 310 isRegClass(AMDGPU::VReg_192RegClassID) || 311 isRegClass(AMDGPU::VReg_256RegClassID) || 312 isRegClass(AMDGPU::VReg_512RegClassID) || 313 isRegClass(AMDGPU::VReg_1024RegClassID); 314 } 315 316 bool isVReg32() const { 317 return isRegClass(AMDGPU::VGPR_32RegClassID); 318 } 319 320 bool isVReg32OrOff() const { 321 return isOff() || isVReg32(); 322 } 323 324 bool isNull() const { 325 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 326 } 327 328 bool isVRegWithInputMods() const; 329 330 bool isSDWAOperand(MVT type) const; 331 bool isSDWAFP16Operand() const; 332 bool isSDWAFP32Operand() const; 333 bool isSDWAInt16Operand() const; 334 bool isSDWAInt32Operand() const; 335 336 bool isImmTy(ImmTy ImmT) const { 337 return isImm() && Imm.Type == ImmT; 338 } 339 340 bool isImmModifier() const { 341 return isImm() && 
Imm.Type != ImmTyNone; 342 } 343 344 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 345 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 346 bool isDMask() const { return isImmTy(ImmTyDMask); } 347 bool isDim() const { return isImmTy(ImmTyDim); } 348 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 349 bool isDA() const { return isImmTy(ImmTyDA); } 350 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 351 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 352 bool isLWE() const { return isImmTy(ImmTyLWE); } 353 bool isOff() const { return isImmTy(ImmTyOff); } 354 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 355 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 356 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 357 bool isOffen() const { return isImmTy(ImmTyOffen); } 358 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 359 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 360 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 361 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 362 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 363 364 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 365 bool isGDS() const { return isImmTy(ImmTyGDS); } 366 bool isLDS() const { return isImmTy(ImmTyLDS); } 367 bool isCPol() const { return isImmTy(ImmTyCPol); } 368 bool isSWZ() const { return isImmTy(ImmTySWZ); } 369 bool isTFE() const { return isImmTy(ImmTyTFE); } 370 bool isD16() const { return isImmTy(ImmTyD16); } 371 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 372 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 373 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 374 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 375 bool isFI() const { return isImmTy(ImmTyDppFi); } 376 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 377 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 378 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 379 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 380 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 381 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 382 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 383 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 384 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 385 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 386 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 387 bool isHigh() const { return isImmTy(ImmTyHigh); } 388 389 bool isMod() const { 390 return isClampSI() || isOModSI(); 391 } 392 393 bool isRegOrImm() const { 394 return isReg() || isImm(); 395 } 396 397 bool isRegClass(unsigned RCID) const; 398 399 bool isInlineValue() const; 400 401 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 402 return isRegOrInline(RCID, type) && !hasModifiers(); 403 } 404 405 bool isSCSrcB16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 407 } 408 409 bool isSCSrcV2B16() const { 410 return isSCSrcB16(); 411 } 412 413 bool isSCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 415 } 416 417 bool isSCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 419 } 420 421 bool isBoolReg() const; 422 423 bool isSCSrcF16() const { 424 return 
isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 425 } 426 427 bool isSCSrcV2F16() const { 428 return isSCSrcF16(); 429 } 430 431 bool isSCSrcF32() const { 432 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 433 } 434 435 bool isSCSrcF64() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 437 } 438 439 bool isSSrcB32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 441 } 442 443 bool isSSrcB16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::i16); 445 } 446 447 bool isSSrcV2B16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcB16(); 450 } 451 452 bool isSSrcB64() const { 453 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 454 // See isVSrc64(). 455 return isSCSrcB64() || isLiteralImm(MVT::i64); 456 } 457 458 bool isSSrcF32() const { 459 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 460 } 461 462 bool isSSrcF64() const { 463 return isSCSrcB64() || isLiteralImm(MVT::f64); 464 } 465 466 bool isSSrcF16() const { 467 return isSCSrcB16() || isLiteralImm(MVT::f16); 468 } 469 470 bool isSSrcV2F16() const { 471 llvm_unreachable("cannot happen"); 472 return isSSrcF16(); 473 } 474 475 bool isSSrcV2FP32() const { 476 llvm_unreachable("cannot happen"); 477 return isSSrcF32(); 478 } 479 480 bool isSCSrcV2FP32() const { 481 llvm_unreachable("cannot happen"); 482 return isSCSrcF32(); 483 } 484 485 bool isSSrcV2INT32() const { 486 llvm_unreachable("cannot happen"); 487 return isSSrcB32(); 488 } 489 490 bool isSCSrcV2INT32() const { 491 llvm_unreachable("cannot happen"); 492 return isSCSrcB32(); 493 } 494 495 bool isSSrcOrLdsB32() const { 496 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 497 isLiteralImm(MVT::i32) || isExpr(); 498 } 499 500 bool isVCSrcB32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 502 } 503 504 bool isVCSrcB64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 506 } 507 508 bool isVCSrcB16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 510 } 511 512 bool isVCSrcV2B16() const { 513 return isVCSrcB16(); 514 } 515 516 bool isVCSrcF32() const { 517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 518 } 519 520 bool isVCSrcF64() const { 521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 522 } 523 524 bool isVCSrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 526 } 527 528 bool isVCSrcV2F16() const { 529 return isVCSrcF16(); 530 } 531 532 bool isVSrcB32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 534 } 535 536 bool isVSrcB64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::i64); 538 } 539 540 bool isVSrcB16() const { 541 return isVCSrcB16() || isLiteralImm(MVT::i16); 542 } 543 544 bool isVSrcV2B16() const { 545 return isVSrcB16() || isLiteralImm(MVT::v2i16); 546 } 547 548 bool isVCSrcV2FP32() const { 549 return isVCSrcF64(); 550 } 551 552 bool isVSrcV2FP32() const { 553 return isVSrcF64() || isLiteralImm(MVT::v2f32); 554 } 555 556 bool isVCSrcV2INT32() const { 557 return isVCSrcB64(); 558 } 559 560 bool isVSrcV2INT32() const { 561 return isVSrcB64() || isLiteralImm(MVT::v2i32); 562 } 563 564 bool isVSrcF32() const { 565 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 566 } 567 568 bool isVSrcF64() const { 569 return isVCSrcF64() || isLiteralImm(MVT::f64); 570 } 571 572 bool isVSrcF16() const { 573 return isVCSrcF16() || 
isLiteralImm(MVT::f16); 574 } 575 576 bool isVSrcV2F16() const { 577 return isVSrcF16() || isLiteralImm(MVT::v2f16); 578 } 579 580 bool isVISrcB32() const { 581 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 582 } 583 584 bool isVISrcB16() const { 585 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 586 } 587 588 bool isVISrcV2B16() const { 589 return isVISrcB16(); 590 } 591 592 bool isVISrcF32() const { 593 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 594 } 595 596 bool isVISrcF16() const { 597 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 598 } 599 600 bool isVISrcV2F16() const { 601 return isVISrcF16() || isVISrcB32(); 602 } 603 604 bool isVISrc_64B64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 606 } 607 608 bool isVISrc_64F64() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 610 } 611 612 bool isVISrc_64V2FP32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 614 } 615 616 bool isVISrc_64V2INT32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_256B64() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 622 } 623 624 bool isVISrc_256F64() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 626 } 627 628 bool isVISrc_128B16() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 630 } 631 632 bool isVISrc_128V2B16() const { 633 return isVISrc_128B16(); 634 } 635 636 bool isVISrc_128B32() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 638 } 639 640 bool isVISrc_128F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_256V2FP32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_256V2INT32() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 650 } 651 652 bool isVISrc_512B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_512B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_512V2B16() const { 661 return isVISrc_512B16(); 662 } 663 664 bool isVISrc_512F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_512F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_512V2F16() const { 673 return isVISrc_512F16() || isVISrc_512B32(); 674 } 675 676 bool isVISrc_1024B32() const { 677 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 678 } 679 680 bool isVISrc_1024B16() const { 681 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 682 } 683 684 bool isVISrc_1024V2B16() const { 685 return isVISrc_1024B16(); 686 } 687 688 bool isVISrc_1024F32() const { 689 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 690 } 691 692 bool isVISrc_1024F16() const { 693 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 694 } 695 696 bool isVISrc_1024V2F16() const { 697 return isVISrc_1024F16() || isVISrc_1024B32(); 698 } 699 700 bool isAISrcB32() const { 701 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 702 } 703 704 bool isAISrcB16() const { 705 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 706 } 707 708 bool isAISrcV2B16() 
const { 709 return isAISrcB16(); 710 } 711 712 bool isAISrcF32() const { 713 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 714 } 715 716 bool isAISrcF16() const { 717 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 718 } 719 720 bool isAISrcV2F16() const { 721 return isAISrcF16() || isAISrcB32(); 722 } 723 724 bool isAISrc_64B64() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 726 } 727 728 bool isAISrc_64F64() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 730 } 731 732 bool isAISrc_128B32() const { 733 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 734 } 735 736 bool isAISrc_128B16() const { 737 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 738 } 739 740 bool isAISrc_128V2B16() const { 741 return isAISrc_128B16(); 742 } 743 744 bool isAISrc_128F32() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 746 } 747 748 bool isAISrc_128F16() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 750 } 751 752 bool isAISrc_128V2F16() const { 753 return isAISrc_128F16() || isAISrc_128B32(); 754 } 755 756 bool isVISrc_128F16() const { 757 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 758 } 759 760 bool isVISrc_128V2F16() const { 761 return isVISrc_128F16() || isVISrc_128B32(); 762 } 763 764 bool isAISrc_256B64() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 766 } 767 768 bool isAISrc_256F64() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 770 } 771 772 bool isAISrc_512B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_512B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_512V2B16() const { 781 return isAISrc_512B16(); 782 } 783 784 bool isAISrc_512F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_512F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_512V2F16() const { 793 return isAISrc_512F16() || isAISrc_512B32(); 794 } 795 796 bool isAISrc_1024B32() const { 797 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 798 } 799 800 bool isAISrc_1024B16() const { 801 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 802 } 803 804 bool isAISrc_1024V2B16() const { 805 return isAISrc_1024B16(); 806 } 807 808 bool isAISrc_1024F32() const { 809 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 810 } 811 812 bool isAISrc_1024F16() const { 813 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 814 } 815 816 bool isAISrc_1024V2F16() const { 817 return isAISrc_1024F16() || isAISrc_1024B32(); 818 } 819 820 bool isKImmFP32() const { 821 return isLiteralImm(MVT::f32); 822 } 823 824 bool isKImmFP16() const { 825 return isLiteralImm(MVT::f16); 826 } 827 828 bool isMem() const override { 829 return false; 830 } 831 832 bool isExpr() const { 833 return Kind == Expression; 834 } 835 836 bool isSoppBrTarget() const { 837 return isExpr() || isImm(); 838 } 839 840 bool isSWaitCnt() const; 841 bool isDepCtr() const; 842 bool isSDelayAlu() const; 843 bool isHwreg() const; 844 bool isSendMsg() const; 845 bool isSwizzle() const; 846 bool isSMRDOffset8() const; 847 bool isSMEMOffset() const; 848 bool isSMRDLiteralOffset() const; 849 bool isDPP8() const; 850 bool 
isDPPCtrl() const; 851 bool isBLGP() const; 852 bool isCBSZ() const; 853 bool isABID() const; 854 bool isGPRIdxMode() const; 855 bool isS16Imm() const; 856 bool isU16Imm() const; 857 bool isEndpgm() const; 858 bool isWaitVDST() const; 859 bool isWaitEXP() const; 860 861 StringRef getExpressionAsToken() const { 862 assert(isExpr()); 863 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 864 return S->getSymbol().getName(); 865 } 866 867 StringRef getToken() const { 868 assert(isToken()); 869 870 if (Kind == Expression) 871 return getExpressionAsToken(); 872 873 return StringRef(Tok.Data, Tok.Length); 874 } 875 876 int64_t getImm() const { 877 assert(isImm()); 878 return Imm.Val; 879 } 880 881 void setImm(int64_t Val) { 882 assert(isImm()); 883 Imm.Val = Val; 884 } 885 886 ImmTy getImmTy() const { 887 assert(isImm()); 888 return Imm.Type; 889 } 890 891 unsigned getReg() const override { 892 assert(isRegKind()); 893 return Reg.RegNo; 894 } 895 896 SMLoc getStartLoc() const override { 897 return StartLoc; 898 } 899 900 SMLoc getEndLoc() const override { 901 return EndLoc; 902 } 903 904 SMRange getLocRange() const { 905 return SMRange(StartLoc, EndLoc); 906 } 907 908 Modifiers getModifiers() const { 909 assert(isRegKind() || isImmTy(ImmTyNone)); 910 return isRegKind() ? Reg.Mods : Imm.Mods; 911 } 912 913 void setModifiers(Modifiers Mods) { 914 assert(isRegKind() || isImmTy(ImmTyNone)); 915 if (isRegKind()) 916 Reg.Mods = Mods; 917 else 918 Imm.Mods = Mods; 919 } 920 921 bool hasModifiers() const { 922 return getModifiers().hasModifiers(); 923 } 924 925 bool hasFPModifiers() const { 926 return getModifiers().hasFPModifiers(); 927 } 928 929 bool hasIntModifiers() const { 930 return getModifiers().hasIntModifiers(); 931 } 932 933 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 934 935 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 936 937 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 938 939 template <unsigned Bitwidth> 940 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 941 942 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 943 addKImmFPOperands<16>(Inst, N); 944 } 945 946 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 947 addKImmFPOperands<32>(Inst, N); 948 } 949 950 void addRegOperands(MCInst &Inst, unsigned N) const; 951 952 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 953 addRegOperands(Inst, N); 954 } 955 956 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 957 if (isRegKind()) 958 addRegOperands(Inst, N); 959 else if (isExpr()) 960 Inst.addOperand(MCOperand::createExpr(Expr)); 961 else 962 addImmOperands(Inst, N); 963 } 964 965 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 if (isRegKind()) { 969 addRegOperands(Inst, N); 970 } else { 971 addImmOperands(Inst, N, false); 972 } 973 } 974 975 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasIntModifiers()); 977 addRegOrImmWithInputModsOperands(Inst, N); 978 } 979 980 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 981 assert(!hasFPModifiers()); 982 addRegOrImmWithInputModsOperands(Inst, N); 983 } 984 985 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 986 Modifiers Mods = getModifiers(); 987 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 988 
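    // Note: the source-modifier immediate is added to the MCInst before the
    // register operand itself, mirroring addRegOrImmWithInputModsOperands()
    // above, so modified sources are emitted as (mods, value) pairs.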
assert(isRegKind()); 989 addRegOperands(Inst, N); 990 } 991 992 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 993 assert(!hasIntModifiers()); 994 addRegWithInputModsOperands(Inst, N); 995 } 996 997 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 998 assert(!hasFPModifiers()); 999 addRegWithInputModsOperands(Inst, N); 1000 } 1001 1002 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 1003 if (isImm()) 1004 addImmOperands(Inst, N); 1005 else { 1006 assert(isExpr()); 1007 Inst.addOperand(MCOperand::createExpr(Expr)); 1008 } 1009 } 1010 1011 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1012 switch (Type) { 1013 case ImmTyNone: OS << "None"; break; 1014 case ImmTyGDS: OS << "GDS"; break; 1015 case ImmTyLDS: OS << "LDS"; break; 1016 case ImmTyOffen: OS << "Offen"; break; 1017 case ImmTyIdxen: OS << "Idxen"; break; 1018 case ImmTyAddr64: OS << "Addr64"; break; 1019 case ImmTyOffset: OS << "Offset"; break; 1020 case ImmTyInstOffset: OS << "InstOffset"; break; 1021 case ImmTyOffset0: OS << "Offset0"; break; 1022 case ImmTyOffset1: OS << "Offset1"; break; 1023 case ImmTyCPol: OS << "CPol"; break; 1024 case ImmTySWZ: OS << "SWZ"; break; 1025 case ImmTyTFE: OS << "TFE"; break; 1026 case ImmTyD16: OS << "D16"; break; 1027 case ImmTyFORMAT: OS << "FORMAT"; break; 1028 case ImmTyClampSI: OS << "ClampSI"; break; 1029 case ImmTyOModSI: OS << "OModSI"; break; 1030 case ImmTyDPP8: OS << "DPP8"; break; 1031 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1032 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1033 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1034 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1035 case ImmTyDppFi: OS << "FI"; break; 1036 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1037 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1038 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1039 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1040 case ImmTyDMask: OS << "DMask"; break; 1041 case ImmTyDim: OS << "Dim"; break; 1042 case ImmTyUNorm: OS << "UNorm"; break; 1043 case ImmTyDA: OS << "DA"; break; 1044 case ImmTyR128A16: OS << "R128A16"; break; 1045 case ImmTyA16: OS << "A16"; break; 1046 case ImmTyLWE: OS << "LWE"; break; 1047 case ImmTyOff: OS << "Off"; break; 1048 case ImmTyExpTgt: OS << "ExpTgt"; break; 1049 case ImmTyExpCompr: OS << "ExpCompr"; break; 1050 case ImmTyExpVM: OS << "ExpVM"; break; 1051 case ImmTyHwreg: OS << "Hwreg"; break; 1052 case ImmTySendMsg: OS << "SendMsg"; break; 1053 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1054 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1055 case ImmTyAttrChan: OS << "AttrChan"; break; 1056 case ImmTyOpSel: OS << "OpSel"; break; 1057 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1058 case ImmTyNegLo: OS << "NegLo"; break; 1059 case ImmTyNegHi: OS << "NegHi"; break; 1060 case ImmTySwizzle: OS << "Swizzle"; break; 1061 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1062 case ImmTyHigh: OS << "High"; break; 1063 case ImmTyBLGP: OS << "BLGP"; break; 1064 case ImmTyCBSZ: OS << "CBSZ"; break; 1065 case ImmTyABID: OS << "ABID"; break; 1066 case ImmTyEndpgm: OS << "Endpgm"; break; 1067 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1068 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1069 } 1070 } 1071 1072 void print(raw_ostream &OS) const override { 1073 switch (Kind) { 1074 case Register: 1075 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1076 break; 1077 case Immediate: 1078 OS << '<' << getImm(); 1079 if (getImmTy() != ImmTyNone) { 1080 
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
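// The per-kernel maxima are published through the MC symbols
// .kernel.sgpr_count, .kernel.vgpr_count and .kernel.agpr_count, which
// usesSgprAt()/usesVgprAt()/usesAgprAt() below keep up to date as register
// operands are encountered.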
1152 class KernelScopeInfo { 1153 int SgprIndexUnusedMin = -1; 1154 int VgprIndexUnusedMin = -1; 1155 int AgprIndexUnusedMin = -1; 1156 MCContext *Ctx = nullptr; 1157 MCSubtargetInfo const *MSTI = nullptr; 1158 1159 void usesSgprAt(int i) { 1160 if (i >= SgprIndexUnusedMin) { 1161 SgprIndexUnusedMin = ++i; 1162 if (Ctx) { 1163 MCSymbol* const Sym = 1164 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1165 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1166 } 1167 } 1168 } 1169 1170 void usesVgprAt(int i) { 1171 if (i >= VgprIndexUnusedMin) { 1172 VgprIndexUnusedMin = ++i; 1173 if (Ctx) { 1174 MCSymbol* const Sym = 1175 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1176 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1177 VgprIndexUnusedMin); 1178 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1179 } 1180 } 1181 } 1182 1183 void usesAgprAt(int i) { 1184 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1185 if (!hasMAIInsts(*MSTI)) 1186 return; 1187 1188 if (i >= AgprIndexUnusedMin) { 1189 AgprIndexUnusedMin = ++i; 1190 if (Ctx) { 1191 MCSymbol* const Sym = 1192 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1193 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1194 1195 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1196 MCSymbol* const vSym = 1197 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1198 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1199 VgprIndexUnusedMin); 1200 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1201 } 1202 } 1203 } 1204 1205 public: 1206 KernelScopeInfo() = default; 1207 1208 void initialize(MCContext &Context) { 1209 Ctx = &Context; 1210 MSTI = Ctx->getSubtargetInfo(); 1211 1212 usesSgprAt(SgprIndexUnusedMin = -1); 1213 usesVgprAt(VgprIndexUnusedMin = -1); 1214 if (hasMAIInsts(*MSTI)) { 1215 usesAgprAt(AgprIndexUnusedMin = -1); 1216 } 1217 } 1218 1219 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1220 unsigned RegWidth) { 1221 switch (RegKind) { 1222 case IS_SGPR: 1223 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1224 break; 1225 case IS_AGPR: 1226 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1227 break; 1228 case IS_VGPR: 1229 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1230 break; 1231 default: 1232 break; 1233 } 1234 } 1235 }; 1236 1237 class AMDGPUAsmParser : public MCTargetAsmParser { 1238 MCAsmParser &Parser; 1239 1240 // Number of extra operands parsed after the first optional operand. 1241 // This may be necessary to skip hardcoded mandatory operands. 1242 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1243 1244 unsigned ForcedEncodingSize = 0; 1245 bool ForcedDPP = false; 1246 bool ForcedSDWA = false; 1247 KernelScopeInfo KernelScope; 1248 unsigned CPolSeen; 1249 1250 /// @name Auto-generated Match Functions 1251 /// { 1252 1253 #define GET_ASSEMBLER_HEADER 1254 #include "AMDGPUGenAsmMatcher.inc" 1255 1256 /// } 1257 1258 private: 1259 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1260 bool OutOfRangeError(SMRange Range); 1261 /// Calculate VGPR/SGPR blocks required for given target, reserved 1262 /// registers, and user-specified NextFreeXGPR values. 1263 /// 1264 /// \param Features [in] Target features, used for bug corrections. 1265 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1266 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 
1267 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1268 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1269 /// descriptor field, if valid. 1270 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1271 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1272 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1273 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1274 /// \param VGPRBlocks [out] Result VGPR block count. 1275 /// \param SGPRBlocks [out] Result SGPR block count. 1276 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1277 bool FlatScrUsed, bool XNACKUsed, 1278 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1279 SMRange VGPRRange, unsigned NextFreeSGPR, 1280 SMRange SGPRRange, unsigned &VGPRBlocks, 1281 unsigned &SGPRBlocks); 1282 bool ParseDirectiveAMDGCNTarget(); 1283 bool ParseDirectiveAMDHSAKernel(); 1284 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1285 bool ParseDirectiveHSACodeObjectVersion(); 1286 bool ParseDirectiveHSACodeObjectISA(); 1287 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1288 bool ParseDirectiveAMDKernelCodeT(); 1289 // TODO: Possibly make subtargetHasRegister const. 1290 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1291 bool ParseDirectiveAMDGPUHsaKernel(); 1292 1293 bool ParseDirectiveISAVersion(); 1294 bool ParseDirectiveHSAMetadata(); 1295 bool ParseDirectivePALMetadataBegin(); 1296 bool ParseDirectivePALMetadata(); 1297 bool ParseDirectiveAMDGPULDS(); 1298 1299 /// Common code to parse out a block of text (typically YAML) between start and 1300 /// end directives. 1301 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1302 const char *AssemblerDirectiveEnd, 1303 std::string &CollectString); 1304 1305 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1306 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1307 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1308 unsigned &RegNum, unsigned &RegWidth, 1309 bool RestoreOnFailure = false); 1310 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1311 unsigned &RegNum, unsigned &RegWidth, 1312 SmallVectorImpl<AsmToken> &Tokens); 1313 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1314 unsigned &RegWidth, 1315 SmallVectorImpl<AsmToken> &Tokens); 1316 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1317 unsigned &RegWidth, 1318 SmallVectorImpl<AsmToken> &Tokens); 1319 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1320 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1321 bool ParseRegRange(unsigned& Num, unsigned& Width); 1322 unsigned getRegularReg(RegisterKind RegKind, 1323 unsigned RegNum, 1324 unsigned RegWidth, 1325 SMLoc Loc); 1326 1327 bool isRegister(); 1328 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1329 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1330 void initializeGprCountSymbol(RegisterKind RegKind); 1331 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1332 unsigned RegWidth); 1333 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1334 bool IsAtomic, bool IsLds = false); 1335 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1336 bool IsGdsHardcoded); 1337 1338 public: 1339 enum AMDGPUMatchResultTy { 1340 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1341 }; 1342 enum 
OperandMode { 1343 OperandMode_Default, 1344 OperandMode_NSA, 1345 }; 1346 1347 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1348 1349 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1350 const MCInstrInfo &MII, 1351 const MCTargetOptions &Options) 1352 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1353 MCAsmParserExtension::Initialize(Parser); 1354 1355 if (getFeatureBits().none()) { 1356 // Set default features. 1357 copySTI().ToggleFeature("southern-islands"); 1358 } 1359 1360 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1361 1362 { 1363 // TODO: make those pre-defined variables read-only. 1364 // Currently there is none suitable machinery in the core llvm-mc for this. 1365 // MCSymbol::isRedefinable is intended for another purpose, and 1366 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1367 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1368 MCContext &Ctx = getContext(); 1369 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1370 MCSymbol *Sym = 1371 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1372 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1373 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1374 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1375 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1376 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1377 } else { 1378 MCSymbol *Sym = 1379 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1380 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1381 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1382 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1383 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1384 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1385 } 1386 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1387 initializeGprCountSymbol(IS_VGPR); 1388 initializeGprCountSymbol(IS_SGPR); 1389 } else 1390 KernelScope.initialize(getContext()); 1391 } 1392 } 1393 1394 bool hasMIMG_R128() const { 1395 return AMDGPU::hasMIMG_R128(getSTI()); 1396 } 1397 1398 bool hasPackedD16() const { 1399 return AMDGPU::hasPackedD16(getSTI()); 1400 } 1401 1402 bool hasGFX10A16() const { 1403 return AMDGPU::hasGFX10A16(getSTI()); 1404 } 1405 1406 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1407 1408 bool isSI() const { 1409 return AMDGPU::isSI(getSTI()); 1410 } 1411 1412 bool isCI() const { 1413 return AMDGPU::isCI(getSTI()); 1414 } 1415 1416 bool isVI() const { 1417 return AMDGPU::isVI(getSTI()); 1418 } 1419 1420 bool isGFX9() const { 1421 return AMDGPU::isGFX9(getSTI()); 1422 } 1423 1424 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
1425 bool isGFX90A() const { 1426 return AMDGPU::isGFX90A(getSTI()); 1427 } 1428 1429 bool isGFX940() const { 1430 return AMDGPU::isGFX940(getSTI()); 1431 } 1432 1433 bool isGFX9Plus() const { 1434 return AMDGPU::isGFX9Plus(getSTI()); 1435 } 1436 1437 bool isGFX10() const { 1438 return AMDGPU::isGFX10(getSTI()); 1439 } 1440 1441 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1442 1443 bool isGFX11() const { 1444 return AMDGPU::isGFX11(getSTI()); 1445 } 1446 1447 bool isGFX11Plus() const { 1448 return AMDGPU::isGFX11Plus(getSTI()); 1449 } 1450 1451 bool isGFX10_BEncoding() const { 1452 return AMDGPU::isGFX10_BEncoding(getSTI()); 1453 } 1454 1455 bool hasInv2PiInlineImm() const { 1456 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1457 } 1458 1459 bool hasFlatOffsets() const { 1460 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1461 } 1462 1463 bool hasArchitectedFlatScratch() const { 1464 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1465 } 1466 1467 bool hasSGPR102_SGPR103() const { 1468 return !isVI() && !isGFX9(); 1469 } 1470 1471 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1472 1473 bool hasIntClamp() const { 1474 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1475 } 1476 1477 AMDGPUTargetStreamer &getTargetStreamer() { 1478 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1479 return static_cast<AMDGPUTargetStreamer &>(TS); 1480 } 1481 1482 const MCRegisterInfo *getMRI() const { 1483 // We need this const_cast because for some reason getContext() is not const 1484 // in MCAsmParser. 1485 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1486 } 1487 1488 const MCInstrInfo *getMII() const { 1489 return &MII; 1490 } 1491 1492 const FeatureBitset &getFeatureBits() const { 1493 return getSTI().getFeatureBits(); 1494 } 1495 1496 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1497 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1498 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1499 1500 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1501 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1502 bool isForcedDPP() const { return ForcedDPP; } 1503 bool isForcedSDWA() const { return ForcedSDWA; } 1504 ArrayRef<unsigned> getMatchedVariants() const; 1505 StringRef getMatchedVariantName() const; 1506 1507 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1508 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1509 bool RestoreOnFailure); 1510 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1511 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1512 SMLoc &EndLoc) override; 1513 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1514 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1515 unsigned Kind) override; 1516 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1517 OperandVector &Operands, MCStreamer &Out, 1518 uint64_t &ErrorInfo, 1519 bool MatchingInlineAsm) override; 1520 bool ParseDirective(AsmToken DirectiveID) override; 1521 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1522 OperandMode Mode = OperandMode_Default); 1523 StringRef parseMnemonicSuffix(StringRef Name); 1524 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1525 SMLoc NameLoc, OperandVector &Operands) override; 1526 //bool 
ProcessInstruction(MCInst &Inst); 1527 1528 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1529 1530 OperandMatchResultTy 1531 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1532 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1533 bool (*ConvertResult)(int64_t &) = nullptr); 1534 1535 OperandMatchResultTy 1536 parseOperandArrayWithPrefix(const char *Prefix, 1537 OperandVector &Operands, 1538 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1539 bool (*ConvertResult)(int64_t&) = nullptr); 1540 1541 OperandMatchResultTy 1542 parseNamedBit(StringRef Name, OperandVector &Operands, 1543 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1544 OperandMatchResultTy parseCPol(OperandVector &Operands); 1545 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1546 StringRef &Value, 1547 SMLoc &StringLoc); 1548 1549 bool isModifier(); 1550 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1551 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1552 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1553 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1554 bool parseSP3NegModifier(); 1555 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1556 OperandMatchResultTy parseReg(OperandVector &Operands); 1557 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1558 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1559 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1560 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1561 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1562 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1563 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1564 OperandMatchResultTy parseUfmt(int64_t &Format); 1565 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1566 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1567 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1568 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1569 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1570 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1571 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1572 1573 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1574 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1575 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1576 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1577 1578 bool parseCnt(int64_t &IntVal); 1579 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1580 1581 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1582 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1583 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1584 1585 bool parseDelay(int64_t &Delay); 1586 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1587 1588 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1589 1590 private: 1591 struct OperandInfoTy { 1592 SMLoc Loc; 1593 int64_t Id; 
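    // Used while parsing hwreg/sendmsg bodies: IsSymbolic records that the
    // value was supplied by name rather than numerically, IsDefined that the
    // field was present at all.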
1594 bool IsSymbolic = false; 1595 bool IsDefined = false; 1596 1597 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1598 }; 1599 1600 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1601 bool validateSendMsg(const OperandInfoTy &Msg, 1602 const OperandInfoTy &Op, 1603 const OperandInfoTy &Stream); 1604 1605 bool parseHwregBody(OperandInfoTy &HwReg, 1606 OperandInfoTy &Offset, 1607 OperandInfoTy &Width); 1608 bool validateHwreg(const OperandInfoTy &HwReg, 1609 const OperandInfoTy &Offset, 1610 const OperandInfoTy &Width); 1611 1612 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1613 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1614 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1615 1616 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1617 const OperandVector &Operands) const; 1618 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1619 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1620 SMLoc getLitLoc(const OperandVector &Operands) const; 1621 SMLoc getConstLoc(const OperandVector &Operands) const; 1622 1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateSOPLiteral(const MCInst &Inst) const; 1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1628 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1629 bool validateIntClampSupported(const MCInst &Inst); 1630 bool validateMIMGAtomicDMask(const MCInst &Inst); 1631 bool validateMIMGGatherDMask(const MCInst &Inst); 1632 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1633 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1634 bool validateMIMGAddrSize(const MCInst &Inst); 1635 bool validateMIMGD16(const MCInst &Inst); 1636 bool validateMIMGDim(const MCInst &Inst); 1637 bool validateMIMGMSAA(const MCInst &Inst); 1638 bool validateOpSel(const MCInst &Inst); 1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1640 bool validateVccOperand(unsigned Reg) const; 1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1643 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1644 bool validateAGPRLdSt(const MCInst &Inst) const; 1645 bool validateVGPRAlign(const MCInst &Inst) const; 1646 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1647 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1648 bool validateDivScale(const MCInst &Inst); 1649 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1650 const SMLoc &IDLoc); 1651 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1652 const SMLoc &IDLoc); 1653 bool validateExeczVcczOperands(const OperandVector &Operands); 1654 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1655 unsigned getConstantBusLimit(unsigned Opcode) const; 1656 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1657 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1658 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1659 1660 bool isSupportedMnemo(StringRef Mnemo, 1661 const 
FeatureBitset &FBS); 1662 bool isSupportedMnemo(StringRef Mnemo, 1663 const FeatureBitset &FBS, 1664 ArrayRef<unsigned> Variants); 1665 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1666 1667 bool isId(const StringRef Id) const; 1668 bool isId(const AsmToken &Token, const StringRef Id) const; 1669 bool isToken(const AsmToken::TokenKind Kind) const; 1670 bool trySkipId(const StringRef Id); 1671 bool trySkipId(const StringRef Pref, const StringRef Id); 1672 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1673 bool trySkipToken(const AsmToken::TokenKind Kind); 1674 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1675 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1676 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1677 1678 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1679 AsmToken::TokenKind getTokenKind() const; 1680 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1681 bool parseExpr(OperandVector &Operands); 1682 StringRef getTokenStr() const; 1683 AsmToken peekToken(); 1684 AsmToken getToken() const; 1685 SMLoc getLoc() const; 1686 void lex(); 1687 1688 public: 1689 void onBeginOfFile() override; 1690 1691 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1692 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1693 1694 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1695 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1696 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1697 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1698 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1699 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1700 1701 bool parseSwizzleOperand(int64_t &Op, 1702 const unsigned MinVal, 1703 const unsigned MaxVal, 1704 const StringRef ErrMsg, 1705 SMLoc &Loc); 1706 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1707 const unsigned MinVal, 1708 const unsigned MaxVal, 1709 const StringRef ErrMsg); 1710 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1711 bool parseSwizzleOffset(int64_t &Imm); 1712 bool parseSwizzleMacro(int64_t &Imm); 1713 bool parseSwizzleQuadPerm(int64_t &Imm); 1714 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1715 bool parseSwizzleBroadcast(int64_t &Imm); 1716 bool parseSwizzleSwap(int64_t &Imm); 1717 bool parseSwizzleReverse(int64_t &Imm); 1718 1719 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1720 int64_t parseGPRIdxMacro(); 1721 1722 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1723 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1724 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1725 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1726 1727 AMDGPUOperand::Ptr defaultCPol() const; 1728 1729 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1730 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1731 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1732 AMDGPUOperand::Ptr defaultFlatOffset() const; 1733 1734 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1735 1736 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1737 OptionalImmIndexMap &OptionalIdx); 1738 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1739 void cvtVOP3(MCInst &Inst, const 
OperandVector &Operands); 1740 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1741 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1742 OptionalImmIndexMap &OptionalIdx); 1743 1744 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1745 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1746 1747 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1748 bool IsAtomic = false); 1749 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1750 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1751 1752 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1753 1754 bool parseDimId(unsigned &Encoding); 1755 OperandMatchResultTy parseDim(OperandVector &Operands); 1756 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1757 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1758 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1759 int64_t parseDPPCtrlSel(StringRef Ctrl); 1760 int64_t parseDPPCtrlPerm(); 1761 AMDGPUOperand::Ptr defaultRowMask() const; 1762 AMDGPUOperand::Ptr defaultBankMask() const; 1763 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1764 AMDGPUOperand::Ptr defaultFI() const; 1765 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1766 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1767 cvtDPP(Inst, Operands, true); 1768 } 1769 void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, 1770 bool IsDPP8 = false); 1771 void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1772 cvtVOPCNoDstDPP(Inst, Operands, true); 1773 } 1774 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1775 bool IsDPP8 = false); 1776 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1777 cvtVOP3DPP(Inst, Operands, true); 1778 } 1779 void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, 1780 bool IsDPP8 = false); 1781 void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { 1782 cvtVOPC64NoDstDPP(Inst, Operands, true); 1783 } 1784 1785 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1786 AMDGPUOperand::ImmTy Type); 1787 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1788 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1789 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1790 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1791 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1792 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1793 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1794 uint64_t BasicInstType, 1795 bool SkipDstVcc = false, 1796 bool SkipSrcVcc = false); 1797 1798 AMDGPUOperand::Ptr defaultBLGP() const; 1799 AMDGPUOperand::Ptr defaultCBSZ() const; 1800 AMDGPUOperand::Ptr defaultABID() const; 1801 1802 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1803 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1804 1805 AMDGPUOperand::Ptr defaultWaitVDST() const; 1806 AMDGPUOperand::Ptr defaultWaitEXP() const; 1807 }; 1808 1809 struct OptionalOperand { 1810 const char *Name; 1811 AMDGPUOperand::ImmTy Type; 1812 bool IsBit; 1813 bool (*ConvertResult)(int64_t&); 1814 }; 1815 1816 } // end anonymous namespace 1817 1818 // May be called with integer type with equivalent bitwidth. 
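// Size is in bytes: 2 -> IEEEhalf(), 4 -> IEEEsingle(), 8 -> IEEEdouble().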
1819 static const fltSemantics *getFltSemantics(unsigned Size) { 1820 switch (Size) { 1821 case 4: 1822 return &APFloat::IEEEsingle(); 1823 case 8: 1824 return &APFloat::IEEEdouble(); 1825 case 2: 1826 return &APFloat::IEEEhalf(); 1827 default: 1828 llvm_unreachable("unsupported fp type"); 1829 } 1830 } 1831 1832 static const fltSemantics *getFltSemantics(MVT VT) { 1833 return getFltSemantics(VT.getSizeInBits() / 8); 1834 } 1835 1836 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1837 switch (OperandType) { 1838 case AMDGPU::OPERAND_REG_IMM_INT32: 1839 case AMDGPU::OPERAND_REG_IMM_FP32: 1840 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1841 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1842 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1843 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1844 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1846 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1848 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1849 case AMDGPU::OPERAND_KIMM32: 1850 return &APFloat::IEEEsingle(); 1851 case AMDGPU::OPERAND_REG_IMM_INT64: 1852 case AMDGPU::OPERAND_REG_IMM_FP64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1854 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1856 return &APFloat::IEEEdouble(); 1857 case AMDGPU::OPERAND_REG_IMM_INT16: 1858 case AMDGPU::OPERAND_REG_IMM_FP16: 1859 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1860 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1861 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1862 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1863 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1868 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1869 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1870 case AMDGPU::OPERAND_KIMM16: 1871 return &APFloat::IEEEhalf(); 1872 default: 1873 llvm_unreachable("unsupported fp type"); 1874 } 1875 } 1876 1877 //===----------------------------------------------------------------------===// 1878 // Operand 1879 //===----------------------------------------------------------------------===// 1880 1881 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1882 bool Lost; 1883 1884 // Convert literal to single precision 1885 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1886 APFloat::rmNearestTiesToEven, 1887 &Lost); 1888 // We allow precision lost but not overflow or underflow 1889 if (Status != APFloat::opOK && 1890 Lost && 1891 ((Status & APFloat::opOverflow) != 0 || 1892 (Status & APFloat::opUnderflow) != 0)) { 1893 return false; 1894 } 1895 1896 return true; 1897 } 1898 1899 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1900 return isUIntN(Size, Val) || isIntN(Size, Val); 1901 } 1902 1903 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1904 if (VT.getScalarType() == MVT::i16) { 1905 // FP immediate values are broken. 1906 return isInlinableIntLiteral(Val); 1907 } 1908 1909 // f16/v2f16 operands work correctly for all values. 1910 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1911 } 1912 1913 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1914 1915 // This is a hack to enable named inline values like 1916 // shared_base with both 32-bit and 64-bit operands. 1917 // Note that these values are defined as 1918 // 32-bit operands only. 
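  // For example (illustrative), named inline values such as src_shared_base,
  // src_shared_limit, src_private_base and null are accepted here for both
  // 32-bit and 64-bit operands; see isInlineValue() further below.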
1919 if (isInlineValue()) { 1920 return true; 1921 } 1922 1923 if (!isImmTy(ImmTyNone)) { 1924 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1925 return false; 1926 } 1927 // TODO: We should avoid using host float here. It would be better to 1928 // check the float bit values which is what a few other places do. 1929 // We've had bot failures before due to weird NaN support on mips hosts. 1930 1931 APInt Literal(64, Imm.Val); 1932 1933 if (Imm.IsFPImm) { // We got fp literal token 1934 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1935 return AMDGPU::isInlinableLiteral64(Imm.Val, 1936 AsmParser->hasInv2PiInlineImm()); 1937 } 1938 1939 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1940 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1941 return false; 1942 1943 if (type.getScalarSizeInBits() == 16) { 1944 return isInlineableLiteralOp16( 1945 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1946 type, AsmParser->hasInv2PiInlineImm()); 1947 } 1948 1949 // Check if single precision literal is inlinable 1950 return AMDGPU::isInlinableLiteral32( 1951 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1952 AsmParser->hasInv2PiInlineImm()); 1953 } 1954 1955 // We got int literal token. 1956 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1957 return AMDGPU::isInlinableLiteral64(Imm.Val, 1958 AsmParser->hasInv2PiInlineImm()); 1959 } 1960 1961 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1962 return false; 1963 } 1964 1965 if (type.getScalarSizeInBits() == 16) { 1966 return isInlineableLiteralOp16( 1967 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1968 type, AsmParser->hasInv2PiInlineImm()); 1969 } 1970 1971 return AMDGPU::isInlinableLiteral32( 1972 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1973 AsmParser->hasInv2PiInlineImm()); 1974 } 1975 1976 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1977 // Check that this immediate can be added as literal 1978 if (!isImmTy(ImmTyNone)) { 1979 return false; 1980 } 1981 1982 if (!Imm.IsFPImm) { 1983 // We got int literal token. 1984 1985 if (type == MVT::f64 && hasFPModifiers()) { 1986 // Cannot apply fp modifiers to int literals preserving the same semantics 1987 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1988 // disable these cases. 1989 return false; 1990 } 1991 1992 unsigned Size = type.getSizeInBits(); 1993 if (Size == 64) 1994 Size = 32; 1995 1996 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1997 // types. 1998 return isSafeTruncation(Imm.Val, Size); 1999 } 2000 2001 // We got fp literal token 2002 if (type == MVT::f64) { // Expected 64-bit fp operand 2003 // We would set low 64-bits of literal to zeroes but we accept this literals 2004 return true; 2005 } 2006 2007 if (type == MVT::i64) { // Expected 64-bit int operand 2008 // We don't allow fp literals in 64-bit integer instructions. It is 2009 // unclear how we should encode them. 2010 return false; 2011 } 2012 2013 // We allow fp literals with f16x2 operands assuming that the specified 2014 // literal goes into the lower half and the upper half is zero. We also 2015 // require that the literal may be losslessly converted to f16. 2016 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2017 (type == MVT::v2i16)? MVT::i16 : 2018 (type == MVT::v2f32)? 
MVT::f32 : type; 2019 2020 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2021 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2022 } 2023 2024 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2025 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2026 } 2027 2028 bool AMDGPUOperand::isVRegWithInputMods() const { 2029 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2030 // GFX90A allows DPP on 64-bit operands. 2031 (isRegClass(AMDGPU::VReg_64RegClassID) && 2032 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2033 } 2034 2035 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2036 if (AsmParser->isVI()) 2037 return isVReg32(); 2038 else if (AsmParser->isGFX9Plus()) 2039 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2040 else 2041 return false; 2042 } 2043 2044 bool AMDGPUOperand::isSDWAFP16Operand() const { 2045 return isSDWAOperand(MVT::f16); 2046 } 2047 2048 bool AMDGPUOperand::isSDWAFP32Operand() const { 2049 return isSDWAOperand(MVT::f32); 2050 } 2051 2052 bool AMDGPUOperand::isSDWAInt16Operand() const { 2053 return isSDWAOperand(MVT::i16); 2054 } 2055 2056 bool AMDGPUOperand::isSDWAInt32Operand() const { 2057 return isSDWAOperand(MVT::i32); 2058 } 2059 2060 bool AMDGPUOperand::isBoolReg() const { 2061 auto FB = AsmParser->getFeatureBits(); 2062 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2063 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2064 } 2065 2066 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2067 { 2068 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2069 assert(Size == 2 || Size == 4 || Size == 8); 2070 2071 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2072 2073 if (Imm.Mods.Abs) { 2074 Val &= ~FpSignMask; 2075 } 2076 if (Imm.Mods.Neg) { 2077 Val ^= FpSignMask; 2078 } 2079 2080 return Val; 2081 } 2082 2083 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2084 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2085 Inst.getNumOperands())) { 2086 addLiteralImmOperand(Inst, Imm.Val, 2087 ApplyModifiers & 2088 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2089 } else { 2090 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2091 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2092 setImmKindNone(); 2093 } 2094 } 2095 2096 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2097 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2098 auto OpNum = Inst.getNumOperands(); 2099 // Check that this operand accepts literals 2100 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2101 2102 if (ApplyModifiers) { 2103 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2104 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2105 Val = applyInputFPModifiers(Val, Size); 2106 } 2107 2108 APInt Literal(64, Val); 2109 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2110 2111 if (Imm.IsFPImm) { // We got fp literal token 2112 switch (OpTy) { 2113 case AMDGPU::OPERAND_REG_IMM_INT64: 2114 case AMDGPU::OPERAND_REG_IMM_FP64: 2115 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2116 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2118 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2119 AsmParser->hasInv2PiInlineImm())) { 2120 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2121 setImmKindConst(); 2122 return; 2123 } 2124 2125 // Non-inlineable 2126 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2127 // For fp operands we check if low 32 bits are zeros 2128 if (Literal.getLoBits(32) != 0) { 2129 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2130 "Can't encode literal as exact 64-bit floating-point operand. " 2131 "Low 32-bits will be set to zero"); 2132 } 2133 2134 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2135 setImmKindLiteral(); 2136 return; 2137 } 2138 2139 // We don't allow fp literals in 64-bit integer instructions. It is 2140 // unclear how we should encode them. This case should be checked earlier 2141 // in predicate methods (isLiteralImm()) 2142 llvm_unreachable("fp literal in 64-bit integer instruction."); 2143 2144 case AMDGPU::OPERAND_REG_IMM_INT32: 2145 case AMDGPU::OPERAND_REG_IMM_FP32: 2146 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2147 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2148 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2149 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2150 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2151 case AMDGPU::OPERAND_REG_IMM_INT16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16: 2153 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2156 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2159 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2160 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2162 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2163 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2164 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2165 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2166 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2167 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2168 case AMDGPU::OPERAND_KIMM32: 2169 case AMDGPU::OPERAND_KIMM16: { 2170 bool lost; 2171 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2172 // Convert literal to single precision 2173 FPLiteral.convert(*getOpFltSemantics(OpTy), 2174 APFloat::rmNearestTiesToEven, &lost); 2175 // We allow precision lost but not overflow or underflow. This should be 2176 // checked earlier in isLiteralImm() 2177 2178 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2179 Inst.addOperand(MCOperand::createImm(ImmVal)); 2180 setImmKindLiteral(); 2181 return; 2182 } 2183 default: 2184 llvm_unreachable("invalid operand size"); 2185 } 2186 2187 return; 2188 } 2189 2190 // We got int literal token. 2191 // Only sign extend inline immediates. 
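  // Illustrative example (assuming the usual AMDGPU integer inline range of
  // -16..64): an integer token such as -1 is encoded directly as an inline
  // constant, whereas an out-of-range value such as 100 is emitted as a
  // literal truncated to the operand width (e.g. the low 32 or 16 bits).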
2192 switch (OpTy) { 2193 case AMDGPU::OPERAND_REG_IMM_INT32: 2194 case AMDGPU::OPERAND_REG_IMM_FP32: 2195 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2196 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2197 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2198 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2200 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2201 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2202 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2203 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2204 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2206 if (isSafeTruncation(Val, 32) && 2207 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2208 AsmParser->hasInv2PiInlineImm())) { 2209 Inst.addOperand(MCOperand::createImm(Val)); 2210 setImmKindConst(); 2211 return; 2212 } 2213 2214 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2215 setImmKindLiteral(); 2216 return; 2217 2218 case AMDGPU::OPERAND_REG_IMM_INT64: 2219 case AMDGPU::OPERAND_REG_IMM_FP64: 2220 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2221 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2222 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2223 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2224 Inst.addOperand(MCOperand::createImm(Val)); 2225 setImmKindConst(); 2226 return; 2227 } 2228 2229 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2230 setImmKindLiteral(); 2231 return; 2232 2233 case AMDGPU::OPERAND_REG_IMM_INT16: 2234 case AMDGPU::OPERAND_REG_IMM_FP16: 2235 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2236 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2237 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2238 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2240 if (isSafeTruncation(Val, 16) && 2241 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2242 AsmParser->hasInv2PiInlineImm())) { 2243 Inst.addOperand(MCOperand::createImm(Val)); 2244 setImmKindConst(); 2245 return; 2246 } 2247 2248 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2249 setImmKindLiteral(); 2250 return; 2251 2252 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2253 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2254 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2255 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2256 assert(isSafeTruncation(Val, 16)); 2257 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2258 AsmParser->hasInv2PiInlineImm())); 2259 2260 Inst.addOperand(MCOperand::createImm(Val)); 2261 return; 2262 } 2263 case AMDGPU::OPERAND_KIMM32: 2264 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2265 setImmKindNone(); 2266 return; 2267 case AMDGPU::OPERAND_KIMM16: 2268 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2269 setImmKindNone(); 2270 return; 2271 default: 2272 llvm_unreachable("invalid operand size"); 2273 } 2274 } 2275 2276 template <unsigned Bitwidth> 2277 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2278 APInt Literal(64, Imm.Val); 2279 setImmKindNone(); 2280 2281 if (!Imm.IsFPImm) { 2282 // We got int literal token. 
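    // The integer token is used as is; only the low Bitwidth bits are encoded.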
2283 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2284 return; 2285 } 2286 2287 bool Lost; 2288 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2289 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2290 APFloat::rmNearestTiesToEven, &Lost); 2291 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2292 } 2293 2294 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2295 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2296 } 2297 2298 static bool isInlineValue(unsigned Reg) { 2299 switch (Reg) { 2300 case AMDGPU::SRC_SHARED_BASE: 2301 case AMDGPU::SRC_SHARED_LIMIT: 2302 case AMDGPU::SRC_PRIVATE_BASE: 2303 case AMDGPU::SRC_PRIVATE_LIMIT: 2304 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2305 return true; 2306 case AMDGPU::SRC_VCCZ: 2307 case AMDGPU::SRC_EXECZ: 2308 case AMDGPU::SRC_SCC: 2309 return true; 2310 case AMDGPU::SGPR_NULL: 2311 return true; 2312 default: 2313 return false; 2314 } 2315 } 2316 2317 bool AMDGPUOperand::isInlineValue() const { 2318 return isRegKind() && ::isInlineValue(getReg()); 2319 } 2320 2321 //===----------------------------------------------------------------------===// 2322 // AsmParser 2323 //===----------------------------------------------------------------------===// 2324 2325 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2326 if (Is == IS_VGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::VGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::VReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::VReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::VReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::VReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::VReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::VReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::VReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::VReg_512RegClassID; 2347 case 1024: 2348 return AMDGPU::VReg_1024RegClassID; 2349 } 2350 } else if (Is == IS_TTMP) { 2351 switch (RegWidth) { 2352 default: return -1; 2353 case 32: 2354 return AMDGPU::TTMP_32RegClassID; 2355 case 64: 2356 return AMDGPU::TTMP_64RegClassID; 2357 case 128: 2358 return AMDGPU::TTMP_128RegClassID; 2359 case 256: 2360 return AMDGPU::TTMP_256RegClassID; 2361 case 512: 2362 return AMDGPU::TTMP_512RegClassID; 2363 } 2364 } else if (Is == IS_SGPR) { 2365 switch (RegWidth) { 2366 default: return -1; 2367 case 32: 2368 return AMDGPU::SGPR_32RegClassID; 2369 case 64: 2370 return AMDGPU::SGPR_64RegClassID; 2371 case 96: 2372 return AMDGPU::SGPR_96RegClassID; 2373 case 128: 2374 return AMDGPU::SGPR_128RegClassID; 2375 case 160: 2376 return AMDGPU::SGPR_160RegClassID; 2377 case 192: 2378 return AMDGPU::SGPR_192RegClassID; 2379 case 224: 2380 return AMDGPU::SGPR_224RegClassID; 2381 case 256: 2382 return AMDGPU::SGPR_256RegClassID; 2383 case 512: 2384 return AMDGPU::SGPR_512RegClassID; 2385 } 2386 } else if (Is == IS_AGPR) { 2387 switch (RegWidth) { 2388 default: return -1; 2389 case 32: 2390 return AMDGPU::AGPR_32RegClassID; 2391 case 64: 2392 return AMDGPU::AReg_64RegClassID; 2393 case 96: 2394 return AMDGPU::AReg_96RegClassID; 2395 case 128: 2396 return AMDGPU::AReg_128RegClassID; 2397 case 160: 2398 return AMDGPU::AReg_160RegClassID; 2399 case 192: 2400 return AMDGPU::AReg_192RegClassID; 2401 case 224: 2402 return AMDGPU::AReg_224RegClassID; 2403 case 256: 2404 return AMDGPU::AReg_256RegClassID; 2405 case 512: 2406 return AMDGPU::AReg_512RegClassID; 
2407 case 1024: 2408 return AMDGPU::AReg_1024RegClassID; 2409 } 2410 } 2411 return -1; 2412 } 2413 2414 static unsigned getSpecialRegForName(StringRef RegName) { 2415 return StringSwitch<unsigned>(RegName) 2416 .Case("exec", AMDGPU::EXEC) 2417 .Case("vcc", AMDGPU::VCC) 2418 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2419 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2420 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2421 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2422 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2423 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2424 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2425 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2426 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2427 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2428 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2429 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2430 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2431 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2432 .Case("m0", AMDGPU::M0) 2433 .Case("vccz", AMDGPU::SRC_VCCZ) 2434 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2435 .Case("execz", AMDGPU::SRC_EXECZ) 2436 .Case("src_execz", AMDGPU::SRC_EXECZ) 2437 .Case("scc", AMDGPU::SRC_SCC) 2438 .Case("src_scc", AMDGPU::SRC_SCC) 2439 .Case("tba", AMDGPU::TBA) 2440 .Case("tma", AMDGPU::TMA) 2441 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2442 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2443 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2444 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2445 .Case("vcc_lo", AMDGPU::VCC_LO) 2446 .Case("vcc_hi", AMDGPU::VCC_HI) 2447 .Case("exec_lo", AMDGPU::EXEC_LO) 2448 .Case("exec_hi", AMDGPU::EXEC_HI) 2449 .Case("tma_lo", AMDGPU::TMA_LO) 2450 .Case("tma_hi", AMDGPU::TMA_HI) 2451 .Case("tba_lo", AMDGPU::TBA_LO) 2452 .Case("tba_hi", AMDGPU::TBA_HI) 2453 .Case("pc", AMDGPU::PC_REG) 2454 .Case("null", AMDGPU::SGPR_NULL) 2455 .Default(AMDGPU::NoRegister); 2456 } 2457 2458 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2459 SMLoc &EndLoc, bool RestoreOnFailure) { 2460 auto R = parseRegister(); 2461 if (!R) return true; 2462 assert(R->isReg()); 2463 RegNo = R->getReg(); 2464 StartLoc = R->getStartLoc(); 2465 EndLoc = R->getEndLoc(); 2466 return false; 2467 } 2468 2469 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2470 SMLoc &EndLoc) { 2471 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2472 } 2473 2474 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2475 SMLoc &StartLoc, 2476 SMLoc &EndLoc) { 2477 bool Result = 2478 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2479 bool PendingErrors = getParser().hasPendingError(); 2480 getParser().clearPendingErrors(); 2481 if (PendingErrors) 2482 return MatchOperand_ParseFail; 2483 if (Result) 2484 return MatchOperand_NoMatch; 2485 return MatchOperand_Success; 2486 } 2487 2488 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2489 RegisterKind RegKind, unsigned Reg1, 2490 SMLoc Loc) { 2491 switch (RegKind) { 2492 case IS_SPECIAL: 2493 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2494 Reg = AMDGPU::EXEC; 2495 RegWidth = 64; 2496 return true; 2497 } 2498 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2499 Reg = AMDGPU::FLAT_SCR; 2500 RegWidth = 64; 2501 return true; 2502 } 2503 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2504 Reg = AMDGPU::XNACK_MASK; 2505 RegWidth = 64; 
2506 return true; 2507 } 2508 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2509 Reg = AMDGPU::VCC; 2510 RegWidth = 64; 2511 return true; 2512 } 2513 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2514 Reg = AMDGPU::TBA; 2515 RegWidth = 64; 2516 return true; 2517 } 2518 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2519 Reg = AMDGPU::TMA; 2520 RegWidth = 64; 2521 return true; 2522 } 2523 Error(Loc, "register does not fit in the list"); 2524 return false; 2525 case IS_VGPR: 2526 case IS_SGPR: 2527 case IS_AGPR: 2528 case IS_TTMP: 2529 if (Reg1 != Reg + RegWidth / 32) { 2530 Error(Loc, "registers in a list must have consecutive indices"); 2531 return false; 2532 } 2533 RegWidth += 32; 2534 return true; 2535 default: 2536 llvm_unreachable("unexpected register kind"); 2537 } 2538 } 2539 2540 struct RegInfo { 2541 StringLiteral Name; 2542 RegisterKind Kind; 2543 }; 2544 2545 static constexpr RegInfo RegularRegisters[] = { 2546 {{"v"}, IS_VGPR}, 2547 {{"s"}, IS_SGPR}, 2548 {{"ttmp"}, IS_TTMP}, 2549 {{"acc"}, IS_AGPR}, 2550 {{"a"}, IS_AGPR}, 2551 }; 2552 2553 static bool isRegularReg(RegisterKind Kind) { 2554 return Kind == IS_VGPR || 2555 Kind == IS_SGPR || 2556 Kind == IS_TTMP || 2557 Kind == IS_AGPR; 2558 } 2559 2560 static const RegInfo* getRegularRegInfo(StringRef Str) { 2561 for (const RegInfo &Reg : RegularRegisters) 2562 if (Str.startswith(Reg.Name)) 2563 return &Reg; 2564 return nullptr; 2565 } 2566 2567 static bool getRegNum(StringRef Str, unsigned& Num) { 2568 return !Str.getAsInteger(10, Num); 2569 } 2570 2571 bool 2572 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2573 const AsmToken &NextToken) const { 2574 2575 // A list of consecutive registers: [s0,s1,s2,s3] 2576 if (Token.is(AsmToken::LBrac)) 2577 return true; 2578 2579 if (!Token.is(AsmToken::Identifier)) 2580 return false; 2581 2582 // A single register like s0 or a range of registers like s[0:1] 2583 2584 StringRef Str = Token.getString(); 2585 const RegInfo *Reg = getRegularRegInfo(Str); 2586 if (Reg) { 2587 StringRef RegName = Reg->Name; 2588 StringRef RegSuffix = Str.substr(RegName.size()); 2589 if (!RegSuffix.empty()) { 2590 unsigned Num; 2591 // A single register with an index: rXX 2592 if (getRegNum(RegSuffix, Num)) 2593 return true; 2594 } else { 2595 // A range of registers: r[XX:YY]. 2596 if (NextToken.is(AsmToken::LBrac)) 2597 return true; 2598 } 2599 } 2600 2601 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2602 } 2603 2604 bool 2605 AMDGPUAsmParser::isRegister() 2606 { 2607 return isRegister(getToken(), peekToken()); 2608 } 2609 2610 unsigned 2611 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2612 unsigned RegNum, 2613 unsigned RegWidth, 2614 SMLoc Loc) { 2615 2616 assert(isRegularReg(RegKind)); 2617 2618 unsigned AlignSize = 1; 2619 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2620 // SGPR and TTMP registers must be aligned. 2621 // Max required alignment is 4 dwords. 
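    // For example (illustrative): a 64-bit pair such as s[2:3] must start at
    // an even index, and 128-bit or wider tuples such as s[4:7] must start at
    // an index that is a multiple of 4; s[1:2] or s[2:5] are rejected below.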
2622 AlignSize = std::min(RegWidth / 32, 4u); 2623 } 2624 2625 if (RegNum % AlignSize != 0) { 2626 Error(Loc, "invalid register alignment"); 2627 return AMDGPU::NoRegister; 2628 } 2629 2630 unsigned RegIdx = RegNum / AlignSize; 2631 int RCID = getRegClass(RegKind, RegWidth); 2632 if (RCID == -1) { 2633 Error(Loc, "invalid or unsupported register size"); 2634 return AMDGPU::NoRegister; 2635 } 2636 2637 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2638 const MCRegisterClass RC = TRI->getRegClass(RCID); 2639 if (RegIdx >= RC.getNumRegs()) { 2640 Error(Loc, "register index is out of range"); 2641 return AMDGPU::NoRegister; 2642 } 2643 2644 return RC.getRegister(RegIdx); 2645 } 2646 2647 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2648 int64_t RegLo, RegHi; 2649 if (!skipToken(AsmToken::LBrac, "missing register index")) 2650 return false; 2651 2652 SMLoc FirstIdxLoc = getLoc(); 2653 SMLoc SecondIdxLoc; 2654 2655 if (!parseExpr(RegLo)) 2656 return false; 2657 2658 if (trySkipToken(AsmToken::Colon)) { 2659 SecondIdxLoc = getLoc(); 2660 if (!parseExpr(RegHi)) 2661 return false; 2662 } else { 2663 RegHi = RegLo; 2664 } 2665 2666 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2667 return false; 2668 2669 if (!isUInt<32>(RegLo)) { 2670 Error(FirstIdxLoc, "invalid register index"); 2671 return false; 2672 } 2673 2674 if (!isUInt<32>(RegHi)) { 2675 Error(SecondIdxLoc, "invalid register index"); 2676 return false; 2677 } 2678 2679 if (RegLo > RegHi) { 2680 Error(FirstIdxLoc, "first register index should not exceed second index"); 2681 return false; 2682 } 2683 2684 Num = static_cast<unsigned>(RegLo); 2685 RegWidth = 32 * ((RegHi - RegLo) + 1); 2686 return true; 2687 } 2688 2689 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2690 unsigned &RegNum, unsigned &RegWidth, 2691 SmallVectorImpl<AsmToken> &Tokens) { 2692 assert(isToken(AsmToken::Identifier)); 2693 unsigned Reg = getSpecialRegForName(getTokenStr()); 2694 if (Reg) { 2695 RegNum = 0; 2696 RegWidth = 32; 2697 RegKind = IS_SPECIAL; 2698 Tokens.push_back(getToken()); 2699 lex(); // skip register name 2700 } 2701 return Reg; 2702 } 2703 2704 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2705 unsigned &RegNum, unsigned &RegWidth, 2706 SmallVectorImpl<AsmToken> &Tokens) { 2707 assert(isToken(AsmToken::Identifier)); 2708 StringRef RegName = getTokenStr(); 2709 auto Loc = getLoc(); 2710 2711 const RegInfo *RI = getRegularRegInfo(RegName); 2712 if (!RI) { 2713 Error(Loc, "invalid register name"); 2714 return AMDGPU::NoRegister; 2715 } 2716 2717 Tokens.push_back(getToken()); 2718 lex(); // skip register name 2719 2720 RegKind = RI->Kind; 2721 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2722 if (!RegSuffix.empty()) { 2723 // Single 32-bit register: vXX. 2724 if (!getRegNum(RegSuffix, RegNum)) { 2725 Error(Loc, "invalid register index"); 2726 return AMDGPU::NoRegister; 2727 } 2728 RegWidth = 32; 2729 } else { 2730 // Range of registers: v[XX:YY]. ":YY" is optional. 
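    // e.g. (illustrative) v[4:7] names four consecutive VGPRs starting at v4,
    // while v[5] is accepted as the single register v5.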
2731 if (!ParseRegRange(RegNum, RegWidth)) 2732 return AMDGPU::NoRegister; 2733 } 2734 2735 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2736 } 2737 2738 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2739 unsigned &RegWidth, 2740 SmallVectorImpl<AsmToken> &Tokens) { 2741 unsigned Reg = AMDGPU::NoRegister; 2742 auto ListLoc = getLoc(); 2743 2744 if (!skipToken(AsmToken::LBrac, 2745 "expected a register or a list of registers")) { 2746 return AMDGPU::NoRegister; 2747 } 2748 2749 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2750 2751 auto Loc = getLoc(); 2752 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2753 return AMDGPU::NoRegister; 2754 if (RegWidth != 32) { 2755 Error(Loc, "expected a single 32-bit register"); 2756 return AMDGPU::NoRegister; 2757 } 2758 2759 for (; trySkipToken(AsmToken::Comma); ) { 2760 RegisterKind NextRegKind; 2761 unsigned NextReg, NextRegNum, NextRegWidth; 2762 Loc = getLoc(); 2763 2764 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2765 NextRegNum, NextRegWidth, 2766 Tokens)) { 2767 return AMDGPU::NoRegister; 2768 } 2769 if (NextRegWidth != 32) { 2770 Error(Loc, "expected a single 32-bit register"); 2771 return AMDGPU::NoRegister; 2772 } 2773 if (NextRegKind != RegKind) { 2774 Error(Loc, "registers in a list must be of the same kind"); 2775 return AMDGPU::NoRegister; 2776 } 2777 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2778 return AMDGPU::NoRegister; 2779 } 2780 2781 if (!skipToken(AsmToken::RBrac, 2782 "expected a comma or a closing square bracket")) { 2783 return AMDGPU::NoRegister; 2784 } 2785 2786 if (isRegularReg(RegKind)) 2787 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2788 2789 return Reg; 2790 } 2791 2792 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2793 unsigned &RegNum, unsigned &RegWidth, 2794 SmallVectorImpl<AsmToken> &Tokens) { 2795 auto Loc = getLoc(); 2796 Reg = AMDGPU::NoRegister; 2797 2798 if (isToken(AsmToken::Identifier)) { 2799 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2800 if (Reg == AMDGPU::NoRegister) 2801 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2802 } else { 2803 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2804 } 2805 2806 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2807 if (Reg == AMDGPU::NoRegister) { 2808 assert(Parser.hasPendingError()); 2809 return false; 2810 } 2811 2812 if (!subtargetHasRegister(*TRI, Reg)) { 2813 if (Reg == AMDGPU::SGPR_NULL) { 2814 Error(Loc, "'null' operand is not supported on this GPU"); 2815 } else { 2816 Error(Loc, "register not available on this GPU"); 2817 } 2818 return false; 2819 } 2820 2821 return true; 2822 } 2823 2824 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2825 unsigned &RegNum, unsigned &RegWidth, 2826 bool RestoreOnFailure /*=false*/) { 2827 Reg = AMDGPU::NoRegister; 2828 2829 SmallVector<AsmToken, 1> Tokens; 2830 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2831 if (RestoreOnFailure) { 2832 while (!Tokens.empty()) { 2833 getLexer().UnLex(Tokens.pop_back_val()); 2834 } 2835 } 2836 return true; 2837 } 2838 return false; 2839 } 2840 2841 Optional<StringRef> 2842 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2843 switch (RegKind) { 2844 case IS_VGPR: 2845 return StringRef(".amdgcn.next_free_vgpr"); 2846 case IS_SGPR: 2847 return StringRef(".amdgcn.next_free_sgpr"); 2848 default: 2849 return None; 2850 } 2851 } 2852 2853 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2854 auto SymbolName = getGprCountSymbolName(RegKind); 2855 assert(SymbolName && "initializing invalid register kind"); 2856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2857 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2858 } 2859 2860 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2861 unsigned DwordRegIndex, 2862 unsigned RegWidth) { 2863 // Symbols are only defined for GCN targets 2864 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2865 return true; 2866 2867 auto SymbolName = getGprCountSymbolName(RegKind); 2868 if (!SymbolName) 2869 return true; 2870 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2871 2872 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2873 int64_t OldCount; 2874 2875 if (!Sym->isVariable()) 2876 return !Error(getLoc(), 2877 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2878 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2879 return !Error( 2880 getLoc(), 2881 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2882 2883 if (OldCount <= NewMax) 2884 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2885 2886 return true; 2887 } 2888 2889 std::unique_ptr<AMDGPUOperand> 2890 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2891 const auto &Tok = getToken(); 2892 SMLoc StartLoc = Tok.getLoc(); 2893 SMLoc EndLoc = Tok.getEndLoc(); 2894 RegisterKind RegKind; 2895 unsigned Reg, RegNum, RegWidth; 2896 2897 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2898 return nullptr; 2899 } 2900 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2901 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2902 return nullptr; 2903 } else 2904 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2905 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2906 } 2907 2908 OperandMatchResultTy 2909 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2910 // TODO: add syntactic sugar for 1/(2*PI) 2911 2912 assert(!isRegister()); 2913 assert(!isModifier()); 2914 2915 const auto& Tok = getToken(); 2916 const auto& NextTok = peekToken(); 2917 bool IsReal = Tok.is(AsmToken::Real); 2918 SMLoc S = getLoc(); 2919 bool Negate = false; 2920 2921 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2922 lex(); 2923 IsReal = true; 2924 Negate = true; 2925 } 2926 2927 if (IsReal) { 2928 // Floating-point expressions are not supported. 2929 // Can only allow floating-point literals with an 2930 // optional sign. 2931 2932 StringRef Num = getTokenStr(); 2933 lex(); 2934 2935 APFloat RealVal(APFloat::IEEEdouble()); 2936 auto roundMode = APFloat::rmNearestTiesToEven; 2937 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2938 return MatchOperand_ParseFail; 2939 } 2940 if (Negate) 2941 RealVal.changeSign(); 2942 2943 Operands.push_back( 2944 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2945 AMDGPUOperand::ImmTyNone, true)); 2946 2947 return MatchOperand_Success; 2948 2949 } else { 2950 int64_t IntVal; 2951 const MCExpr *Expr; 2952 SMLoc S = getLoc(); 2953 2954 if (HasSP3AbsModifier) { 2955 // This is a workaround for handling expressions 2956 // as arguments of SP3 'abs' modifier, for example: 2957 // |1.0| 2958 // |-1| 2959 // |1+x| 2960 // This syntax is not compatible with syntax of standard 2961 // MC expressions (due to the trailing '|'). 
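      // Parse only a primary expression so that parsing stops before the
      // closing '|'; a full parseExpression() call would otherwise try to
      // consume the trailing '|' as a binary operator.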
2962 SMLoc EndLoc; 2963 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2964 return MatchOperand_ParseFail; 2965 } else { 2966 if (Parser.parseExpression(Expr)) 2967 return MatchOperand_ParseFail; 2968 } 2969 2970 if (Expr->evaluateAsAbsolute(IntVal)) { 2971 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2972 } else { 2973 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2974 } 2975 2976 return MatchOperand_Success; 2977 } 2978 2979 return MatchOperand_NoMatch; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2984 if (!isRegister()) 2985 return MatchOperand_NoMatch; 2986 2987 if (auto R = parseRegister()) { 2988 assert(R->isReg()); 2989 Operands.push_back(std::move(R)); 2990 return MatchOperand_Success; 2991 } 2992 return MatchOperand_ParseFail; 2993 } 2994 2995 OperandMatchResultTy 2996 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2997 auto res = parseReg(Operands); 2998 if (res != MatchOperand_NoMatch) { 2999 return res; 3000 } else if (isModifier()) { 3001 return MatchOperand_NoMatch; 3002 } else { 3003 return parseImm(Operands, HasSP3AbsMod); 3004 } 3005 } 3006 3007 bool 3008 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3009 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3010 const auto &str = Token.getString(); 3011 return str == "abs" || str == "neg" || str == "sext"; 3012 } 3013 return false; 3014 } 3015 3016 bool 3017 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3018 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3019 } 3020 3021 bool 3022 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3023 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3024 } 3025 3026 bool 3027 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3028 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3029 } 3030 3031 // Check if this is an operand modifier or an opcode modifier 3032 // which may look like an expression but it is not. We should 3033 // avoid parsing these modifiers as expressions. Currently 3034 // recognized sequences are: 3035 // |...| 3036 // abs(...) 3037 // neg(...) 3038 // sext(...) 3039 // -reg 3040 // -|...| 3041 // -abs(...) 3042 // name:... 3043 // Note that simple opcode modifiers like 'gds' may be parsed as 3044 // expressions; this is a special case. See getExpressionAsToken. 3045 // 3046 bool 3047 AMDGPUAsmParser::isModifier() { 3048 3049 AsmToken Tok = getToken(); 3050 AsmToken NextToken[2]; 3051 peekTokens(NextToken); 3052 3053 return isOperandModifier(Tok, NextToken[0]) || 3054 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3055 isOpcodeModifierWithVal(Tok, NextToken[0]); 3056 } 3057 3058 // Check if the current token is an SP3 'neg' modifier. 3059 // Currently this modifier is allowed in the following context: 3060 // 3061 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3062 // 2. Before an 'abs' modifier: -abs(...) 3063 // 3. Before an SP3 'abs' modifier: -|...| 3064 // 3065 // In all other cases "-" is handled as a part 3066 // of an expression that follows the sign. 
3067 // 3068 // Note: When "-" is followed by an integer literal, 3069 // this is interpreted as integer negation rather 3070 // than a floating-point NEG modifier applied to N. 3071 // Besides being counter-intuitive, such use of the floating-point 3072 // NEG modifier would have resulted in different meanings 3073 // of integer literals used with VOP1/2/C and VOP3, 3074 // for example: 3075 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3076 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3077 // Negative fp literals with preceding "-" are 3078 // handled likewise for uniformity. 3079 // 3080 bool 3081 AMDGPUAsmParser::parseSP3NegModifier() { 3082 3083 AsmToken NextToken[2]; 3084 peekTokens(NextToken); 3085 3086 if (isToken(AsmToken::Minus) && 3087 (isRegister(NextToken[0], NextToken[1]) || 3088 NextToken[0].is(AsmToken::Pipe) || 3089 isId(NextToken[0], "abs"))) { 3090 lex(); 3091 return true; 3092 } 3093 3094 return false; 3095 } 3096 3097 OperandMatchResultTy 3098 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3099 bool AllowImm) { 3100 bool Neg, SP3Neg; 3101 bool Abs, SP3Abs; 3102 SMLoc Loc; 3103 3104 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3105 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3106 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3107 return MatchOperand_ParseFail; 3108 } 3109 3110 SP3Neg = parseSP3NegModifier(); 3111 3112 Loc = getLoc(); 3113 Neg = trySkipId("neg"); 3114 if (Neg && SP3Neg) { 3115 Error(Loc, "expected register or immediate"); 3116 return MatchOperand_ParseFail; 3117 } 3118 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3119 return MatchOperand_ParseFail; 3120 3121 Abs = trySkipId("abs"); 3122 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3123 return MatchOperand_ParseFail; 3124 3125 Loc = getLoc(); 3126 SP3Abs = trySkipToken(AsmToken::Pipe); 3127 if (Abs && SP3Abs) { 3128 Error(Loc, "expected register or immediate"); 3129 return MatchOperand_ParseFail; 3130 } 3131 3132 OperandMatchResultTy Res; 3133 if (AllowImm) { 3134 Res = parseRegOrImm(Operands, SP3Abs); 3135 } else { 3136 Res = parseReg(Operands); 3137 } 3138 if (Res != MatchOperand_Success) { 3139 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3140 } 3141 3142 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3143 return MatchOperand_ParseFail; 3144 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3145 return MatchOperand_ParseFail; 3146 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3147 return MatchOperand_ParseFail; 3148 3149 AMDGPUOperand::Modifiers Mods; 3150 Mods.Abs = Abs || SP3Abs; 3151 Mods.Neg = Neg || SP3Neg; 3152 3153 if (Mods.hasFPModifiers()) { 3154 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3155 if (Op.isExpr()) { 3156 Error(Op.getStartLoc(), "expected an absolute expression"); 3157 return MatchOperand_ParseFail; 3158 } 3159 Op.setModifiers(Mods); 3160 } 3161 return MatchOperand_Success; 3162 } 3163 3164 OperandMatchResultTy 3165 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3166 bool AllowImm) { 3167 bool Sext = trySkipId("sext"); 3168 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3169 return MatchOperand_ParseFail; 3170 3171 OperandMatchResultTy Res; 3172 if (AllowImm) { 3173 Res = parseRegOrImm(Operands); 3174 } else { 3175 Res = parseReg(Operands); 3176 } 3177 if (Res != MatchOperand_Success) { 3178 return Sext? MatchOperand_ParseFail : Res; 3179 } 3180 3181 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3182 return MatchOperand_ParseFail; 3183 3184 AMDGPUOperand::Modifiers Mods; 3185 Mods.Sext = Sext; 3186 3187 if (Mods.hasIntModifiers()) { 3188 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3189 if (Op.isExpr()) { 3190 Error(Op.getStartLoc(), "expected an absolute expression"); 3191 return MatchOperand_ParseFail; 3192 } 3193 Op.setModifiers(Mods); 3194 } 3195 3196 return MatchOperand_Success; 3197 } 3198 3199 OperandMatchResultTy 3200 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3201 return parseRegOrImmWithFPInputMods(Operands, false); 3202 } 3203 3204 OperandMatchResultTy 3205 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3206 return parseRegOrImmWithIntInputMods(Operands, false); 3207 } 3208 3209 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3210 auto Loc = getLoc(); 3211 if (trySkipId("off")) { 3212 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3213 AMDGPUOperand::ImmTyOff, false)); 3214 return MatchOperand_Success; 3215 } 3216 3217 if (!isRegister()) 3218 return MatchOperand_NoMatch; 3219 3220 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3221 if (Reg) { 3222 Operands.push_back(std::move(Reg)); 3223 return MatchOperand_Success; 3224 } 3225 3226 return MatchOperand_ParseFail; 3227 3228 } 3229 3230 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3231 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3232 3233 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3234 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3235 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3236 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3237 return Match_InvalidOperand; 3238 3239 if ((TSFlags & SIInstrFlags::VOP3) && 3240 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3241 getForcedEncodingSize() != 64) 3242 return Match_PreferE32; 3243 3244 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3245 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3246 // v_mac_f32/16 allow only dst_sel == DWORD; 3247 auto OpNum = 3248 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3249 const auto &Op = Inst.getOperand(OpNum); 3250 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3251 return Match_InvalidOperand; 3252 } 3253 } 3254 3255 return Match_Success; 3256 } 3257 3258 static ArrayRef<unsigned> getAllVariants() { 3259 static const unsigned Variants[] = { 3260 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3261 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3262 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3263 }; 3264 3265 return makeArrayRef(Variants); 3266 } 3267 3268 // What asm variants we should check 3269 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3270 if (isForcedDPP() && isForcedVOP3()) { 3271 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3272 return makeArrayRef(Variants); 3273 } 3274 if (getForcedEncodingSize() == 32) { 3275 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3276 return makeArrayRef(Variants); 3277 } 3278 3279 if (isForcedVOP3()) { 3280 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3281 return makeArrayRef(Variants); 3282 } 3283 3284 if (isForcedSDWA()) { 3285 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3286 AMDGPUAsmVariants::SDWA9}; 3287 return makeArrayRef(Variants); 3288 } 3289 3290 if (isForcedDPP()) { 3291 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3292 return makeArrayRef(Variants); 3293 } 3294 3295 return getAllVariants(); 3296 } 3297 3298 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3299 if (isForcedDPP() && isForcedVOP3()) 3300 return "e64_dpp"; 3301 3302 if (getForcedEncodingSize() == 32) 3303 return "e32"; 3304 3305 if (isForcedVOP3()) 3306 return "e64"; 3307 3308 if (isForcedSDWA()) 3309 return "sdwa"; 3310 3311 if (isForcedDPP()) 3312 return "dpp"; 3313 3314 return ""; 3315 } 3316 3317 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3318 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3319 const unsigned Num = Desc.getNumImplicitUses(); 3320 for (unsigned i = 0; i < Num; ++i) { 3321 unsigned Reg = Desc.ImplicitUses[i]; 3322 switch (Reg) { 3323 case AMDGPU::FLAT_SCR: 3324 case AMDGPU::VCC: 3325 case AMDGPU::VCC_LO: 3326 case AMDGPU::VCC_HI: 3327 case AMDGPU::M0: 3328 return Reg; 3329 default: 3330 break; 3331 } 3332 } 3333 return AMDGPU::NoRegister; 3334 } 3335 3336 // NB: This code is correct only when used to check constant 3337 // bus limitations because GFX7 support no f16 inline constants. 3338 // Note that there are no cases when a GFX7 opcode violates 3339 // constant bus limitations due to the use of an f16 constant. 
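// For reference (illustrative): the inlinable constants are the integers
// -16..64 and the fp values 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0,
// plus 1/(2*pi) on targets that provide the Inv2Pi inline immediate.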
3340 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3341 unsigned OpIdx) const { 3342 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3343 3344 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3345 return false; 3346 } 3347 3348 const MCOperand &MO = Inst.getOperand(OpIdx); 3349 3350 int64_t Val = MO.getImm(); 3351 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3352 3353 switch (OpSize) { // expected operand size 3354 case 8: 3355 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3356 case 4: 3357 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3358 case 2: { 3359 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3360 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3361 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3362 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3363 return AMDGPU::isInlinableIntLiteral(Val); 3364 3365 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3366 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3367 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3368 return AMDGPU::isInlinableIntLiteralV216(Val); 3369 3370 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3371 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3372 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3373 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3374 3375 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3376 } 3377 default: 3378 llvm_unreachable("invalid operand size"); 3379 } 3380 } 3381 3382 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3383 if (!isGFX10Plus()) 3384 return 1; 3385 3386 switch (Opcode) { 3387 // 64-bit shift instructions can use only one scalar value input 3388 case AMDGPU::V_LSHLREV_B64_e64: 3389 case AMDGPU::V_LSHLREV_B64_gfx10: 3390 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3391 case AMDGPU::V_LSHRREV_B64_e64: 3392 case AMDGPU::V_LSHRREV_B64_gfx10: 3393 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3394 case AMDGPU::V_ASHRREV_I64_e64: 3395 case AMDGPU::V_ASHRREV_I64_gfx10: 3396 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3397 case AMDGPU::V_LSHL_B64_e64: 3398 case AMDGPU::V_LSHR_B64_e64: 3399 case AMDGPU::V_ASHR_I64_e64: 3400 return 1; 3401 default: 3402 return 2; 3403 } 3404 } 3405 3406 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3407 const MCOperand &MO = Inst.getOperand(OpIdx); 3408 if (MO.isImm()) { 3409 return !isInlineConstant(Inst, OpIdx); 3410 } else if (MO.isReg()) { 3411 auto Reg = MO.getReg(); 3412 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3413 auto PReg = mc2PseudoReg(Reg); 3414 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3415 } else { 3416 return true; 3417 } 3418 } 3419 3420 bool 3421 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3422 const OperandVector &Operands) { 3423 const unsigned Opcode = Inst.getOpcode(); 3424 const MCInstrDesc &Desc = MII.get(Opcode); 3425 unsigned LastSGPR = AMDGPU::NoRegister; 3426 unsigned ConstantBusUseCount = 0; 3427 unsigned NumLiterals = 0; 3428 unsigned LiteralSize; 3429 3430 if (Desc.TSFlags & 3431 (SIInstrFlags::VOPC | 3432 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3433 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3434 SIInstrFlags::SDWA)) { 3435 // Check special imm operands (used by madmk, etc) 3436 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3437 ++NumLiterals; 3438 LiteralSize = 4; 3439 } 3440 3441 SmallDenseSet<unsigned> SGPRsUsed; 3442 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst); 3443 if (SGPRUsed != AMDGPU::NoRegister) { 3444 SGPRsUsed.insert(SGPRUsed); 3445 ++ConstantBusUseCount; 3446 } 3447 3448 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3449 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3450 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3451 3452 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3453 3454 for (int OpIdx : OpIndices) { 3455 if (OpIdx == -1) break; 3456 3457 const MCOperand &MO = Inst.getOperand(OpIdx); 3458 if (usesConstantBus(Inst, OpIdx)) { 3459 if (MO.isReg()) { 3460 LastSGPR = mc2PseudoReg(MO.getReg()); 3461 // Pairs of registers with partial intersections like these 3462 // s0, s[0:1] 3463 // flat_scratch_lo, flat_scratch 3464 // flat_scratch_lo, flat_scratch_hi 3465 // are theoretically valid but they are disabled anyway. 3466 // Note that this code mimics SIInstrInfo::verifyInstruction. 3467 if (!SGPRsUsed.count(LastSGPR)) { 3468 SGPRsUsed.insert(LastSGPR); 3469 ++ConstantBusUseCount; 3470 } 3471 } else { // Expression or a literal 3472 3473 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3474 continue; // special operand like VINTERP attr_chan 3475 3476 // An instruction may use only one literal. 3477 // This has been validated in a previous step. 3478 // See validateVOPLiteral. 3479 // This literal may be used as more than one operand. 3480 // If all these operands are of the same size, 3481 // this literal counts as one scalar value. 3482 // Otherwise it counts as 2 scalar values. 3483 // See "GFX10 Shader Programming", section 3.6.2.3. 3484 3485 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3486 if (Size < 4) Size = 4; 3487 3488 if (NumLiterals == 0) { 3489 NumLiterals = 1; 3490 LiteralSize = Size; 3491 } else if (LiteralSize != Size) { 3492 NumLiterals = 2; 3493 } 3494 } 3495 } 3496 } 3497 } 3498 ConstantBusUseCount += NumLiterals; 3499 3500 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3501 return true; 3502 3503 SMLoc LitLoc = getLitLoc(Operands); 3504 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3505 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3506 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3507 return false; 3508 } 3509 3510 bool 3511 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3512 const OperandVector &Operands) { 3513 const unsigned Opcode = Inst.getOpcode(); 3514 const MCInstrDesc &Desc = MII.get(Opcode); 3515 3516 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3517 if (DstIdx == -1 || 3518 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3519 return true; 3520 } 3521 3522 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3523 3524 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3525 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3526 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3527 3528 assert(DstIdx != -1); 3529 const MCOperand &Dst = Inst.getOperand(DstIdx); 3530 assert(Dst.isReg()); 3531 3532 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3533 3534 for (int SrcIdx : SrcIndices) { 3535 if (SrcIdx == -1) break; 3536 const MCOperand &Src = Inst.getOperand(SrcIdx); 3537 if (Src.isReg()) { 3538 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3539 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3540 Error(getRegLoc(SrcReg, Operands), 3541 "destination must be different than all sources"); 3542 return false; 3543 } 3544 } 3545 } 3546 3547 return true; 3548 } 3549 3550 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3551 3552 const unsigned Opc = Inst.getOpcode(); 3553 const MCInstrDesc &Desc = MII.get(Opc); 3554 3555 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3556 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3557 assert(ClampIdx != -1); 3558 return Inst.getOperand(ClampIdx).getImm() == 0; 3559 } 3560 3561 return true; 3562 } 3563 3564 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3565 3566 const unsigned Opc = Inst.getOpcode(); 3567 const MCInstrDesc &Desc = MII.get(Opc); 3568 3569 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3570 return None; 3571 3572 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3573 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3574 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3575 3576 assert(VDataIdx != -1); 3577 3578 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3579 return None; 3580 3581 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3582 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3583 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3584 if (DMask == 0) 3585 DMask = 1; 3586 3587 bool isPackedD16 = false; 3588 unsigned DataSize = 3589 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3590 if (hasPackedD16()) { 3591 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3592 isPackedD16 = D16Idx >= 0; 3593 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3594 DataSize = (DataSize + 1) / 2; 3595 } 3596 3597 if ((VDataSize / 4) == DataSize + TFESize) 3598 return None; 3599 3600 return StringRef(isPackedD16 3601 ? 
"image data size does not match dmask, d16 and tfe" 3602 : "image data size does not match dmask and tfe"); 3603 } 3604 3605 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3606 const unsigned Opc = Inst.getOpcode(); 3607 const MCInstrDesc &Desc = MII.get(Opc); 3608 3609 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3610 return true; 3611 3612 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3613 3614 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3615 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3616 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3617 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3618 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3619 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3620 3621 assert(VAddr0Idx != -1); 3622 assert(SrsrcIdx != -1); 3623 assert(SrsrcIdx > VAddr0Idx); 3624 3625 if (DimIdx == -1) 3626 return true; // intersect_ray 3627 3628 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3629 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3630 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3631 unsigned ActualAddrSize = 3632 IsNSA ? SrsrcIdx - VAddr0Idx 3633 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3634 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3635 3636 unsigned ExpectedAddrSize = 3637 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3638 3639 if (!IsNSA) { 3640 if (ExpectedAddrSize > 8) 3641 ExpectedAddrSize = 16; 3642 3643 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3644 // This provides backward compatibility for assembly created 3645 // before 160b/192b/224b types were directly supported. 3646 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3647 return true; 3648 } 3649 3650 return ActualAddrSize == ExpectedAddrSize; 3651 } 3652 3653 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3654 3655 const unsigned Opc = Inst.getOpcode(); 3656 const MCInstrDesc &Desc = MII.get(Opc); 3657 3658 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3659 return true; 3660 if (!Desc.mayLoad() || !Desc.mayStore()) 3661 return true; // Not atomic 3662 3663 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3664 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3665 3666 // This is an incomplete check because image_atomic_cmpswap 3667 // may only use 0x3 and 0xf while other atomic operations 3668 // may use 0x1 and 0x3. However these limitations are 3669 // verified when we check that dmask matches dst size. 3670 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3671 } 3672 3673 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3674 3675 const unsigned Opc = Inst.getOpcode(); 3676 const MCInstrDesc &Desc = MII.get(Opc); 3677 3678 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3679 return true; 3680 3681 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3682 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3683 3684 // GATHER4 instructions use dmask in a different fashion compared to 3685 // other MIMG instructions. The only useful DMASK values are 3686 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3687 // (red,red,red,red) etc.) The ISA document doesn't mention 3688 // this. 
3689 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3690 } 3691 3692 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3693 const unsigned Opc = Inst.getOpcode(); 3694 const MCInstrDesc &Desc = MII.get(Opc); 3695 3696 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3697 return true; 3698 3699 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3700 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3701 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3702 3703 if (!BaseOpcode->MSAA) 3704 return true; 3705 3706 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3707 assert(DimIdx != -1); 3708 3709 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3710 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3711 3712 return DimInfo->MSAA; 3713 } 3714 3715 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3716 { 3717 switch (Opcode) { 3718 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3719 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3720 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3721 return true; 3722 default: 3723 return false; 3724 } 3725 } 3726 3727 // movrels* opcodes should only allow VGPRS as src0. 3728 // This is specified in .td description for vop1/vop3, 3729 // but sdwa is handled differently. See isSDWAOperand. 3730 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3731 const OperandVector &Operands) { 3732 3733 const unsigned Opc = Inst.getOpcode(); 3734 const MCInstrDesc &Desc = MII.get(Opc); 3735 3736 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3737 return true; 3738 3739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3740 assert(Src0Idx != -1); 3741 3742 SMLoc ErrLoc; 3743 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3744 if (Src0.isReg()) { 3745 auto Reg = mc2PseudoReg(Src0.getReg()); 3746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3747 if (!isSGPR(Reg, TRI)) 3748 return true; 3749 ErrLoc = getRegLoc(Reg, Operands); 3750 } else { 3751 ErrLoc = getConstLoc(Operands); 3752 } 3753 3754 Error(ErrLoc, "source operand must be a VGPR"); 3755 return false; 3756 } 3757 3758 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3759 const OperandVector &Operands) { 3760 3761 const unsigned Opc = Inst.getOpcode(); 3762 3763 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3764 return true; 3765 3766 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3767 assert(Src0Idx != -1); 3768 3769 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3770 if (!Src0.isReg()) 3771 return true; 3772 3773 auto Reg = mc2PseudoReg(Src0.getReg()); 3774 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3775 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3776 Error(getRegLoc(Reg, Operands), 3777 "source operand must be either a VGPR or an inline constant"); 3778 return false; 3779 } 3780 3781 return true; 3782 } 3783 3784 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3785 const OperandVector &Operands) { 3786 const unsigned Opc = Inst.getOpcode(); 3787 const MCInstrDesc &Desc = MII.get(Opc); 3788 3789 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3790 return true; 3791 3792 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3793 if (Src2Idx == -1) 3794 return true; 3795 3796 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3797 if (!Src2.isReg()) 3798 return true; 3799 3800 MCRegister Src2Reg = Src2.getReg(); 3801 MCRegister DstReg = Inst.getOperand(0).getReg(); 3802 if (Src2Reg == DstReg) 3803 return 
true;
3804
3805   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3806   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3807     return true;
3808
3809   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3810     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3811           "source 2 operand must not partially overlap with dst");
3812     return false;
3813   }
3814
3815   return true;
3816 }
3817
3818 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3819   switch (Inst.getOpcode()) {
3820   default:
3821     return true;
3822   case V_DIV_SCALE_F32_gfx6_gfx7:
3823   case V_DIV_SCALE_F32_vi:
3824   case V_DIV_SCALE_F32_gfx10:
3825   case V_DIV_SCALE_F64_gfx6_gfx7:
3826   case V_DIV_SCALE_F64_vi:
3827   case V_DIV_SCALE_F64_gfx10:
3828     break;
3829   }
3830
3831   // TODO: Check that src0 = src1 or src2.
3832
3833   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3834                     AMDGPU::OpName::src1_modifiers,
3835                     AMDGPU::OpName::src2_modifiers}) {
3836     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3837             .getImm() &
3838         SISrcMods::ABS) {
3839       return false;
3840     }
3841   }
3842
3843   return true;
3844 }
3845
3846 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3847
3848   const unsigned Opc = Inst.getOpcode();
3849   const MCInstrDesc &Desc = MII.get(Opc);
3850
3851   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3852     return true;
3853
3854   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3855   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3856     if (isCI() || isSI())
3857       return false;
3858   }
3859
3860   return true;
3861 }
3862
3863 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3864   const unsigned Opc = Inst.getOpcode();
3865   const MCInstrDesc &Desc = MII.get(Opc);
3866
3867   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3868     return true;
3869
3870   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3871   if (DimIdx < 0)
3872     return true;
3873
3874   long Imm = Inst.getOperand(DimIdx).getImm();
3875   if (Imm < 0 || Imm >= 8)
3876     return false;
3877
3878   return true;
3879 }
3880
3881 static bool IsRevOpcode(const unsigned Opcode)
3882 {
3883   switch (Opcode) {
3884   case AMDGPU::V_SUBREV_F32_e32:
3885   case AMDGPU::V_SUBREV_F32_e64:
3886   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3887   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3888   case AMDGPU::V_SUBREV_F32_e32_vi:
3889   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3890   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3891   case AMDGPU::V_SUBREV_F32_e64_vi:
3892
3893   case AMDGPU::V_SUBREV_CO_U32_e32:
3894   case AMDGPU::V_SUBREV_CO_U32_e64:
3895   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3896   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3897
3898   case AMDGPU::V_SUBBREV_U32_e32:
3899   case AMDGPU::V_SUBBREV_U32_e64:
3900   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3901   case AMDGPU::V_SUBBREV_U32_e32_vi:
3902   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3903   case AMDGPU::V_SUBBREV_U32_e64_vi:
3904
3905   case AMDGPU::V_SUBREV_U32_e32:
3906   case AMDGPU::V_SUBREV_U32_e64:
3907   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3908   case AMDGPU::V_SUBREV_U32_e32_vi:
3909   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3910   case AMDGPU::V_SUBREV_U32_e64_vi:
3911
3912   case AMDGPU::V_SUBREV_F16_e32:
3913   case AMDGPU::V_SUBREV_F16_e64:
3914   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3915   case AMDGPU::V_SUBREV_F16_e32_vi:
3916   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3917   case AMDGPU::V_SUBREV_F16_e64_vi:
3918
3919   case AMDGPU::V_SUBREV_U16_e32:
3920   case AMDGPU::V_SUBREV_U16_e64:
3921   case AMDGPU::V_SUBREV_U16_e32_vi:
3922   case AMDGPU::V_SUBREV_U16_e64_vi:
3923
3924   case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3925 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3926 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3927 3928 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3929 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3930 3931 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3932 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3933 3934 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3935 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3936 3937 case AMDGPU::V_LSHRREV_B32_e32: 3938 case AMDGPU::V_LSHRREV_B32_e64: 3939 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3940 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3941 case AMDGPU::V_LSHRREV_B32_e32_vi: 3942 case AMDGPU::V_LSHRREV_B32_e64_vi: 3943 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3944 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3945 3946 case AMDGPU::V_ASHRREV_I32_e32: 3947 case AMDGPU::V_ASHRREV_I32_e64: 3948 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3949 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3950 case AMDGPU::V_ASHRREV_I32_e32_vi: 3951 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3952 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3953 case AMDGPU::V_ASHRREV_I32_e64_vi: 3954 3955 case AMDGPU::V_LSHLREV_B32_e32: 3956 case AMDGPU::V_LSHLREV_B32_e64: 3957 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3958 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3959 case AMDGPU::V_LSHLREV_B32_e32_vi: 3960 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3961 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3962 case AMDGPU::V_LSHLREV_B32_e64_vi: 3963 3964 case AMDGPU::V_LSHLREV_B16_e32: 3965 case AMDGPU::V_LSHLREV_B16_e64: 3966 case AMDGPU::V_LSHLREV_B16_e32_vi: 3967 case AMDGPU::V_LSHLREV_B16_e64_vi: 3968 case AMDGPU::V_LSHLREV_B16_gfx10: 3969 3970 case AMDGPU::V_LSHRREV_B16_e32: 3971 case AMDGPU::V_LSHRREV_B16_e64: 3972 case AMDGPU::V_LSHRREV_B16_e32_vi: 3973 case AMDGPU::V_LSHRREV_B16_e64_vi: 3974 case AMDGPU::V_LSHRREV_B16_gfx10: 3975 3976 case AMDGPU::V_ASHRREV_I16_e32: 3977 case AMDGPU::V_ASHRREV_I16_e64: 3978 case AMDGPU::V_ASHRREV_I16_e32_vi: 3979 case AMDGPU::V_ASHRREV_I16_e64_vi: 3980 case AMDGPU::V_ASHRREV_I16_gfx10: 3981 3982 case AMDGPU::V_LSHLREV_B64_e64: 3983 case AMDGPU::V_LSHLREV_B64_gfx10: 3984 case AMDGPU::V_LSHLREV_B64_vi: 3985 3986 case AMDGPU::V_LSHRREV_B64_e64: 3987 case AMDGPU::V_LSHRREV_B64_gfx10: 3988 case AMDGPU::V_LSHRREV_B64_vi: 3989 3990 case AMDGPU::V_ASHRREV_I64_e64: 3991 case AMDGPU::V_ASHRREV_I64_gfx10: 3992 case AMDGPU::V_ASHRREV_I64_vi: 3993 3994 case AMDGPU::V_PK_LSHLREV_B16: 3995 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3996 case AMDGPU::V_PK_LSHLREV_B16_vi: 3997 3998 case AMDGPU::V_PK_LSHRREV_B16: 3999 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4000 case AMDGPU::V_PK_LSHRREV_B16_vi: 4001 case AMDGPU::V_PK_ASHRREV_I16: 4002 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4003 case AMDGPU::V_PK_ASHRREV_I16_vi: 4004 return true; 4005 default: 4006 return false; 4007 } 4008 } 4009 4010 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4011 4012 using namespace SIInstrFlags; 4013 const unsigned Opcode = Inst.getOpcode(); 4014 const MCInstrDesc &Desc = MII.get(Opcode); 4015 4016 // lds_direct register is defined so that it can be used 4017 // with 9-bit operands only. Ignore encodings which do not accept these. 
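  // Illustrative only (not from the source): "v_mov_b32 v0, lds_direct" can
  // pass the checks below on targets that support LDS direct reads, whereas
  // using lds_direct as src1/src2, with a *rev opcode, or in an SDWA encoding
  // is rejected.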
4018 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4019 if ((Desc.TSFlags & Enc) == 0) 4020 return None; 4021 4022 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4023 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4024 if (SrcIdx == -1) 4025 break; 4026 const auto &Src = Inst.getOperand(SrcIdx); 4027 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4028 4029 if (isGFX90A() || isGFX11Plus()) 4030 return StringRef("lds_direct is not supported on this GPU"); 4031 4032 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4033 return StringRef("lds_direct cannot be used with this instruction"); 4034 4035 if (SrcName != OpName::src0) 4036 return StringRef("lds_direct may be used as src0 only"); 4037 } 4038 } 4039 4040 return None; 4041 } 4042 4043 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4044 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4045 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4046 if (Op.isFlatOffset()) 4047 return Op.getStartLoc(); 4048 } 4049 return getLoc(); 4050 } 4051 4052 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4053 const OperandVector &Operands) { 4054 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4055 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4056 return true; 4057 4058 auto Opcode = Inst.getOpcode(); 4059 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4060 assert(OpNum != -1); 4061 4062 const auto &Op = Inst.getOperand(OpNum); 4063 if (!hasFlatOffsets() && Op.getImm() != 0) { 4064 Error(getFlatOffsetLoc(Operands), 4065 "flat offset modifier is not supported on this GPU"); 4066 return false; 4067 } 4068 4069 // For FLAT segment the offset must be positive; 4070 // MSB is ignored and forced to zero. 4071 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4072 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4073 if (!isIntN(OffsetSize, Op.getImm())) { 4074 Error(getFlatOffsetLoc(Operands), 4075 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4076 return false; 4077 } 4078 } else { 4079 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4080 if (!isUIntN(OffsetSize, Op.getImm())) { 4081 Error(getFlatOffsetLoc(Operands), 4082 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4083 return false; 4084 } 4085 } 4086 4087 return true; 4088 } 4089 4090 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4091 // Start with second operand because SMEM Offset cannot be dst or src0. 
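  // Note (added for clarity): Operands[0] is the mnemonic token, so starting
  // at index 2 skips the mnemonic and the first instruction operand.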
4092 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4093 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4094 if (Op.isSMEMOffset()) 4095 return Op.getStartLoc(); 4096 } 4097 return getLoc(); 4098 } 4099 4100 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4101 const OperandVector &Operands) { 4102 if (isCI() || isSI()) 4103 return true; 4104 4105 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4106 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4107 return true; 4108 4109 auto Opcode = Inst.getOpcode(); 4110 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4111 if (OpNum == -1) 4112 return true; 4113 4114 const auto &Op = Inst.getOperand(OpNum); 4115 if (!Op.isImm()) 4116 return true; 4117 4118 uint64_t Offset = Op.getImm(); 4119 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4120 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4121 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4122 return true; 4123 4124 Error(getSMEMOffsetLoc(Operands), 4125 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4126 "expected a 21-bit signed offset"); 4127 4128 return false; 4129 } 4130 4131 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4132 unsigned Opcode = Inst.getOpcode(); 4133 const MCInstrDesc &Desc = MII.get(Opcode); 4134 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4135 return true; 4136 4137 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4138 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4139 4140 const int OpIndices[] = { Src0Idx, Src1Idx }; 4141 4142 unsigned NumExprs = 0; 4143 unsigned NumLiterals = 0; 4144 uint32_t LiteralValue; 4145 4146 for (int OpIdx : OpIndices) { 4147 if (OpIdx == -1) break; 4148 4149 const MCOperand &MO = Inst.getOperand(OpIdx); 4150 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4151 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4152 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4153 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4154 if (NumLiterals == 0 || LiteralValue != Value) { 4155 LiteralValue = Value; 4156 ++NumLiterals; 4157 } 4158 } else if (MO.isExpr()) { 4159 ++NumExprs; 4160 } 4161 } 4162 } 4163 4164 return NumLiterals + NumExprs <= 1; 4165 } 4166 4167 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4168 const unsigned Opc = Inst.getOpcode(); 4169 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4170 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4171 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4172 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4173 4174 if (OpSel & ~3) 4175 return false; 4176 } 4177 4178 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4179 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4180 if (OpSelIdx != -1) { 4181 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4182 return false; 4183 } 4184 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4185 if (OpSelHiIdx != -1) { 4186 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4187 return false; 4188 } 4189 } 4190 4191 return true; 4192 } 4193 4194 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4195 const OperandVector &Operands) { 4196 const unsigned Opc = Inst.getOpcode(); 4197 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4198 if (DppCtrlIdx < 0) 4199 return true; 4200 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4201 4202 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4203 // DPP64 is supported for row_newbcast only. 4204 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4205 if (Src0Idx >= 0 && 4206 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4207 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4208 Error(S, "64 bit dpp only supports row_newbcast"); 4209 return false; 4210 } 4211 } 4212 4213 return true; 4214 } 4215 4216 // Check if VCC register matches wavefront size 4217 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4218 auto FB = getFeatureBits(); 4219 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4220 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4221 } 4222 4223 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4224 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4225 const OperandVector &Operands) { 4226 unsigned Opcode = Inst.getOpcode(); 4227 const MCInstrDesc &Desc = MII.get(Opcode); 4228 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4229 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4230 ImmIdx == -1) 4231 return true; 4232 4233 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4234 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4235 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4236 4237 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4238 4239 unsigned NumExprs = 0; 4240 unsigned NumLiterals = 0; 4241 uint32_t LiteralValue; 4242 4243 for (int OpIdx : OpIndices) { 4244 if (OpIdx == -1) 4245 continue; 4246 4247 const MCOperand &MO = Inst.getOperand(OpIdx); 4248 if (!MO.isImm() && !MO.isExpr()) 4249 continue; 4250 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4251 continue; 4252 4253 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4254 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4255 Error(getConstLoc(Operands), 4256 "inline constants are not allowed for this operand"); 4257 return false; 4258 } 4259 4260 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4261 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4262 if (NumLiterals == 0 || LiteralValue != Value) { 4263 LiteralValue = Value; 4264 ++NumLiterals; 4265 } 4266 } else if (MO.isExpr()) { 4267 ++NumExprs; 4268 } 4269 } 4270 NumLiterals += NumExprs; 4271 4272 if (!NumLiterals) 4273 return true; 4274 4275 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4276 Error(getLitLoc(Operands), "literal operands are not supported"); 4277 return false; 4278 } 4279 4280 if (NumLiterals > 1) { 4281 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4282 return false; 4283 } 4284 4285 return true; 4286 } 4287 4288 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4289 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4290 const MCRegisterInfo *MRI) { 4291 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4292 if (OpIdx < 0) 4293 return -1; 4294 4295 const MCOperand &Op = Inst.getOperand(OpIdx); 4296 if (!Op.isReg()) 4297 return -1; 4298 4299 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4300 auto Reg = Sub ? Sub : Op.getReg(); 4301 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4302 return AGPR32.contains(Reg) ? 
1 : 0; 4303 } 4304 4305 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4306 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4307 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4308 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4309 SIInstrFlags::DS)) == 0) 4310 return true; 4311 4312 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4313 : AMDGPU::OpName::vdata; 4314 4315 const MCRegisterInfo *MRI = getMRI(); 4316 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4317 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4318 4319 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4320 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4321 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4322 return false; 4323 } 4324 4325 auto FB = getFeatureBits(); 4326 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4327 if (DataAreg < 0 || DstAreg < 0) 4328 return true; 4329 return DstAreg == DataAreg; 4330 } 4331 4332 return DstAreg < 1 && DataAreg < 1; 4333 } 4334 4335 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4336 auto FB = getFeatureBits(); 4337 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4338 return true; 4339 4340 const MCRegisterInfo *MRI = getMRI(); 4341 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4342 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4343 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4344 const MCOperand &Op = Inst.getOperand(I); 4345 if (!Op.isReg()) 4346 continue; 4347 4348 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4349 if (!Sub) 4350 continue; 4351 4352 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4353 return false; 4354 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4355 return false; 4356 } 4357 4358 return true; 4359 } 4360 4361 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4362 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4363 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4364 if (Op.isBLGP()) 4365 return Op.getStartLoc(); 4366 } 4367 return SMLoc(); 4368 } 4369 4370 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4371 const OperandVector &Operands) { 4372 unsigned Opc = Inst.getOpcode(); 4373 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4374 if (BlgpIdx == -1) 4375 return true; 4376 SMLoc BLGPLoc = getBLGPLoc(Operands); 4377 if (!BLGPLoc.isValid()) 4378 return true; 4379 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4380 auto FB = getFeatureBits(); 4381 bool UsesNeg = false; 4382 if (FB[AMDGPU::FeatureGFX940Insts]) { 4383 switch (Opc) { 4384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4385 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4387 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4388 UsesNeg = true; 4389 } 4390 } 4391 4392 if (IsNeg == UsesNeg) 4393 return true; 4394 4395 Error(BLGPLoc, 4396 UsesNeg ? "invalid modifier: blgp is not supported" 4397 : "invalid modifier: neg is not supported"); 4398 4399 return false; 4400 } 4401 4402 // gfx90a has an undocumented limitation: 4403 // DS_GWS opcodes must use even aligned registers. 
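// For instance (illustrative syntax), on gfx90a "ds_gws_init v1 gds" would be
// rejected because v1 is oddly numbered, while "ds_gws_init v2 gds" is fine.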
4404 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4405                                    const OperandVector &Operands) {
4406   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4407     return true;
4408
4409   int Opc = Inst.getOpcode();
4410   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4411       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4412     return true;
4413
4414   const MCRegisterInfo *MRI = getMRI();
4415   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4416   int Data0Pos =
4417       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4418   assert(Data0Pos != -1);
4419   auto Reg = Inst.getOperand(Data0Pos).getReg();
4420   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4421   if (RegIdx & 1) {
4422     SMLoc RegLoc = getRegLoc(Reg, Operands);
4423     Error(RegLoc, "vgpr must be even aligned");
4424     return false;
4425   }
4426
4427   return true;
4428 }
4429
4430 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4431                                             const OperandVector &Operands,
4432                                             const SMLoc &IDLoc) {
4433   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4434                                            AMDGPU::OpName::cpol);
4435   if (CPolPos == -1)
4436     return true;
4437
4438   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4439
4440   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4441   if (TSFlags & SIInstrFlags::SMRD) {
4442     if (CPol && (isSI() || isCI())) {
4443       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4444       Error(S, "cache policy is not supported for SMRD instructions");
4445       return false;
4446     }
4447     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4448       Error(IDLoc, "invalid cache policy for SMEM instruction");
4449       return false;
4450     }
4451   }
4452
4453   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4454     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4455     StringRef CStr(S.getPointer());
4456     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4457     Error(S, "scc is not supported on this GPU");
4458     return false;
4459   }
4460
4461   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4462     return true;
4463
4464   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4465     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4466       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4467                               : "instruction must use glc");
4468       return false;
4469     }
4470   } else {
4471     if (CPol & CPol::GLC) {
4472       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4473       StringRef CStr(S.getPointer());
4474       S = SMLoc::getFromPointer(
4475           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4476       Error(S, isGFX940() ? "instruction must not use sc0"
4477                           : "instruction must not use glc");
4478       return false;
4479     }
4480   }
4481
4482   return true;
4483 }
4484
4485 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4486                                          const OperandVector &Operands,
4487                                          const SMLoc &IDLoc) {
4488   if (isGFX940())
4489     return true;
4490
4491   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4492   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4493       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4494     return true;
4495   // This is FLAT LDS DMA.
4496
4497   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4498   StringRef CStr(S.getPointer());
4499   if (!CStr.startswith("lds")) {
4500     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4501     // The LDS version should have the 'lds' modifier, but it follows optional
4502     // operands, so its absence is ignored by the matcher.
4503 Error(IDLoc, "invalid operands for instruction"); 4504 return false; 4505 } 4506 4507 return true; 4508 } 4509 4510 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4511 if (!isGFX11Plus()) 4512 return true; 4513 for (auto &Operand : Operands) { 4514 if (!Operand->isReg()) 4515 continue; 4516 unsigned Reg = Operand->getReg(); 4517 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4518 Error(getRegLoc(Reg, Operands), 4519 "execz and vccz are not supported on this GPU"); 4520 return false; 4521 } 4522 } 4523 return true; 4524 } 4525 4526 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4527 const SMLoc &IDLoc, 4528 const OperandVector &Operands) { 4529 if (auto ErrMsg = validateLdsDirect(Inst)) { 4530 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4531 return false; 4532 } 4533 if (!validateSOPLiteral(Inst)) { 4534 Error(getLitLoc(Operands), 4535 "only one literal operand is allowed"); 4536 return false; 4537 } 4538 if (!validateVOPLiteral(Inst, Operands)) { 4539 return false; 4540 } 4541 if (!validateConstantBusLimitations(Inst, Operands)) { 4542 return false; 4543 } 4544 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4545 return false; 4546 } 4547 if (!validateIntClampSupported(Inst)) { 4548 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4549 "integer clamping is not supported on this GPU"); 4550 return false; 4551 } 4552 if (!validateOpSel(Inst)) { 4553 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4554 "invalid op_sel operand"); 4555 return false; 4556 } 4557 if (!validateDPP(Inst, Operands)) { 4558 return false; 4559 } 4560 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4561 if (!validateMIMGD16(Inst)) { 4562 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4563 "d16 modifier is not supported on this GPU"); 4564 return false; 4565 } 4566 if (!validateMIMGDim(Inst)) { 4567 Error(IDLoc, "dim modifier is required on this GPU"); 4568 return false; 4569 } 4570 if (!validateMIMGMSAA(Inst)) { 4571 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4572 "invalid dim; must be MSAA type"); 4573 return false; 4574 } 4575 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4576 Error(IDLoc, *ErrMsg); 4577 return false; 4578 } 4579 if (!validateMIMGAddrSize(Inst)) { 4580 Error(IDLoc, 4581 "image address size does not match dim and a16"); 4582 return false; 4583 } 4584 if (!validateMIMGAtomicDMask(Inst)) { 4585 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4586 "invalid atomic image dmask"); 4587 return false; 4588 } 4589 if (!validateMIMGGatherDMask(Inst)) { 4590 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4591 "invalid image_gather dmask: only one bit must be set"); 4592 return false; 4593 } 4594 if (!validateMovrels(Inst, Operands)) { 4595 return false; 4596 } 4597 if (!validateFlatOffset(Inst, Operands)) { 4598 return false; 4599 } 4600 if (!validateSMEMOffset(Inst, Operands)) { 4601 return false; 4602 } 4603 if (!validateMAIAccWrite(Inst, Operands)) { 4604 return false; 4605 } 4606 if (!validateMFMA(Inst, Operands)) { 4607 return false; 4608 } 4609 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4610 return false; 4611 } 4612 4613 if (!validateAGPRLdSt(Inst)) { 4614 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4615 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4616 : "invalid register class: agpr loads and stores not supported on this GPU" 4617 ); 4618 return false; 4619 } 4620 if (!validateVGPRAlign(Inst)) { 4621 Error(IDLoc, 4622 "invalid register class: vgpr tuples must be 64 bit aligned"); 4623 return false; 4624 } 4625 if (!validateGWS(Inst, Operands)) { 4626 return false; 4627 } 4628 4629 if (!validateBLGP(Inst, Operands)) { 4630 return false; 4631 } 4632 4633 if (!validateDivScale(Inst)) { 4634 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4635 return false; 4636 } 4637 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4638 return false; 4639 } 4640 if (!validateExeczVcczOperands(Operands)) { 4641 return false; 4642 } 4643 4644 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4645 return false; 4646 } 4647 4648 return true; 4649 } 4650 4651 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4652 const FeatureBitset &FBS, 4653 unsigned VariantID = 0); 4654 4655 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4656 const FeatureBitset &AvailableFeatures, 4657 unsigned VariantID); 4658 4659 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4660 const FeatureBitset &FBS) { 4661 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4662 } 4663 4664 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4665 const FeatureBitset &FBS, 4666 ArrayRef<unsigned> Variants) { 4667 for (auto Variant : Variants) { 4668 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4669 return true; 4670 } 4671 4672 return false; 4673 } 4674 4675 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4676 const SMLoc &IDLoc) { 4677 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4678 4679 // Check if requested instruction variant is supported. 4680 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4681 return false; 4682 4683 // This instruction is not supported. 4684 // Clear any other pending errors because they are no longer relevant. 4685 getParser().clearPendingErrors(); 4686 4687 // Requested instruction variant is not supported. 4688 // Check if any other variants are supported. 4689 StringRef VariantName = getMatchedVariantName(); 4690 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4691 return Error(IDLoc, 4692 Twine(VariantName, 4693 " variant of this instruction is not supported")); 4694 } 4695 4696 // Finally check if this instruction is supported on any other GPU. 4697 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4698 return Error(IDLoc, "instruction not supported on this GPU"); 4699 } 4700 4701 // Instruction not supported on any GPU. Probably a typo. 4702 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4703 return Error(IDLoc, "invalid instruction" + Suggestion); 4704 } 4705 4706 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4707 OperandVector &Operands, 4708 MCStreamer &Out, 4709 uint64_t &ErrorInfo, 4710 bool MatchingInlineAsm) { 4711 MCInst Inst; 4712 unsigned Result = Match_Success; 4713 for (auto Variant : getMatchedVariants()) { 4714 uint64_t EI; 4715 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4716 Variant); 4717 // We order match statuses from least to most specific. 
We use most specific 4718 // status as resulting 4719 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4720 if ((R == Match_Success) || 4721 (R == Match_PreferE32) || 4722 (R == Match_MissingFeature && Result != Match_PreferE32) || 4723 (R == Match_InvalidOperand && Result != Match_MissingFeature 4724 && Result != Match_PreferE32) || 4725 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4726 && Result != Match_MissingFeature 4727 && Result != Match_PreferE32)) { 4728 Result = R; 4729 ErrorInfo = EI; 4730 } 4731 if (R == Match_Success) 4732 break; 4733 } 4734 4735 if (Result == Match_Success) { 4736 if (!validateInstruction(Inst, IDLoc, Operands)) { 4737 return true; 4738 } 4739 Inst.setLoc(IDLoc); 4740 Out.emitInstruction(Inst, getSTI()); 4741 return false; 4742 } 4743 4744 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4745 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4746 return true; 4747 } 4748 4749 switch (Result) { 4750 default: break; 4751 case Match_MissingFeature: 4752 // It has been verified that the specified instruction 4753 // mnemonic is valid. A match was found but it requires 4754 // features which are not supported on this GPU. 4755 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4756 4757 case Match_InvalidOperand: { 4758 SMLoc ErrorLoc = IDLoc; 4759 if (ErrorInfo != ~0ULL) { 4760 if (ErrorInfo >= Operands.size()) { 4761 return Error(IDLoc, "too few operands for instruction"); 4762 } 4763 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4764 if (ErrorLoc == SMLoc()) 4765 ErrorLoc = IDLoc; 4766 } 4767 return Error(ErrorLoc, "invalid operand for instruction"); 4768 } 4769 4770 case Match_PreferE32: 4771 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4772 "should be encoded as e32"); 4773 case Match_MnemonicFail: 4774 llvm_unreachable("Invalid instructions should have been handled already"); 4775 } 4776 llvm_unreachable("Implement any new match types added!"); 4777 } 4778 4779 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4780 int64_t Tmp = -1; 4781 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4782 return true; 4783 } 4784 if (getParser().parseAbsoluteExpression(Tmp)) { 4785 return true; 4786 } 4787 Ret = static_cast<uint32_t>(Tmp); 4788 return false; 4789 } 4790 4791 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4792 uint32_t &Minor) { 4793 if (ParseAsAbsoluteExpression(Major)) 4794 return TokError("invalid major version"); 4795 4796 if (!trySkipToken(AsmToken::Comma)) 4797 return TokError("minor version number required, comma expected"); 4798 4799 if (ParseAsAbsoluteExpression(Minor)) 4800 return TokError("invalid minor version"); 4801 4802 return false; 4803 } 4804 4805 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4806 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4807 return TokError("directive only supported for amdgcn architecture"); 4808 4809 std::string TargetIDDirective; 4810 SMLoc TargetStart = getTok().getLoc(); 4811 if (getParser().parseEscapedString(TargetIDDirective)) 4812 return true; 4813 4814 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4815 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4816 return getParser().Error(TargetRange.Start, 4817 (Twine(".amdgcn_target directive's target id ") + 4818 Twine(TargetIDDirective) + 4819 Twine(" does not match the specified target id ") + 4820 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4821 4822 return false; 4823 } 4824 4825 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4826 return Error(Range.Start, "value out of range", Range); 4827 } 4828 4829 bool AMDGPUAsmParser::calculateGPRBlocks( 4830 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4831 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4832 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4833 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4834 // TODO(scott.linder): These calculations are duplicated from 4835 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4836 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4837 4838 unsigned NumVGPRs = NextFreeVGPR; 4839 unsigned NumSGPRs = NextFreeSGPR; 4840 4841 if (Version.Major >= 10) 4842 NumSGPRs = 0; 4843 else { 4844 unsigned MaxAddressableNumSGPRs = 4845 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4846 4847 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4848 NumSGPRs > MaxAddressableNumSGPRs) 4849 return OutOfRangeError(SGPRRange); 4850 4851 NumSGPRs += 4852 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4853 4854 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4855 NumSGPRs > MaxAddressableNumSGPRs) 4856 return OutOfRangeError(SGPRRange); 4857 4858 if (Features.test(FeatureSGPRInitBug)) 4859 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4860 } 4861 4862 VGPRBlocks = 4863 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4864 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4865 4866 return false; 4867 } 4868 4869 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4870 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4871 return TokError("directive only supported for amdgcn architecture"); 4872 4873 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4874 return TokError("directive only supported for amdhsa OS"); 4875 4876 StringRef KernelName; 4877 if (getParser().parseIdentifier(KernelName)) 4878 return true; 4879 4880 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4881 4882 StringSet<> Seen; 4883 4884 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4885 4886 SMRange VGPRRange; 4887 uint64_t NextFreeVGPR = 0; 4888 uint64_t AccumOffset = 0; 4889 uint64_t SharedVGPRCount = 0; 4890 SMRange SGPRRange; 4891 uint64_t NextFreeSGPR = 0; 4892 4893 // Count the number of user SGPRs implied from the enabled feature bits. 4894 unsigned ImpliedUserSGPRCount = 0; 4895 4896 // Track if the asm explicitly contains the directive for the user SGPR 4897 // count. 
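  // For example (illustrative value): ".amdhsa_user_sgpr_count 6". A value
  // smaller than the count implied by the enabled user-SGPR directives is
  // diagnosed once the whole directive body has been parsed.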
4898 Optional<unsigned> ExplicitUserSGPRCount; 4899 bool ReserveVCC = true; 4900 bool ReserveFlatScr = true; 4901 Optional<bool> EnableWavefrontSize32; 4902 4903 while (true) { 4904 while (trySkipToken(AsmToken::EndOfStatement)); 4905 4906 StringRef ID; 4907 SMRange IDRange = getTok().getLocRange(); 4908 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4909 return true; 4910 4911 if (ID == ".end_amdhsa_kernel") 4912 break; 4913 4914 if (Seen.find(ID) != Seen.end()) 4915 return TokError(".amdhsa_ directives cannot be repeated"); 4916 Seen.insert(ID); 4917 4918 SMLoc ValStart = getLoc(); 4919 int64_t IVal; 4920 if (getParser().parseAbsoluteExpression(IVal)) 4921 return true; 4922 SMLoc ValEnd = getLoc(); 4923 SMRange ValRange = SMRange(ValStart, ValEnd); 4924 4925 if (IVal < 0) 4926 return OutOfRangeError(ValRange); 4927 4928 uint64_t Val = IVal; 4929 4930 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4931 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4932 return OutOfRangeError(RANGE); \ 4933 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4934 4935 if (ID == ".amdhsa_group_segment_fixed_size") { 4936 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4937 return OutOfRangeError(ValRange); 4938 KD.group_segment_fixed_size = Val; 4939 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4940 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4941 return OutOfRangeError(ValRange); 4942 KD.private_segment_fixed_size = Val; 4943 } else if (ID == ".amdhsa_kernarg_size") { 4944 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4945 return OutOfRangeError(ValRange); 4946 KD.kernarg_size = Val; 4947 } else if (ID == ".amdhsa_user_sgpr_count") { 4948 ExplicitUserSGPRCount = Val; 4949 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4950 if (hasArchitectedFlatScratch()) 4951 return Error(IDRange.Start, 4952 "directive is not supported with architected flat scratch", 4953 IDRange); 4954 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4955 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4956 Val, ValRange); 4957 if (Val) 4958 ImpliedUserSGPRCount += 4; 4959 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4960 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4961 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4962 ValRange); 4963 if (Val) 4964 ImpliedUserSGPRCount += 2; 4965 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4966 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4967 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4968 ValRange); 4969 if (Val) 4970 ImpliedUserSGPRCount += 2; 4971 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4972 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4973 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4974 Val, ValRange); 4975 if (Val) 4976 ImpliedUserSGPRCount += 2; 4977 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4978 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4979 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4980 ValRange); 4981 if (Val) 4982 ImpliedUserSGPRCount += 2; 4983 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4984 if (hasArchitectedFlatScratch()) 4985 return Error(IDRange.Start, 4986 "directive is not supported with architected flat scratch", 4987 IDRange); 4988 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4989 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4990 ValRange); 4991 if (Val) 4992 ImpliedUserSGPRCount += 2; 4993 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4994 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4995 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4996 Val, ValRange); 4997 if (Val) 4998 ImpliedUserSGPRCount += 1; 4999 } else if (ID == ".amdhsa_wavefront_size32") { 5000 if (IVersion.Major < 10) 5001 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5002 EnableWavefrontSize32 = Val; 5003 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5004 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5005 Val, ValRange); 5006 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5007 if (hasArchitectedFlatScratch()) 5008 return Error(IDRange.Start, 5009 "directive is not supported with architected flat scratch", 5010 IDRange); 5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5012 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5013 } else if (ID == ".amdhsa_enable_private_segment") { 5014 if (!hasArchitectedFlatScratch()) 5015 return Error( 5016 IDRange.Start, 5017 "directive is not supported without architected flat scratch", 5018 IDRange); 5019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5020 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5021 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5023 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5024 ValRange); 5025 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5027 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5028 ValRange); 5029 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5031 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5032 ValRange); 5033 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5035 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5036 ValRange); 5037 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5039 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5040 ValRange); 5041 } else if (ID == ".amdhsa_next_free_vgpr") { 5042 VGPRRange = ValRange; 5043 NextFreeVGPR = Val; 5044 } else if (ID == ".amdhsa_next_free_sgpr") { 5045 SGPRRange = ValRange; 5046 NextFreeSGPR = Val; 5047 } else if (ID == ".amdhsa_accum_offset") { 5048 if (!isGFX90A()) 5049 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5050 AccumOffset = Val; 5051 } else if (ID == ".amdhsa_reserve_vcc") { 5052 if (!isUInt<1>(Val)) 5053 return OutOfRangeError(ValRange); 5054 ReserveVCC = Val; 5055 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5056 if (IVersion.Major < 7) 5057 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5058 if (hasArchitectedFlatScratch()) 5059 return Error(IDRange.Start, 5060 "directive is not supported with architected flat scratch", 5061 IDRange); 5062 if (!isUInt<1>(Val)) 5063 return OutOfRangeError(ValRange); 5064 ReserveFlatScr = Val; 5065 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5066 if (IVersion.Major < 8) 5067 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5068 if (!isUInt<1>(Val)) 5069 return OutOfRangeError(ValRange); 5070 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5071 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5072 IDRange); 5073 } else if (ID == ".amdhsa_float_round_mode_32") { 5074 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5075 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5076 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5077 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5078 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5079 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5080 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5081 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5082 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5083 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5084 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5085 ValRange); 5086 } else if (ID == ".amdhsa_dx10_clamp") { 5087 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5088 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5089 } else if (ID == ".amdhsa_ieee_mode") { 5090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5091 Val, ValRange); 5092 } else if (ID == ".amdhsa_fp16_overflow") { 5093 if (IVersion.Major < 9) 5094 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5096 ValRange); 5097 } else if (ID == ".amdhsa_tg_split") { 5098 if (!isGFX90A()) 5099 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5100 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5101 ValRange); 5102 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5103 if (IVersion.Major < 10) 5104 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5105 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5106 ValRange); 5107 } else if (ID == ".amdhsa_memory_ordered") { 5108 if (IVersion.Major < 10) 5109 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5110 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5111 ValRange); 5112 } else if (ID == ".amdhsa_forward_progress") { 5113 if (IVersion.Major < 10) 5114 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5115 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5116 ValRange); 5117 } else if (ID == ".amdhsa_shared_vgpr_count") { 5118 if (IVersion.Major < 10) 5119 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5120 SharedVGPRCount = Val; 5121 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5122 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5123 ValRange); 5124 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5125 PARSE_BITS_ENTRY( 5126 KD.compute_pgm_rsrc2, 5127 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5128 ValRange); 5129 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5131 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5132 Val, ValRange); 5133 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5134 PARSE_BITS_ENTRY( 5135 KD.compute_pgm_rsrc2, 5136 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5137 ValRange); 5138 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5139 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5140 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5141 Val, ValRange); 5142 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5143 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5144 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5145 Val, ValRange); 5146 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5149 Val, ValRange); 5150 } else if (ID == ".amdhsa_exception_int_div_zero") { 5151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5152 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5153                        Val, ValRange);
5154     } else {
5155       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5156     }
5157
5158 #undef PARSE_BITS_ENTRY
5159   }
5160
5161   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5162     return TokError(".amdhsa_next_free_vgpr directive is required");
5163
5164   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5165     return TokError(".amdhsa_next_free_sgpr directive is required");
5166
5167   unsigned VGPRBlocks;
5168   unsigned SGPRBlocks;
5169   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5170                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5171                          EnableWavefrontSize32, NextFreeVGPR,
5172                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5173                          SGPRBlocks))
5174     return true;
5175
5176   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5177           VGPRBlocks))
5178     return OutOfRangeError(VGPRRange);
5179   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5180                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5181
5182   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5183           SGPRBlocks))
5184     return OutOfRangeError(SGPRRange);
5185   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5186                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5187                   SGPRBlocks);
5188
5189   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5190     return TokError("amdgpu_user_sgpr_count smaller than implied by "
5191                     "enabled user SGPRs");
5192
5193   unsigned UserSGPRCount =
5194       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5195
5196   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5197     return TokError("too many user SGPRs enabled");
5198   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5199                   UserSGPRCount);
5200
5201   if (isGFX90A()) {
5202     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5203       return TokError(".amdhsa_accum_offset directive is required");
5204     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5205       return TokError("accum_offset should be in range [4..256] in "
5206                       "increments of 4");
5207     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5208       return TokError("accum_offset exceeds total VGPR allocation");
5209     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5210                     (AccumOffset / 4 - 1));
5211   }
5212
5213   if (IVersion.Major == 10) {
5214     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5215     if (SharedVGPRCount && EnableWavefrontSize32) {
5216       return TokError("shared_vgpr_count directive not valid on "
5217                       "wavefront size 32");
5218     }
5219     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5220       return TokError("shared_vgpr_count*2 + "
5221                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5222                       "exceed 63\n");
5223     }
5224   }
5225
5226   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5227       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5228       ReserveFlatScr);
5229   return false;
5230 }
5231
5232 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5233   uint32_t Major;
5234   uint32_t Minor;
5235
5236   if (ParseDirectiveMajorMinor(Major, Minor))
5237     return true;
5238
5239   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5240   return false;
5241 }
5242
5243 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5244   uint32_t Major;
5245   uint32_t Minor;
5246   uint32_t Stepping;
5247   StringRef VendorName;
5248   StringRef ArchName;
5249
5250   // If this directive has no
arguments, then use the ISA version for the 5251 // targeted GPU. 5252 if (isToken(AsmToken::EndOfStatement)) { 5253 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5254 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5255 ISA.Stepping, 5256 "AMD", "AMDGPU"); 5257 return false; 5258 } 5259 5260 if (ParseDirectiveMajorMinor(Major, Minor)) 5261 return true; 5262 5263 if (!trySkipToken(AsmToken::Comma)) 5264 return TokError("stepping version number required, comma expected"); 5265 5266 if (ParseAsAbsoluteExpression(Stepping)) 5267 return TokError("invalid stepping version"); 5268 5269 if (!trySkipToken(AsmToken::Comma)) 5270 return TokError("vendor name required, comma expected"); 5271 5272 if (!parseString(VendorName, "invalid vendor name")) 5273 return true; 5274 5275 if (!trySkipToken(AsmToken::Comma)) 5276 return TokError("arch name required, comma expected"); 5277 5278 if (!parseString(ArchName, "invalid arch name")) 5279 return true; 5280 5281 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5282 VendorName, ArchName); 5283 return false; 5284 } 5285 5286 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5287 amd_kernel_code_t &Header) { 5288 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5289 // assembly for backwards compatibility. 5290 if (ID == "max_scratch_backing_memory_byte_size") { 5291 Parser.eatToEndOfStatement(); 5292 return false; 5293 } 5294 5295 SmallString<40> ErrStr; 5296 raw_svector_ostream Err(ErrStr); 5297 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5298 return TokError(Err.str()); 5299 } 5300 Lex(); 5301 5302 if (ID == "enable_wavefront_size32") { 5303 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5304 if (!isGFX10Plus()) 5305 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5306 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5307 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5308 } else { 5309 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5310 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5311 } 5312 } 5313 5314 if (ID == "wavefront_size") { 5315 if (Header.wavefront_size == 5) { 5316 if (!isGFX10Plus()) 5317 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5318 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5319 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5320 } else if (Header.wavefront_size == 6) { 5321 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5322 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5323 } 5324 } 5325 5326 if (ID == "enable_wgp_mode") { 5327 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5328 !isGFX10Plus()) 5329 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5330 } 5331 5332 if (ID == "enable_mem_ordered") { 5333 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5334 !isGFX10Plus()) 5335 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5336 } 5337 5338 if (ID == "enable_fwd_progress") { 5339 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5340 !isGFX10Plus()) 5341 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5342 } 5343 5344 return false; 5345 } 5346 5347 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5348 amd_kernel_code_t Header; 5349 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5350 5351 while (true) { 
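// Illustrative example of a directive body accepted by this loop (values are
// placeholders, not recommended settings; field names follow the
// amd_kernel_code_t layout):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t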
5352 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5353 // will set the current token to EndOfStatement. 5354 while(trySkipToken(AsmToken::EndOfStatement)); 5355 5356 StringRef ID; 5357 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5358 return true; 5359 5360 if (ID == ".end_amd_kernel_code_t") 5361 break; 5362 5363 if (ParseAMDKernelCodeTValue(ID, Header)) 5364 return true; 5365 } 5366 5367 getTargetStreamer().EmitAMDKernelCodeT(Header); 5368 5369 return false; 5370 } 5371 5372 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5373 StringRef KernelName; 5374 if (!parseId(KernelName, "expected symbol name")) 5375 return true; 5376 5377 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5378 ELF::STT_AMDGPU_HSA_KERNEL); 5379 5380 KernelScope.initialize(getContext()); 5381 return false; 5382 } 5383 5384 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5385 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5386 return Error(getLoc(), 5387 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5388 "architectures"); 5389 } 5390 5391 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5392 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5393 return Error(getParser().getTok().getLoc(), "target id must match options"); 5394 5395 getTargetStreamer().EmitISAVersion(); 5396 Lex(); 5397 5398 return false; 5399 } 5400 5401 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5402 const char *AssemblerDirectiveBegin; 5403 const char *AssemblerDirectiveEnd; 5404 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5405 isHsaAbiVersion3AndAbove(&getSTI()) 5406 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5407 HSAMD::V3::AssemblerDirectiveEnd) 5408 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5409 HSAMD::AssemblerDirectiveEnd); 5410 5411 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5412 return Error(getLoc(), 5413 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5414 "not available on non-amdhsa OSes")).str()); 5415 } 5416 5417 std::string HSAMetadataString; 5418 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5419 HSAMetadataString)) 5420 return true; 5421 5422 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5423 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5424 return Error(getLoc(), "invalid HSA metadata"); 5425 } else { 5426 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5427 return Error(getLoc(), "invalid HSA metadata"); 5428 } 5429 5430 return false; 5431 } 5432 5433 /// Common code to parse out a block of text (typically YAML) between start and 5434 /// end directives. 
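/// For example (illustrative), a metadata block such as
///   .amdgpu_metadata
///     amdhsa.version:
///       - 1
///       - 0
///   .end_amdgpu_metadata
/// is collected verbatim (minus the delimiting directives) into CollectString.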
5435 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5436 const char *AssemblerDirectiveEnd, 5437 std::string &CollectString) { 5438 5439 raw_string_ostream CollectStream(CollectString); 5440 5441 getLexer().setSkipSpace(false); 5442 5443 bool FoundEnd = false; 5444 while (!isToken(AsmToken::Eof)) { 5445 while (isToken(AsmToken::Space)) { 5446 CollectStream << getTokenStr(); 5447 Lex(); 5448 } 5449 5450 if (trySkipId(AssemblerDirectiveEnd)) { 5451 FoundEnd = true; 5452 break; 5453 } 5454 5455 CollectStream << Parser.parseStringToEndOfStatement() 5456 << getContext().getAsmInfo()->getSeparatorString(); 5457 5458 Parser.eatToEndOfStatement(); 5459 } 5460 5461 getLexer().setSkipSpace(true); 5462 5463 if (isToken(AsmToken::Eof) && !FoundEnd) { 5464 return TokError(Twine("expected directive ") + 5465 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5466 } 5467 5468 CollectStream.flush(); 5469 return false; 5470 } 5471 5472 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5473 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5474 std::string String; 5475 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5476 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5477 return true; 5478 5479 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5480 if (!PALMetadata->setFromString(String)) 5481 return Error(getLoc(), "invalid PAL metadata"); 5482 return false; 5483 } 5484 5485 /// Parse the assembler directive for old linear-format PAL metadata. 5486 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5487 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5488 return Error(getLoc(), 5489 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5490 "not available on non-amdpal OSes")).str()); 5491 } 5492 5493 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5494 PALMetadata->setLegacy(); 5495 for (;;) { 5496 uint32_t Key, Value; 5497 if (ParseAsAbsoluteExpression(Key)) { 5498 return TokError(Twine("invalid value in ") + 5499 Twine(PALMD::AssemblerDirective)); 5500 } 5501 if (!trySkipToken(AsmToken::Comma)) { 5502 return TokError(Twine("expected an even number of values in ") + 5503 Twine(PALMD::AssemblerDirective)); 5504 } 5505 if (ParseAsAbsoluteExpression(Value)) { 5506 return TokError(Twine("invalid value in ") + 5507 Twine(PALMD::AssemblerDirective)); 5508 } 5509 PALMetadata->setRegister(Key, Value); 5510 if (!trySkipToken(AsmToken::Comma)) 5511 break; 5512 } 5513 return false; 5514 } 5515 5516 /// ParseDirectiveAMDGPULDS 5517 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5518 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5519 if (getParser().checkForValidSection()) 5520 return true; 5521 5522 StringRef Name; 5523 SMLoc NameLoc = getLoc(); 5524 if (getParser().parseIdentifier(Name)) 5525 return TokError("expected identifier in directive"); 5526 5527 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5528 if (parseToken(AsmToken::Comma, "expected ','")) 5529 return true; 5530 5531 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5532 5533 int64_t Size; 5534 SMLoc SizeLoc = getLoc(); 5535 if (getParser().parseAbsoluteExpression(Size)) 5536 return true; 5537 if (Size < 0) 5538 return Error(SizeLoc, "size must be non-negative"); 5539 if (Size > LocalMemorySize) 5540 return Error(SizeLoc, "size is too large"); 5541 5542 int64_t Alignment = 4; 5543 if (trySkipToken(AsmToken::Comma)) { 5544 SMLoc AlignLoc = getLoc(); 5545 if 
(getParser().parseAbsoluteExpression(Alignment)) 5546 return true; 5547 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5548 return Error(AlignLoc, "alignment must be a power of two"); 5549 5550 // Alignment larger than the size of LDS is possible in theory, as long 5551 // as the linker manages to place the symbol at address 0, but we do want 5552 // to make sure the alignment fits nicely into a 32-bit integer. 5553 if (Alignment >= 1u << 31) 5554 return Error(AlignLoc, "alignment is too large"); 5555 } 5556 5557 if (parseEOL()) 5558 return true; 5559 5560 Symbol->redefineIfPossible(); 5561 if (!Symbol->isUndefined()) 5562 return Error(NameLoc, "invalid symbol redefinition"); 5563 5564 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5565 return false; 5566 } 5567 5568 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5569 StringRef IDVal = DirectiveID.getString(); 5570 5571 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5572 if (IDVal == ".amdhsa_kernel") 5573 return ParseDirectiveAMDHSAKernel(); 5574 5575 // TODO: Restructure/combine with PAL metadata directive. 5576 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5577 return ParseDirectiveHSAMetadata(); 5578 } else { 5579 if (IDVal == ".hsa_code_object_version") 5580 return ParseDirectiveHSACodeObjectVersion(); 5581 5582 if (IDVal == ".hsa_code_object_isa") 5583 return ParseDirectiveHSACodeObjectISA(); 5584 5585 if (IDVal == ".amd_kernel_code_t") 5586 return ParseDirectiveAMDKernelCodeT(); 5587 5588 if (IDVal == ".amdgpu_hsa_kernel") 5589 return ParseDirectiveAMDGPUHsaKernel(); 5590 5591 if (IDVal == ".amd_amdgpu_isa") 5592 return ParseDirectiveISAVersion(); 5593 5594 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5595 return ParseDirectiveHSAMetadata(); 5596 } 5597 5598 if (IDVal == ".amdgcn_target") 5599 return ParseDirectiveAMDGCNTarget(); 5600 5601 if (IDVal == ".amdgpu_lds") 5602 return ParseDirectiveAMDGPULDS(); 5603 5604 if (IDVal == PALMD::AssemblerDirectiveBegin) 5605 return ParseDirectivePALMetadataBegin(); 5606 5607 if (IDVal == PALMD::AssemblerDirective) 5608 return ParseDirectivePALMetadata(); 5609 5610 return true; 5611 } 5612 5613 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5614 unsigned RegNo) { 5615 5616 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5617 return isGFX9Plus(); 5618 5619 // GFX10 has 2 more SGPRs, 104 and 105. 5620 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5621 return hasSGPR104_SGPR105(); 5622 5623 switch (RegNo) { 5624 case AMDGPU::SRC_SHARED_BASE: 5625 case AMDGPU::SRC_SHARED_LIMIT: 5626 case AMDGPU::SRC_PRIVATE_BASE: 5627 case AMDGPU::SRC_PRIVATE_LIMIT: 5628 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5629 return isGFX9Plus(); 5630 case AMDGPU::TBA: 5631 case AMDGPU::TBA_LO: 5632 case AMDGPU::TBA_HI: 5633 case AMDGPU::TMA: 5634 case AMDGPU::TMA_LO: 5635 case AMDGPU::TMA_HI: 5636 return !isGFX9Plus(); 5637 case AMDGPU::XNACK_MASK: 5638 case AMDGPU::XNACK_MASK_LO: 5639 case AMDGPU::XNACK_MASK_HI: 5640 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5641 case AMDGPU::SGPR_NULL: 5642 return isGFX10Plus(); 5643 default: 5644 break; 5645 } 5646 5647 if (isCI()) 5648 return true; 5649 5650 if (isSI() || isGFX10Plus()) { 5651 // No flat_scr on SI. 5652 // On GFX10 flat scratch is not a valid register operand and can only be 5653 // accessed with s_setreg/s_getreg.
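// For example (illustrative): 's_mov_b32 flat_scratch_lo, s2' is accepted on
// GFX8/GFX9 but is rejected here for SI and for GFX10+, where the name does
// not denote an addressable register operand.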
5654 switch (RegNo) { 5655 case AMDGPU::FLAT_SCR: 5656 case AMDGPU::FLAT_SCR_LO: 5657 case AMDGPU::FLAT_SCR_HI: 5658 return false; 5659 default: 5660 return true; 5661 } 5662 } 5663 5664 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5665 // SI/CI have. 5666 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5667 return hasSGPR102_SGPR103(); 5668 5669 return true; 5670 } 5671 5672 OperandMatchResultTy 5673 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5674 OperandMode Mode) { 5675 // Try to parse with a custom parser 5676 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5677 5678 // If we successfully parsed the operand or if there was an error parsing, 5679 // we are done. 5680 // 5681 // If we are parsing after we reach EndOfStatement then this means we 5682 // are appending default values to the Operands list. This is only done 5683 // by a custom parser, so we shouldn't continue on to the generic parsing. 5684 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5685 isToken(AsmToken::EndOfStatement)) 5686 return ResTy; 5687 5688 SMLoc RBraceLoc; 5689 SMLoc LBraceLoc = getLoc(); 5690 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5691 unsigned Prefix = Operands.size(); 5692 5693 for (;;) { 5694 auto Loc = getLoc(); 5695 ResTy = parseReg(Operands); 5696 if (ResTy == MatchOperand_NoMatch) 5697 Error(Loc, "expected a register"); 5698 if (ResTy != MatchOperand_Success) 5699 return MatchOperand_ParseFail; 5700 5701 RBraceLoc = getLoc(); 5702 if (trySkipToken(AsmToken::RBrac)) 5703 break; 5704 5705 if (!skipToken(AsmToken::Comma, 5706 "expected a comma or a closing square bracket")) { 5707 return MatchOperand_ParseFail; 5708 } 5709 } 5710 5711 if (Operands.size() - Prefix > 1) { 5712 Operands.insert(Operands.begin() + Prefix, 5713 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5714 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5715 } 5716 5717 return MatchOperand_Success; 5718 } 5719 5720 return parseRegOrImm(Operands); 5721 } 5722 5723 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5724 // Clear any forced encodings from the previous instruction. 5725 setForcedEncodingSize(0); 5726 setForcedDPP(false); 5727 setForcedSDWA(false); 5728 5729 if (Name.endswith("_e64_dpp")) { 5730 setForcedDPP(true); 5731 setForcedEncodingSize(64); 5732 return Name.substr(0, Name.size() - 8); 5733 } else if (Name.endswith("_e64")) { 5734 setForcedEncodingSize(64); 5735 return Name.substr(0, Name.size() - 4); 5736 } else if (Name.endswith("_e32")) { 5737 setForcedEncodingSize(32); 5738 return Name.substr(0, Name.size() - 4); 5739 } else if (Name.endswith("_dpp")) { 5740 setForcedDPP(true); 5741 return Name.substr(0, Name.size() - 4); 5742 } else if (Name.endswith("_sdwa")) { 5743 setForcedSDWA(true); 5744 return Name.substr(0, Name.size() - 5); 5745 } 5746 return Name; 5747 } 5748 5749 static void applyMnemonicAliases(StringRef &Mnemonic, 5750 const FeatureBitset &Features, 5751 unsigned VariantID); 5752 5753 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5754 StringRef Name, 5755 SMLoc NameLoc, OperandVector &Operands) { 5756 // Add the instruction mnemonic 5757 Name = parseMnemonicSuffix(Name); 5758 5759 // If the target architecture uses MnemonicAlias, call it here to parse 5760 // operands correctly.
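// For example (illustrative): a mnemonic such as 'v_add_f32_e64' has already
// had its '_e64' suffix stripped by parseMnemonicSuffix() above (forcing the
// 64-bit encoding), and applyMnemonicAliases() may then remap an aliased
// spelling to the canonical opcode name before any operands are parsed.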
5761 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5762 5763 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5764 5765 bool IsMIMG = Name.startswith("image_"); 5766 5767 while (!trySkipToken(AsmToken::EndOfStatement)) { 5768 OperandMode Mode = OperandMode_Default; 5769 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5770 Mode = OperandMode_NSA; 5771 CPolSeen = 0; 5772 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5773 5774 if (Res != MatchOperand_Success) { 5775 checkUnsupportedInstruction(Name, NameLoc); 5776 if (!Parser.hasPendingError()) { 5777 // FIXME: use real operand location rather than the current location. 5778 StringRef Msg = 5779 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5780 "not a valid operand."; 5781 Error(getLoc(), Msg); 5782 } 5783 while (!trySkipToken(AsmToken::EndOfStatement)) { 5784 lex(); 5785 } 5786 return true; 5787 } 5788 5789 // Eat the comma or space if there is one. 5790 trySkipToken(AsmToken::Comma); 5791 } 5792 5793 return false; 5794 } 5795 5796 //===----------------------------------------------------------------------===// 5797 // Utility functions 5798 //===----------------------------------------------------------------------===// 5799 5800 OperandMatchResultTy 5801 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5802 5803 if (!trySkipId(Prefix, AsmToken::Colon)) 5804 return MatchOperand_NoMatch; 5805 5806 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5807 } 5808 5809 OperandMatchResultTy 5810 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5811 AMDGPUOperand::ImmTy ImmTy, 5812 bool (*ConvertResult)(int64_t&)) { 5813 SMLoc S = getLoc(); 5814 int64_t Value = 0; 5815 5816 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5817 if (Res != MatchOperand_Success) 5818 return Res; 5819 5820 if (ConvertResult && !ConvertResult(Value)) { 5821 Error(S, "invalid " + StringRef(Prefix) + " value."); 5822 } 5823 5824 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5825 return MatchOperand_Success; 5826 } 5827 5828 OperandMatchResultTy 5829 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5830 OperandVector &Operands, 5831 AMDGPUOperand::ImmTy ImmTy, 5832 bool (*ConvertResult)(int64_t&)) { 5833 SMLoc S = getLoc(); 5834 if (!trySkipId(Prefix, AsmToken::Colon)) 5835 return MatchOperand_NoMatch; 5836 5837 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5838 return MatchOperand_ParseFail; 5839 5840 unsigned Val = 0; 5841 const unsigned MaxSize = 4; 5842 5843 // FIXME: How to verify the number of elements matches the number of src 5844 // operands? 
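// Worked example (illustrative): 'op_sel:[0,1,1,0]' stores element I in bit I,
// so the loop below produces Val = 0b0110 = 6.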
5845 for (int I = 0; ; ++I) { 5846 int64_t Op; 5847 SMLoc Loc = getLoc(); 5848 if (!parseExpr(Op)) 5849 return MatchOperand_ParseFail; 5850 5851 if (Op != 0 && Op != 1) { 5852 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5853 return MatchOperand_ParseFail; 5854 } 5855 5856 Val |= (Op << I); 5857 5858 if (trySkipToken(AsmToken::RBrac)) 5859 break; 5860 5861 if (I + 1 == MaxSize) { 5862 Error(getLoc(), "expected a closing square bracket"); 5863 return MatchOperand_ParseFail; 5864 } 5865 5866 if (!skipToken(AsmToken::Comma, "expected a comma")) 5867 return MatchOperand_ParseFail; 5868 } 5869 5870 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5871 return MatchOperand_Success; 5872 } 5873 5874 OperandMatchResultTy 5875 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5876 AMDGPUOperand::ImmTy ImmTy) { 5877 int64_t Bit; 5878 SMLoc S = getLoc(); 5879 5880 if (trySkipId(Name)) { 5881 Bit = 1; 5882 } else if (trySkipId("no", Name)) { 5883 Bit = 0; 5884 } else { 5885 return MatchOperand_NoMatch; 5886 } 5887 5888 if (Name == "r128" && !hasMIMG_R128()) { 5889 Error(S, "r128 modifier is not supported on this GPU"); 5890 return MatchOperand_ParseFail; 5891 } 5892 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5893 Error(S, "a16 modifier is not supported on this GPU"); 5894 return MatchOperand_ParseFail; 5895 } 5896 5897 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5898 ImmTy = AMDGPUOperand::ImmTyR128A16; 5899 5900 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5901 return MatchOperand_Success; 5902 } 5903 5904 OperandMatchResultTy 5905 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5906 unsigned CPolOn = 0; 5907 unsigned CPolOff = 0; 5908 SMLoc S = getLoc(); 5909 5910 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5911 if (isGFX940() && !Mnemo.startswith("s_")) { 5912 if (trySkipId("sc0")) 5913 CPolOn = AMDGPU::CPol::SC0; 5914 else if (trySkipId("nosc0")) 5915 CPolOff = AMDGPU::CPol::SC0; 5916 else if (trySkipId("nt")) 5917 CPolOn = AMDGPU::CPol::NT; 5918 else if (trySkipId("nont")) 5919 CPolOff = AMDGPU::CPol::NT; 5920 else if (trySkipId("sc1")) 5921 CPolOn = AMDGPU::CPol::SC1; 5922 else if (trySkipId("nosc1")) 5923 CPolOff = AMDGPU::CPol::SC1; 5924 else 5925 return MatchOperand_NoMatch; 5926 } 5927 else if (trySkipId("glc")) 5928 CPolOn = AMDGPU::CPol::GLC; 5929 else if (trySkipId("noglc")) 5930 CPolOff = AMDGPU::CPol::GLC; 5931 else if (trySkipId("slc")) 5932 CPolOn = AMDGPU::CPol::SLC; 5933 else if (trySkipId("noslc")) 5934 CPolOff = AMDGPU::CPol::SLC; 5935 else if (trySkipId("dlc")) 5936 CPolOn = AMDGPU::CPol::DLC; 5937 else if (trySkipId("nodlc")) 5938 CPolOff = AMDGPU::CPol::DLC; 5939 else if (trySkipId("scc")) 5940 CPolOn = AMDGPU::CPol::SCC; 5941 else if (trySkipId("noscc")) 5942 CPolOff = AMDGPU::CPol::SCC; 5943 else 5944 return MatchOperand_NoMatch; 5945 5946 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5947 Error(S, "dlc modifier is not supported on this GPU"); 5948 return MatchOperand_ParseFail; 5949 } 5950 5951 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5952 Error(S, "scc modifier is not supported on this GPU"); 5953 return MatchOperand_ParseFail; 5954 } 5955 5956 if (CPolSeen & (CPolOn | CPolOff)) { 5957 Error(S, "duplicate cache policy modifier"); 5958 return MatchOperand_ParseFail; 5959 } 5960 5961 CPolSeen |= (CPolOn | CPolOff); 5962 5963 for (unsigned I = 1; I != Operands.size(); ++I) { 5964 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5965 if (Op.isCPol()) { 5966 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5967 return MatchOperand_Success; 5968 } 5969 } 5970 5971 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5972 AMDGPUOperand::ImmTyCPol)); 5973 5974 return MatchOperand_Success; 5975 } 5976 5977 static void addOptionalImmOperand( 5978 MCInst& Inst, const OperandVector& Operands, 5979 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5980 AMDGPUOperand::ImmTy ImmT, 5981 int64_t Default = 0) { 5982 auto i = OptionalIdx.find(ImmT); 5983 if (i != OptionalIdx.end()) { 5984 unsigned Idx = i->second; 5985 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5986 } else { 5987 Inst.addOperand(MCOperand::createImm(Default)); 5988 } 5989 } 5990 5991 OperandMatchResultTy 5992 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5993 StringRef &Value, 5994 SMLoc &StringLoc) { 5995 if (!trySkipId(Prefix, AsmToken::Colon)) 5996 return MatchOperand_NoMatch; 5997 5998 StringLoc = getLoc(); 5999 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6000 : MatchOperand_ParseFail; 6001 } 6002 6003 //===----------------------------------------------------------------------===// 6004 // MTBUF format 6005 //===----------------------------------------------------------------------===// 6006 6007 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6008 int64_t MaxVal, 6009 int64_t &Fmt) { 6010 int64_t Val; 6011 SMLoc Loc = getLoc(); 6012 6013 auto Res = parseIntWithPrefix(Pref, Val); 6014 if (Res == MatchOperand_ParseFail) 6015 return false; 6016 if (Res == MatchOperand_NoMatch) 6017 return true; 6018 6019 if (Val < 0 || Val > MaxVal) { 6020 Error(Loc, Twine("out of range ", StringRef(Pref))); 6021 return false; 6022 } 6023 6024 Fmt = Val; 6025 return true; 6026 } 6027 6028 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6029 // values to live in a joint format operand in the MCInst encoding. 6030 OperandMatchResultTy 6031 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6032 using namespace llvm::AMDGPU::MTBUFFormat; 6033 6034 int64_t Dfmt = DFMT_UNDEF; 6035 int64_t Nfmt = NFMT_UNDEF; 6036 6037 // dfmt and nfmt can appear in either order, and each is optional. 6038 for (int I = 0; I < 2; ++I) { 6039 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6040 return MatchOperand_ParseFail; 6041 6042 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6043 return MatchOperand_ParseFail; 6044 } 6045 // Skip optional comma between dfmt/nfmt 6046 // but guard against 2 commas following each other. 6047 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6048 !peekToken().is(AsmToken::Comma)) { 6049 trySkipToken(AsmToken::Comma); 6050 } 6051 } 6052 6053 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6054 return MatchOperand_NoMatch; 6055 6056 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6057 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6058 6059 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6060 return MatchOperand_Success; 6061 } 6062 6063 OperandMatchResultTy 6064 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6065 using namespace llvm::AMDGPU::MTBUFFormat; 6066 6067 int64_t Fmt = UFMT_UNDEF; 6068 6069 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6070 return MatchOperand_ParseFail; 6071 6072 if (Fmt == UFMT_UNDEF) 6073 return MatchOperand_NoMatch; 6074 6075 Format = Fmt; 6076 return MatchOperand_Success; 6077 } 6078 6079 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6080 int64_t &Nfmt, 6081 StringRef FormatStr, 6082 SMLoc Loc) { 6083 using namespace llvm::AMDGPU::MTBUFFormat; 6084 int64_t Format; 6085 6086 Format = getDfmt(FormatStr); 6087 if (Format != DFMT_UNDEF) { 6088 Dfmt = Format; 6089 return true; 6090 } 6091 6092 Format = getNfmt(FormatStr, getSTI()); 6093 if (Format != NFMT_UNDEF) { 6094 Nfmt = Format; 6095 return true; 6096 } 6097 6098 Error(Loc, "unsupported format"); 6099 return false; 6100 } 6101 6102 OperandMatchResultTy 6103 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6104 SMLoc FormatLoc, 6105 int64_t &Format) { 6106 using namespace llvm::AMDGPU::MTBUFFormat; 6107 6108 int64_t Dfmt = DFMT_UNDEF; 6109 int64_t Nfmt = NFMT_UNDEF; 6110 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6111 return MatchOperand_ParseFail; 6112 6113 if (trySkipToken(AsmToken::Comma)) { 6114 StringRef Str; 6115 SMLoc Loc = getLoc(); 6116 if (!parseId(Str, "expected a format string") || 6117 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6118 return MatchOperand_ParseFail; 6119 } 6120 if (Dfmt == DFMT_UNDEF) { 6121 Error(Loc, "duplicate numeric format"); 6122 return MatchOperand_ParseFail; 6123 } else if (Nfmt == NFMT_UNDEF) { 6124 Error(Loc, "duplicate data format"); 6125 return MatchOperand_ParseFail; 6126 } 6127 } 6128 6129 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6130 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6131 6132 if (isGFX10Plus()) { 6133 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6134 if (Ufmt == UFMT_UNDEF) { 6135 Error(FormatLoc, "unsupported format"); 6136 return MatchOperand_ParseFail; 6137 } 6138 Format = Ufmt; 6139 } else { 6140 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6141 } 6142 6143 return MatchOperand_Success; 6144 } 6145 6146 OperandMatchResultTy 6147 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6148 SMLoc Loc, 6149 int64_t &Format) { 6150 using namespace llvm::AMDGPU::MTBUFFormat; 6151 6152 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6153 if (Id == UFMT_UNDEF) 6154 return MatchOperand_NoMatch; 6155 6156 if (!isGFX10Plus()) { 6157 Error(Loc, "unified format is not supported on this GPU"); 6158 return MatchOperand_ParseFail; 6159 } 6160 6161 Format = Id; 6162 return MatchOperand_Success; 6163 } 6164 6165 OperandMatchResultTy 6166 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6167 using namespace llvm::AMDGPU::MTBUFFormat; 6168 SMLoc Loc = getLoc(); 6169 6170 if (!parseExpr(Format)) 6171 return MatchOperand_ParseFail; 6172 if (!isValidFormatEncoding(Format, getSTI())) { 6173 Error(Loc, "out of range format"); 6174 return MatchOperand_ParseFail; 6175 } 6176 6177 return MatchOperand_Success; 6178 } 6179 6180 OperandMatchResultTy 6181 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6182 using namespace llvm::AMDGPU::MTBUFFormat; 6183 6184 if (!trySkipId("format", AsmToken::Colon)) 6185 return MatchOperand_NoMatch; 6186 6187 if (trySkipToken(AsmToken::LBrac)) { 6188 StringRef FormatStr; 6189 SMLoc Loc = getLoc(); 6190 if (!parseId(FormatStr, "expected a format string")) 6191 return MatchOperand_ParseFail; 6192 6193 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6194 if (Res == MatchOperand_NoMatch) 6195 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6196 if (Res != MatchOperand_Success) 6197 return Res; 6198 6199 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6200 return MatchOperand_ParseFail; 6201 6202 return MatchOperand_Success; 6203 } 6204 6205 return parseNumericFormat(Format); 6206 } 6207 6208 OperandMatchResultTy 6209 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6210 using namespace llvm::AMDGPU::MTBUFFormat; 6211 6212 int64_t Format = getDefaultFormatEncoding(getSTI()); 6213 OperandMatchResultTy Res; 6214 SMLoc Loc = getLoc(); 6215 6216 // Parse legacy format syntax. 6217 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6218 if (Res == MatchOperand_ParseFail) 6219 return Res; 6220 6221 bool FormatFound = (Res == MatchOperand_Success); 6222 6223 Operands.push_back( 6224 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6225 6226 if (FormatFound) 6227 trySkipToken(AsmToken::Comma); 6228 6229 if (isToken(AsmToken::EndOfStatement)) { 6230 // We are expecting an soffset operand, 6231 // but let matcher handle the error. 6232 return MatchOperand_Success; 6233 } 6234 6235 // Parse soffset. 
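// Illustrative operand orders handled here (syntax is an assumption, not a
// normative reference):
//   legacy:  tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0
//   unified: tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
// In the legacy form dfmt/nfmt precede the soffset; in the unified form the
// symbolic format may follow it, which is why parsing resumes below.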
6236 Res = parseRegOrImm(Operands); 6237 if (Res != MatchOperand_Success) 6238 return Res; 6239 6240 trySkipToken(AsmToken::Comma); 6241 6242 if (!FormatFound) { 6243 Res = parseSymbolicOrNumericFormat(Format); 6244 if (Res == MatchOperand_ParseFail) 6245 return Res; 6246 if (Res == MatchOperand_Success) { 6247 auto Size = Operands.size(); 6248 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6249 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6250 Op.setImm(Format); 6251 } 6252 return MatchOperand_Success; 6253 } 6254 6255 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6256 Error(getLoc(), "duplicate format"); 6257 return MatchOperand_ParseFail; 6258 } 6259 return MatchOperand_Success; 6260 } 6261 6262 //===----------------------------------------------------------------------===// 6263 // ds 6264 //===----------------------------------------------------------------------===// 6265 6266 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6267 const OperandVector &Operands) { 6268 OptionalImmIndexMap OptionalIdx; 6269 6270 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6271 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6272 6273 // Add the register arguments 6274 if (Op.isReg()) { 6275 Op.addRegOperands(Inst, 1); 6276 continue; 6277 } 6278 6279 // Handle optional arguments 6280 OptionalIdx[Op.getImmTy()] = i; 6281 } 6282 6283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6286 6287 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6288 } 6289 6290 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6291 bool IsGdsHardcoded) { 6292 OptionalImmIndexMap OptionalIdx; 6293 6294 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6295 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6296 6297 // Add the register arguments 6298 if (Op.isReg()) { 6299 Op.addRegOperands(Inst, 1); 6300 continue; 6301 } 6302 6303 if (Op.isToken() && Op.getToken() == "gds") { 6304 IsGdsHardcoded = true; 6305 continue; 6306 } 6307 6308 // Handle optional arguments 6309 OptionalIdx[Op.getImmTy()] = i; 6310 } 6311 6312 AMDGPUOperand::ImmTy OffsetType = 6313 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6314 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6315 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6316 AMDGPUOperand::ImmTyOffset; 6317 6318 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6319 6320 if (!IsGdsHardcoded) { 6321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6322 } 6323 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6324 } 6325 6326 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6327 OptionalImmIndexMap OptionalIdx; 6328 6329 unsigned OperandIdx[4]; 6330 unsigned EnMask = 0; 6331 int SrcIdx = 0; 6332 6333 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6334 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6335 6336 // Add the register arguments 6337 if (Op.isReg()) { 6338 assert(SrcIdx < 4); 6339 OperandIdx[SrcIdx] = Inst.size(); 6340 Op.addRegOperands(Inst, 1); 6341 ++SrcIdx; 6342 continue; 6343 } 6344 6345 if (Op.isOff()) { 6346 assert(SrcIdx < 4); 6347 OperandIdx[SrcIdx] = Inst.size(); 6348 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6349 ++SrcIdx; 6350 continue; 6351 } 6352 6353 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6354 Op.addImmOperands(Inst, 1); 6355 continue; 6356 } 6357 6358 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6359 continue; 6360 6361 // Handle optional arguments 6362 OptionalIdx[Op.getImmTy()] = i; 6363 } 6364 6365 assert(SrcIdx == 4); 6366 6367 bool Compr = false; 6368 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6369 Compr = true; 6370 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6371 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6372 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6373 } 6374 6375 for (auto i = 0; i < SrcIdx; ++i) { 6376 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6377 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6378 } 6379 } 6380 6381 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6382 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6383 6384 Inst.addOperand(MCOperand::createImm(EnMask)); 6385 } 6386 6387 //===----------------------------------------------------------------------===// 6388 // s_waitcnt 6389 //===----------------------------------------------------------------------===// 6390 6391 static bool 6392 encodeCnt( 6393 const AMDGPU::IsaVersion ISA, 6394 int64_t &IntVal, 6395 int64_t CntVal, 6396 bool Saturate, 6397 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6398 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6399 { 6400 bool Failed = false; 6401 6402 IntVal = encode(ISA, IntVal, CntVal); 6403 if (CntVal != decode(ISA, IntVal)) { 6404 if (Saturate) { 6405 IntVal = encode(ISA, IntVal, -1); 6406 } else { 6407 Failed = true; 6408 } 6409 } 6410 return Failed; 6411 } 6412 6413 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6414 6415 SMLoc CntLoc = getLoc(); 6416 StringRef CntName = getTokenStr(); 6417 6418 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6419 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6420 return false; 6421 6422 int64_t CntVal; 6423 SMLoc ValLoc = getLoc(); 6424 if (!parseExpr(CntVal)) 6425 return false; 6426 6427 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6428 6429 bool Failed = true; 6430 bool Sat = CntName.endswith("_sat"); 6431 6432 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6433 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6434 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6435 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6436 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6437 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6438 } else { 6439 Error(CntLoc, "invalid counter name " + CntName); 6440 return false; 6441 } 6442 6443 if (Failed) { 6444 Error(ValLoc, "too large value for " + CntName); 6445 return false; 6446 } 6447 6448 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6449 return false; 6450 6451 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6452 if (isToken(AsmToken::EndOfStatement)) { 6453 Error(getLoc(), "expected a counter name"); 6454 return false; 6455 } 6456 } 6457 6458 return true; 6459 } 6460 6461 OperandMatchResultTy 6462 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6463 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6464 int64_t Waitcnt = getWaitcntBitMask(ISA); 6465 SMLoc S = getLoc(); 6466 6467 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6468 while (!isToken(AsmToken::EndOfStatement)) { 6469 if (!parseCnt(Waitcnt)) 6470 return MatchOperand_ParseFail; 6471 } 6472 } else { 6473 if (!parseExpr(Waitcnt)) 6474 return MatchOperand_ParseFail; 6475 } 6476 6477 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6478 return MatchOperand_Success; 6479 } 6480 6481 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6482 SMLoc FieldLoc = getLoc(); 6483 StringRef FieldName = getTokenStr(); 6484 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6485 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6486 return false; 6487 6488 SMLoc ValueLoc = getLoc(); 6489 StringRef ValueName = getTokenStr(); 6490 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6491 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6492 return false; 6493 6494 unsigned Shift; 6495 if (FieldName == "instid0") { 6496 Shift = 0; 6497 } else if (FieldName == "instskip") { 6498 Shift = 4; 6499 } else if (FieldName == "instid1") { 6500 Shift = 7; 6501 } else { 6502 Error(FieldLoc, "invalid field name " + FieldName); 6503 return false; 6504 } 6505 6506 int Value; 6507 if (Shift == 4) { 6508 // Parse values for instskip. 6509 Value = StringSwitch<int>(ValueName) 6510 .Case("SAME", 0) 6511 .Case("NEXT", 1) 6512 .Case("SKIP_1", 2) 6513 .Case("SKIP_2", 3) 6514 .Case("SKIP_3", 4) 6515 .Case("SKIP_4", 5) 6516 .Default(-1); 6517 } else { 6518 // Parse values for instid0 and instid1. 6519 Value = StringSwitch<int>(ValueName) 6520 .Case("NO_DEP", 0) 6521 .Case("VALU_DEP_1", 1) 6522 .Case("VALU_DEP_2", 2) 6523 .Case("VALU_DEP_3", 3) 6524 .Case("VALU_DEP_4", 4) 6525 .Case("TRANS32_DEP_1", 5) 6526 .Case("TRANS32_DEP_2", 6) 6527 .Case("TRANS32_DEP_3", 7) 6528 .Case("FMA_ACCUM_CYCLE_1", 8) 6529 .Case("SALU_CYCLE_1", 9) 6530 .Case("SALU_CYCLE_2", 10) 6531 .Case("SALU_CYCLE_3", 11) 6532 .Default(-1); 6533 } 6534 if (Value < 0) { 6535 Error(ValueLoc, "invalid value name " + ValueName); 6536 return false; 6537 } 6538 6539 Delay |= Value << Shift; 6540 return true; 6541 } 6542 6543 OperandMatchResultTy 6544 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6545 int64_t Delay = 0; 6546 SMLoc S = getLoc(); 6547 6548 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6549 do { 6550 if (!parseDelay(Delay)) 6551 return MatchOperand_ParseFail; 6552 } while (trySkipToken(AsmToken::Pipe)); 6553 } else { 6554 if (!parseExpr(Delay)) 6555 return MatchOperand_ParseFail; 6556 } 6557 6558 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6559 return MatchOperand_Success; 6560 } 6561 6562 bool 6563 AMDGPUOperand::isSWaitCnt() const { 6564 return isImm(); 6565 } 6566 6567 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6568 6569 //===----------------------------------------------------------------------===// 6570 // DepCtr 6571 //===----------------------------------------------------------------------===// 6572 6573 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6574 StringRef DepCtrName) { 6575 switch (ErrorId) { 6576 case OPR_ID_UNKNOWN: 6577 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6578 return; 6579 case OPR_ID_UNSUPPORTED: 6580 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6581 return; 6582 case OPR_ID_DUPLICATE: 6583 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6584 return; 6585 case OPR_VAL_INVALID: 6586 Error(Loc, Twine("invalid value for ", DepCtrName)); 6587 return; 6588 default: 6589 assert(false); 6590 } 6591 } 6592 6593 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6594 6595 using namespace llvm::AMDGPU::DepCtr; 6596 6597 SMLoc DepCtrLoc = getLoc(); 6598 StringRef DepCtrName = getTokenStr(); 6599 6600 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6601 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6602 return false; 6603 6604 int64_t ExprVal; 6605 if (!parseExpr(ExprVal)) 6606 return false; 6607 6608 unsigned PrevOprMask = UsedOprMask; 6609 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6610 6611 if (CntVal < 0) { 6612 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6613 return false; 6614 } 6615 6616 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6617 return false; 6618 6619 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6620 if (isToken(AsmToken::EndOfStatement)) { 6621 Error(getLoc(), "expected a counter name"); 6622 return false; 6623 } 6624 } 6625 6626 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6627 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6628 return true; 6629 } 6630 6631 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6632 using namespace llvm::AMDGPU::DepCtr; 6633 6634 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6635 SMLoc Loc = getLoc(); 6636 6637 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6638 unsigned UsedOprMask = 0; 6639 while (!isToken(AsmToken::EndOfStatement)) { 6640 if (!parseDepCtr(DepCtr, UsedOprMask)) 6641 return MatchOperand_ParseFail; 6642 } 6643 } else { 6644 if (!parseExpr(DepCtr)) 6645 return MatchOperand_ParseFail; 6646 } 6647 6648 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6649 return MatchOperand_Success; 6650 } 6651 6652 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6653 6654 //===----------------------------------------------------------------------===// 6655 // hwreg 6656 //===----------------------------------------------------------------------===// 6657 6658 bool 6659 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6660 OperandInfoTy &Offset, 6661 OperandInfoTy &Width) { 6662 using namespace llvm::AMDGPU::Hwreg; 6663 6664 // The register may be specified by name or using a numeric code 6665 HwReg.Loc = getLoc(); 6666 if (isToken(AsmToken::Identifier) && 6667 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6668 HwReg.IsSymbolic = true; 6669 lex(); // skip register name 6670 } else if (!parseExpr(HwReg.Id, "a register name")) { 6671 return false; 6672 } 6673 6674 if (trySkipToken(AsmToken::RParen)) 6675 return true; 6676 6677 // parse optional params 6678 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6679 return false; 6680 6681 Offset.Loc = getLoc(); 6682 if (!parseExpr(Offset.Id)) 6683 return false; 6684 6685 if (!skipToken(AsmToken::Comma, "expected a comma")) 6686 return false; 6687 6688 Width.Loc = getLoc(); 6689 return parseExpr(Width.Id) && 6690 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6691 } 6692 6693 bool 6694 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6695 const OperandInfoTy &Offset, 6696 const OperandInfoTy &Width) { 6697 6698 using namespace llvm::AMDGPU::Hwreg; 6699 6700 if (HwReg.IsSymbolic) { 6701 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6702 Error(HwReg.Loc, 6703 "specified hardware register is not supported on this GPU"); 6704 return false; 6705 } 6706 } else { 6707 if (!isValidHwreg(HwReg.Id)) { 6708 Error(HwReg.Loc, 6709 "invalid code of hardware register: only 6-bit values are legal"); 6710 return false; 6711 } 6712 } 6713 if (!isValidHwregOffset(Offset.Id)) { 6714 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6715 return false; 6716 } 6717 if (!isValidHwregWidth(Width.Id)) { 6718 Error(Width.Loc, 6719 "invalid bitfield width: only values from 1 to 32 are legal"); 6720 return false; 6721 } 6722 return true; 6723 } 6724 6725 OperandMatchResultTy 6726 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6727 using namespace llvm::AMDGPU::Hwreg; 6728 6729 int64_t ImmVal = 0; 6730 SMLoc Loc = getLoc(); 6731 6732 if (trySkipId("hwreg", AsmToken::LParen)) { 6733 
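// Example of the symbolic form being parsed here (illustrative; register names
// and field bounds are target-dependent):
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 16)
// i.e. hwreg(<name or code>[, <bit offset>, <field width>]).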
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6734 OperandInfoTy Offset(OFFSET_DEFAULT_); 6735 OperandInfoTy Width(WIDTH_DEFAULT_); 6736 if (parseHwregBody(HwReg, Offset, Width) && 6737 validateHwreg(HwReg, Offset, Width)) { 6738 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6739 } else { 6740 return MatchOperand_ParseFail; 6741 } 6742 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6743 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6744 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6745 return MatchOperand_ParseFail; 6746 } 6747 } else { 6748 return MatchOperand_ParseFail; 6749 } 6750 6751 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6752 return MatchOperand_Success; 6753 } 6754 6755 bool AMDGPUOperand::isHwreg() const { 6756 return isImmTy(ImmTyHwreg); 6757 } 6758 6759 //===----------------------------------------------------------------------===// 6760 // sendmsg 6761 //===----------------------------------------------------------------------===// 6762 6763 bool 6764 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6765 OperandInfoTy &Op, 6766 OperandInfoTy &Stream) { 6767 using namespace llvm::AMDGPU::SendMsg; 6768 6769 Msg.Loc = getLoc(); 6770 if (isToken(AsmToken::Identifier) && 6771 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6772 Msg.IsSymbolic = true; 6773 lex(); // skip message name 6774 } else if (!parseExpr(Msg.Id, "a message name")) { 6775 return false; 6776 } 6777 6778 if (trySkipToken(AsmToken::Comma)) { 6779 Op.IsDefined = true; 6780 Op.Loc = getLoc(); 6781 if (isToken(AsmToken::Identifier) && 6782 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6783 lex(); // skip operation name 6784 } else if (!parseExpr(Op.Id, "an operation name")) { 6785 return false; 6786 } 6787 6788 if (trySkipToken(AsmToken::Comma)) { 6789 Stream.IsDefined = true; 6790 Stream.Loc = getLoc(); 6791 if (!parseExpr(Stream.Id)) 6792 return false; 6793 } 6794 } 6795 6796 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6797 } 6798 6799 bool 6800 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6801 const OperandInfoTy &Op, 6802 const OperandInfoTy &Stream) { 6803 using namespace llvm::AMDGPU::SendMsg; 6804 6805 // Validation strictness depends on whether message is specified 6806 // in a symbolic or in a numeric form. In the latter case 6807 // only encoding possibility is checked. 
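// For example (illustrative): sendmsg(MSG_GS_DONE, GS_OP_NOP) is validated
// against the symbolic tables below, while a raw numeric form such as
// sendmsg(3) only has to fit the encoding.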
6808 bool Strict = Msg.IsSymbolic; 6809 6810 if (Strict) { 6811 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6812 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6813 return false; 6814 } 6815 } else { 6816 if (!isValidMsgId(Msg.Id, getSTI())) { 6817 Error(Msg.Loc, "invalid message id"); 6818 return false; 6819 } 6820 } 6821 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6822 if (Op.IsDefined) { 6823 Error(Op.Loc, "message does not support operations"); 6824 } else { 6825 Error(Msg.Loc, "missing message operation"); 6826 } 6827 return false; 6828 } 6829 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6830 Error(Op.Loc, "invalid operation id"); 6831 return false; 6832 } 6833 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6834 Stream.IsDefined) { 6835 Error(Stream.Loc, "message operation does not support streams"); 6836 return false; 6837 } 6838 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6839 Error(Stream.Loc, "invalid message stream id"); 6840 return false; 6841 } 6842 return true; 6843 } 6844 6845 OperandMatchResultTy 6846 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6847 using namespace llvm::AMDGPU::SendMsg; 6848 6849 int64_t ImmVal = 0; 6850 SMLoc Loc = getLoc(); 6851 6852 if (trySkipId("sendmsg", AsmToken::LParen)) { 6853 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6854 OperandInfoTy Op(OP_NONE_); 6855 OperandInfoTy Stream(STREAM_ID_NONE_); 6856 if (parseSendMsgBody(Msg, Op, Stream) && 6857 validateSendMsg(Msg, Op, Stream)) { 6858 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6859 } else { 6860 return MatchOperand_ParseFail; 6861 } 6862 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6863 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6864 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6865 return MatchOperand_ParseFail; 6866 } 6867 } else { 6868 return MatchOperand_ParseFail; 6869 } 6870 6871 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6872 return MatchOperand_Success; 6873 } 6874 6875 bool AMDGPUOperand::isSendMsg() const { 6876 return isImmTy(ImmTySendMsg); 6877 } 6878 6879 //===----------------------------------------------------------------------===// 6880 // v_interp 6881 //===----------------------------------------------------------------------===// 6882 6883 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6884 StringRef Str; 6885 SMLoc S = getLoc(); 6886 6887 if (!parseId(Str)) 6888 return MatchOperand_NoMatch; 6889 6890 int Slot = StringSwitch<int>(Str) 6891 .Case("p10", 0) 6892 .Case("p20", 1) 6893 .Case("p0", 2) 6894 .Default(-1); 6895 6896 if (Slot == -1) { 6897 Error(S, "invalid interpolation slot"); 6898 return MatchOperand_ParseFail; 6899 } 6900 6901 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6902 AMDGPUOperand::ImmTyInterpSlot)); 6903 return MatchOperand_Success; 6904 } 6905 6906 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6907 StringRef Str; 6908 SMLoc S = getLoc(); 6909 6910 if (!parseId(Str)) 6911 return MatchOperand_NoMatch; 6912 6913 if (!Str.startswith("attr")) { 6914 Error(S, "invalid interpolation attribute"); 6915 return MatchOperand_ParseFail; 6916 } 6917 6918 StringRef Chan = Str.take_back(2); 6919 int AttrChan = StringSwitch<int>(Chan) 6920 .Case(".x", 0) 6921 .Case(".y", 1) 6922 .Case(".z", 2) 6923 .Case(".w", 3) 6924 .Default(-1); 6925 if (AttrChan == -1) { 6926 Error(S, "invalid or missing interpolation attribute channel"); 
6927 return MatchOperand_ParseFail; 6928 } 6929 6930 Str = Str.drop_back(2).drop_front(4); 6931 6932 uint8_t Attr; 6933 if (Str.getAsInteger(10, Attr)) { 6934 Error(S, "invalid or missing interpolation attribute number"); 6935 return MatchOperand_ParseFail; 6936 } 6937 6938 if (Attr > 63) { 6939 Error(S, "out of bounds interpolation attribute number"); 6940 return MatchOperand_ParseFail; 6941 } 6942 6943 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6944 6945 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6946 AMDGPUOperand::ImmTyInterpAttr)); 6947 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6948 AMDGPUOperand::ImmTyAttrChan)); 6949 return MatchOperand_Success; 6950 } 6951 6952 //===----------------------------------------------------------------------===// 6953 // exp 6954 //===----------------------------------------------------------------------===// 6955 6956 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6957 using namespace llvm::AMDGPU::Exp; 6958 6959 StringRef Str; 6960 SMLoc S = getLoc(); 6961 6962 if (!parseId(Str)) 6963 return MatchOperand_NoMatch; 6964 6965 unsigned Id = getTgtId(Str); 6966 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6967 Error(S, (Id == ET_INVALID) ? 6968 "invalid exp target" : 6969 "exp target is not supported on this GPU"); 6970 return MatchOperand_ParseFail; 6971 } 6972 6973 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6974 AMDGPUOperand::ImmTyExpTgt)); 6975 return MatchOperand_Success; 6976 } 6977 6978 //===----------------------------------------------------------------------===// 6979 // parser helpers 6980 //===----------------------------------------------------------------------===// 6981 6982 bool 6983 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6984 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6985 } 6986 6987 bool 6988 AMDGPUAsmParser::isId(const StringRef Id) const { 6989 return isId(getToken(), Id); 6990 } 6991 6992 bool 6993 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6994 return getTokenKind() == Kind; 6995 } 6996 6997 bool 6998 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6999 if (isId(Id)) { 7000 lex(); 7001 return true; 7002 } 7003 return false; 7004 } 7005 7006 bool 7007 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7008 if (isToken(AsmToken::Identifier)) { 7009 StringRef Tok = getTokenStr(); 7010 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7011 lex(); 7012 return true; 7013 } 7014 } 7015 return false; 7016 } 7017 7018 bool 7019 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7020 if (isId(Id) && peekToken().is(Kind)) { 7021 lex(); 7022 lex(); 7023 return true; 7024 } 7025 return false; 7026 } 7027 7028 bool 7029 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7030 if (isToken(Kind)) { 7031 lex(); 7032 return true; 7033 } 7034 return false; 7035 } 7036 7037 bool 7038 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7039 const StringRef ErrMsg) { 7040 if (!trySkipToken(Kind)) { 7041 Error(getLoc(), ErrMsg); 7042 return false; 7043 } 7044 return true; 7045 } 7046 7047 bool 7048 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7049 SMLoc S = getLoc(); 7050 7051 const MCExpr *Expr; 7052 if (Parser.parseExpression(Expr)) 7053 return false; 7054 7055 if (Expr->evaluateAsAbsolute(Imm)) 7056 return true; 7057 7058 if (Expected.empty()) { 7059 Error(S, "expected 
absolute expression"); 7060 } else { 7061 Error(S, Twine("expected ", Expected) + 7062 Twine(" or an absolute expression")); 7063 } 7064 return false; 7065 } 7066 7067 bool 7068 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7069 SMLoc S = getLoc(); 7070 7071 const MCExpr *Expr; 7072 if (Parser.parseExpression(Expr)) 7073 return false; 7074 7075 int64_t IntVal; 7076 if (Expr->evaluateAsAbsolute(IntVal)) { 7077 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7078 } else { 7079 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7080 } 7081 return true; 7082 } 7083 7084 bool 7085 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7086 if (isToken(AsmToken::String)) { 7087 Val = getToken().getStringContents(); 7088 lex(); 7089 return true; 7090 } else { 7091 Error(getLoc(), ErrMsg); 7092 return false; 7093 } 7094 } 7095 7096 bool 7097 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7098 if (isToken(AsmToken::Identifier)) { 7099 Val = getTokenStr(); 7100 lex(); 7101 return true; 7102 } else { 7103 if (!ErrMsg.empty()) 7104 Error(getLoc(), ErrMsg); 7105 return false; 7106 } 7107 } 7108 7109 AsmToken 7110 AMDGPUAsmParser::getToken() const { 7111 return Parser.getTok(); 7112 } 7113 7114 AsmToken 7115 AMDGPUAsmParser::peekToken() { 7116 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7117 } 7118 7119 void 7120 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7121 auto TokCount = getLexer().peekTokens(Tokens); 7122 7123 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7124 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7125 } 7126 7127 AsmToken::TokenKind 7128 AMDGPUAsmParser::getTokenKind() const { 7129 return getLexer().getKind(); 7130 } 7131 7132 SMLoc 7133 AMDGPUAsmParser::getLoc() const { 7134 return getToken().getLoc(); 7135 } 7136 7137 StringRef 7138 AMDGPUAsmParser::getTokenStr() const { 7139 return getToken().getString(); 7140 } 7141 7142 void 7143 AMDGPUAsmParser::lex() { 7144 Parser.Lex(); 7145 } 7146 7147 SMLoc 7148 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7149 const OperandVector &Operands) const { 7150 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7151 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7152 if (Test(Op)) 7153 return Op.getStartLoc(); 7154 } 7155 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7156 } 7157 7158 SMLoc 7159 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7160 const OperandVector &Operands) const { 7161 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7162 return getOperandLoc(Test, Operands); 7163 } 7164 7165 SMLoc 7166 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7167 const OperandVector &Operands) const { 7168 auto Test = [=](const AMDGPUOperand& Op) { 7169 return Op.isRegKind() && Op.getReg() == Reg; 7170 }; 7171 return getOperandLoc(Test, Operands); 7172 } 7173 7174 SMLoc 7175 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7176 auto Test = [](const AMDGPUOperand& Op) { 7177 return Op.IsImmKindLiteral() || Op.isExpr(); 7178 }; 7179 return getOperandLoc(Test, Operands); 7180 } 7181 7182 SMLoc 7183 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7184 auto Test = [](const AMDGPUOperand& Op) { 7185 return Op.isImmKindConst(); 7186 }; 7187 return getOperandLoc(Test, Operands); 7188 } 7189 7190 //===----------------------------------------------------------------------===// 7191 // swizzle 7192 
//===----------------------------------------------------------------------===// 7193 7194 LLVM_READNONE 7195 static unsigned 7196 encodeBitmaskPerm(const unsigned AndMask, 7197 const unsigned OrMask, 7198 const unsigned XorMask) { 7199 using namespace llvm::AMDGPU::Swizzle; 7200 7201 return BITMASK_PERM_ENC | 7202 (AndMask << BITMASK_AND_SHIFT) | 7203 (OrMask << BITMASK_OR_SHIFT) | 7204 (XorMask << BITMASK_XOR_SHIFT); 7205 } 7206 7207 bool 7208 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7209 const unsigned MinVal, 7210 const unsigned MaxVal, 7211 const StringRef ErrMsg, 7212 SMLoc &Loc) { 7213 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7214 return false; 7215 } 7216 Loc = getLoc(); 7217 if (!parseExpr(Op)) { 7218 return false; 7219 } 7220 if (Op < MinVal || Op > MaxVal) { 7221 Error(Loc, ErrMsg); 7222 return false; 7223 } 7224 7225 return true; 7226 } 7227 7228 bool 7229 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7230 const unsigned MinVal, 7231 const unsigned MaxVal, 7232 const StringRef ErrMsg) { 7233 SMLoc Loc; 7234 for (unsigned i = 0; i < OpNum; ++i) { 7235 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7236 return false; 7237 } 7238 7239 return true; 7240 } 7241 7242 bool 7243 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7244 using namespace llvm::AMDGPU::Swizzle; 7245 7246 int64_t Lane[LANE_NUM]; 7247 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7248 "expected a 2-bit lane id")) { 7249 Imm = QUAD_PERM_ENC; 7250 for (unsigned I = 0; I < LANE_NUM; ++I) { 7251 Imm |= Lane[I] << (LANE_SHIFT * I); 7252 } 7253 return true; 7254 } 7255 return false; 7256 } 7257 7258 bool 7259 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7260 using namespace llvm::AMDGPU::Swizzle; 7261 7262 SMLoc Loc; 7263 int64_t GroupSize; 7264 int64_t LaneIdx; 7265 7266 if (!parseSwizzleOperand(GroupSize, 7267 2, 32, 7268 "group size must be in the interval [2,32]", 7269 Loc)) { 7270 return false; 7271 } 7272 if (!isPowerOf2_64(GroupSize)) { 7273 Error(Loc, "group size must be a power of two"); 7274 return false; 7275 } 7276 if (parseSwizzleOperand(LaneIdx, 7277 0, GroupSize - 1, 7278 "lane id must be in the interval [0,group size - 1]", 7279 Loc)) { 7280 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7281 return true; 7282 } 7283 return false; 7284 } 7285 7286 bool 7287 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7288 using namespace llvm::AMDGPU::Swizzle; 7289 7290 SMLoc Loc; 7291 int64_t GroupSize; 7292 7293 if (!parseSwizzleOperand(GroupSize, 7294 2, 32, 7295 "group size must be in the interval [2,32]", 7296 Loc)) { 7297 return false; 7298 } 7299 if (!isPowerOf2_64(GroupSize)) { 7300 Error(Loc, "group size must be a power of two"); 7301 return false; 7302 } 7303 7304 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7305 return true; 7306 } 7307 7308 bool 7309 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7310 using namespace llvm::AMDGPU::Swizzle; 7311 7312 SMLoc Loc; 7313 int64_t GroupSize; 7314 7315 if (!parseSwizzleOperand(GroupSize, 7316 1, 16, 7317 "group size must be in the interval [1,16]", 7318 Loc)) { 7319 return false; 7320 } 7321 if (!isPowerOf2_64(GroupSize)) { 7322 Error(Loc, "group size must be a power of two"); 7323 return false; 7324 } 7325 7326 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7327 return true; 7328 } 7329 7330 bool 7331 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7332 using namespace llvm::AMDGPU::Swizzle; 7333 7334 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7335 return false; 7336 } 7337 7338 StringRef Ctl; 7339 SMLoc StrLoc = getLoc(); 7340 if (!parseString(Ctl)) { 7341 return false; 7342 } 7343 if (Ctl.size() != BITMASK_WIDTH) { 7344 Error(StrLoc, "expected a 5-character mask"); 7345 return false; 7346 } 7347 7348 unsigned AndMask = 0; 7349 unsigned OrMask = 0; 7350 unsigned XorMask = 0; 7351 7352 for (size_t i = 0; i < Ctl.size(); ++i) { 7353 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7354 switch(Ctl[i]) { 7355 default: 7356 Error(StrLoc, "invalid mask"); 7357 return false; 7358 case '0': 7359 break; 7360 case '1': 7361 OrMask |= Mask; 7362 break; 7363 case 'p': 7364 AndMask |= Mask; 7365 break; 7366 case 'i': 7367 AndMask |= Mask; 7368 XorMask |= Mask; 7369 break; 7370 } 7371 } 7372 7373 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7374 return true; 7375 } 7376 7377 bool 7378 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7379 7380 SMLoc OffsetLoc = getLoc(); 7381 7382 if (!parseExpr(Imm, "a swizzle macro")) { 7383 return false; 7384 } 7385 if (!isUInt<16>(Imm)) { 7386 Error(OffsetLoc, "expected a 16-bit offset"); 7387 return false; 7388 } 7389 return true; 7390 } 7391 7392 bool 7393 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7394 using namespace llvm::AMDGPU::Swizzle; 7395 7396 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7397 7398 SMLoc ModeLoc = getLoc(); 7399 bool Ok = false; 7400 7401 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7402 Ok = parseSwizzleQuadPerm(Imm); 7403 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7404 Ok = parseSwizzleBitmaskPerm(Imm); 7405 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7406 Ok = parseSwizzleBroadcast(Imm); 7407 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7408 Ok = parseSwizzleSwap(Imm); 7409 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7410 Ok = parseSwizzleReverse(Imm); 7411 } else { 7412 Error(ModeLoc, "expected a swizzle mode"); 7413 } 7414 7415 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7416 } 7417 7418 return false; 7419 } 7420 7421 OperandMatchResultTy 7422 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7423 SMLoc S = getLoc(); 7424 int64_t Imm = 0; 7425 7426 if (trySkipId("offset")) { 7427 7428 bool Ok = false; 7429 if (skipToken(AsmToken::Colon, "expected a colon")) { 7430 if (trySkipId("swizzle")) { 7431 Ok = parseSwizzleMacro(Imm); 7432 } else { 7433 Ok = parseSwizzleOffset(Imm); 7434 } 7435 } 7436 7437 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7438 7439 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7440 } else { 7441 // Swizzle "offset" operand is optional. 7442 // If it is omitted, try parsing other optional operands. 
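    // (When present, the operand takes either a raw 16-bit pattern, e.g.
    // offset:0xabcd, or a macro form such as offset:swizzle(BITMASK_PERM,
    // "00p11"); the values here are only illustrative.)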
7443 return parseOptionalOpr(Operands); 7444 } 7445 } 7446 7447 bool 7448 AMDGPUOperand::isSwizzle() const { 7449 return isImmTy(ImmTySwizzle); 7450 } 7451 7452 //===----------------------------------------------------------------------===// 7453 // VGPR Index Mode 7454 //===----------------------------------------------------------------------===// 7455 7456 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7457 7458 using namespace llvm::AMDGPU::VGPRIndexMode; 7459 7460 if (trySkipToken(AsmToken::RParen)) { 7461 return OFF; 7462 } 7463 7464 int64_t Imm = 0; 7465 7466 while (true) { 7467 unsigned Mode = 0; 7468 SMLoc S = getLoc(); 7469 7470 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7471 if (trySkipId(IdSymbolic[ModeId])) { 7472 Mode = 1 << ModeId; 7473 break; 7474 } 7475 } 7476 7477 if (Mode == 0) { 7478 Error(S, (Imm == 0)? 7479 "expected a VGPR index mode or a closing parenthesis" : 7480 "expected a VGPR index mode"); 7481 return UNDEF; 7482 } 7483 7484 if (Imm & Mode) { 7485 Error(S, "duplicate VGPR index mode"); 7486 return UNDEF; 7487 } 7488 Imm |= Mode; 7489 7490 if (trySkipToken(AsmToken::RParen)) 7491 break; 7492 if (!skipToken(AsmToken::Comma, 7493 "expected a comma or a closing parenthesis")) 7494 return UNDEF; 7495 } 7496 7497 return Imm; 7498 } 7499 7500 OperandMatchResultTy 7501 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7502 7503 using namespace llvm::AMDGPU::VGPRIndexMode; 7504 7505 int64_t Imm = 0; 7506 SMLoc S = getLoc(); 7507 7508 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7509 Imm = parseGPRIdxMacro(); 7510 if (Imm == UNDEF) 7511 return MatchOperand_ParseFail; 7512 } else { 7513 if (getParser().parseAbsoluteExpression(Imm)) 7514 return MatchOperand_ParseFail; 7515 if (Imm < 0 || !isUInt<4>(Imm)) { 7516 Error(S, "invalid immediate: only 4-bit values are legal"); 7517 return MatchOperand_ParseFail; 7518 } 7519 } 7520 7521 Operands.push_back( 7522 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7523 return MatchOperand_Success; 7524 } 7525 7526 bool AMDGPUOperand::isGPRIdxMode() const { 7527 return isImmTy(ImmTyGprIdxMode); 7528 } 7529 7530 //===----------------------------------------------------------------------===// 7531 // sopp branch targets 7532 //===----------------------------------------------------------------------===// 7533 7534 OperandMatchResultTy 7535 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7536 7537 // Make sure we are not parsing something 7538 // that looks like a label or an expression but is not. 7539 // This will improve error messages. 7540 if (isRegister() || isModifier()) 7541 return MatchOperand_NoMatch; 7542 7543 if (!parseExpr(Operands)) 7544 return MatchOperand_ParseFail; 7545 7546 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7547 assert(Opr.isImm() || Opr.isExpr()); 7548 SMLoc Loc = Opr.getStartLoc(); 7549 7550 // Currently we do not support arbitrary expressions as branch targets. 7551 // Only labels and absolute expressions are accepted. 
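  // For illustration (hypothetical label names): "s_branch 0x3f" and
  // "s_branch loop_end" are fine, whereas "s_branch loop_end + 4" typically
  // cannot be folded to an absolute value at parse time and is diagnosed below.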
7552 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7553 Error(Loc, "expected an absolute expression or a label"); 7554 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7555 Error(Loc, "expected a 16-bit signed jump offset"); 7556 } 7557 7558 return MatchOperand_Success; 7559 } 7560 7561 //===----------------------------------------------------------------------===// 7562 // Boolean holding registers 7563 //===----------------------------------------------------------------------===// 7564 7565 OperandMatchResultTy 7566 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7567 return parseReg(Operands); 7568 } 7569 7570 //===----------------------------------------------------------------------===// 7571 // mubuf 7572 //===----------------------------------------------------------------------===// 7573 7574 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7575 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7576 } 7577 7578 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7579 const OperandVector &Operands, 7580 bool IsAtomic, 7581 bool IsLds) { 7582 OptionalImmIndexMap OptionalIdx; 7583 unsigned FirstOperandIdx = 1; 7584 bool IsAtomicReturn = false; 7585 7586 if (IsAtomic) { 7587 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7588 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7589 if (!Op.isCPol()) 7590 continue; 7591 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7592 break; 7593 } 7594 7595 if (!IsAtomicReturn) { 7596 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7597 if (NewOpc != -1) 7598 Inst.setOpcode(NewOpc); 7599 } 7600 7601 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7602 SIInstrFlags::IsAtomicRet; 7603 } 7604 7605 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7606 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7607 7608 // Add the register arguments 7609 if (Op.isReg()) { 7610 Op.addRegOperands(Inst, 1); 7611 // Insert a tied src for atomic return dst. 7612 // This cannot be postponed as subsequent calls to 7613 // addImmOperands rely on correct number of MC operands. 7614 if (IsAtomicReturn && i == FirstOperandIdx) 7615 Op.addRegOperands(Inst, 1); 7616 continue; 7617 } 7618 7619 // Handle the case where soffset is an immediate 7620 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7621 Op.addImmOperands(Inst, 1); 7622 continue; 7623 } 7624 7625 // Handle tokens like 'offen' which are sometimes hard-coded into the 7626 // asm string. There are no MCInst operands for these. 
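    // e.g. the trailing "offen" in "buffer_load_dword v0, v1, s[4:7], s1 offen"
    // (register choices are illustrative).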
7627 if (Op.isToken()) { 7628 continue; 7629 } 7630 assert(Op.isImm()); 7631 7632 // Handle optional arguments 7633 OptionalIdx[Op.getImmTy()] = i; 7634 } 7635 7636 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7638 7639 if (!IsLds) { // tfe is not legal with lds opcodes 7640 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7641 } 7642 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7643 } 7644 7645 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7646 OptionalImmIndexMap OptionalIdx; 7647 7648 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7650 7651 // Add the register arguments 7652 if (Op.isReg()) { 7653 Op.addRegOperands(Inst, 1); 7654 continue; 7655 } 7656 7657 // Handle the case where soffset is an immediate 7658 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7659 Op.addImmOperands(Inst, 1); 7660 continue; 7661 } 7662 7663 // Handle tokens like 'offen' which are sometimes hard-coded into the 7664 // asm string. There are no MCInst operands for these. 7665 if (Op.isToken()) { 7666 continue; 7667 } 7668 assert(Op.isImm()); 7669 7670 // Handle optional arguments 7671 OptionalIdx[Op.getImmTy()] = i; 7672 } 7673 7674 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7675 AMDGPUOperand::ImmTyOffset); 7676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7680 } 7681 7682 //===----------------------------------------------------------------------===// 7683 // mimg 7684 //===----------------------------------------------------------------------===// 7685 7686 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7687 bool IsAtomic) { 7688 unsigned I = 1; 7689 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7690 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7691 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7692 } 7693 7694 if (IsAtomic) { 7695 // Add src, same as dst 7696 assert(Desc.getNumDefs() == 1); 7697 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7698 } 7699 7700 OptionalImmIndexMap OptionalIdx; 7701 7702 for (unsigned E = Operands.size(); I != E; ++I) { 7703 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7704 7705 // Add the register arguments 7706 if (Op.isReg()) { 7707 Op.addRegOperands(Inst, 1); 7708 } else if (Op.isImmModifier()) { 7709 OptionalIdx[Op.getImmTy()] = I; 7710 } else if (!Op.isToken()) { 7711 llvm_unreachable("unexpected operand type"); 7712 } 7713 } 7714 7715 bool IsGFX10Plus = isGFX10Plus(); 7716 7717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7718 if (IsGFX10Plus) 7719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7723 if (IsGFX10Plus) 7724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7725 
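  // Not every MIMG opcode defines a tfe operand, so query the named operand
  // index before appending it.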
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7728 if (!IsGFX10Plus) 7729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7731 } 7732 7733 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7734 cvtMIMG(Inst, Operands, true); 7735 } 7736 7737 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7738 OptionalImmIndexMap OptionalIdx; 7739 bool IsAtomicReturn = false; 7740 7741 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7742 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7743 if (!Op.isCPol()) 7744 continue; 7745 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7746 break; 7747 } 7748 7749 if (!IsAtomicReturn) { 7750 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7751 if (NewOpc != -1) 7752 Inst.setOpcode(NewOpc); 7753 } 7754 7755 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7756 SIInstrFlags::IsAtomicRet; 7757 7758 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7759 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7760 7761 // Add the register arguments 7762 if (Op.isReg()) { 7763 Op.addRegOperands(Inst, 1); 7764 if (IsAtomicReturn && i == 1) 7765 Op.addRegOperands(Inst, 1); 7766 continue; 7767 } 7768 7769 // Handle the case where soffset is an immediate 7770 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7771 Op.addImmOperands(Inst, 1); 7772 continue; 7773 } 7774 7775 // Handle tokens like 'offen' which are sometimes hard-coded into the 7776 // asm string. There are no MCInst operands for these. 7777 if (Op.isToken()) { 7778 continue; 7779 } 7780 assert(Op.isImm()); 7781 7782 // Handle optional arguments 7783 OptionalIdx[Op.getImmTy()] = i; 7784 } 7785 7786 if ((int)Inst.getNumOperands() <= 7787 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7790 } 7791 7792 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7793 const OperandVector &Operands) { 7794 for (unsigned I = 1; I < Operands.size(); ++I) { 7795 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7796 if (Operand.isReg()) 7797 Operand.addRegOperands(Inst, 1); 7798 } 7799 7800 Inst.addOperand(MCOperand::createImm(1)); // a16 7801 } 7802 7803 //===----------------------------------------------------------------------===// 7804 // smrd 7805 //===----------------------------------------------------------------------===// 7806 7807 bool AMDGPUOperand::isSMRDOffset8() const { 7808 return isImm() && isUInt<8>(getImm()); 7809 } 7810 7811 bool AMDGPUOperand::isSMEMOffset() const { 7812 return isImmTy(ImmTyNone) || 7813 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7814 } 7815 7816 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7817 // 32-bit literals are only supported on CI and we only want to use them 7818 // when the offset is > 8-bits. 
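  // e.g. an offset of 0x100 no longer fits the 8-bit encoding and needs the
  // 32-bit literal form, while 0xff still does.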
7819 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7820 } 7821 7822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7824 } 7825 7826 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7827 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7828 } 7829 7830 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7831 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7832 } 7833 7834 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7835 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7836 } 7837 7838 //===----------------------------------------------------------------------===// 7839 // vop3 7840 //===----------------------------------------------------------------------===// 7841 7842 static bool ConvertOmodMul(int64_t &Mul) { 7843 if (Mul != 1 && Mul != 2 && Mul != 4) 7844 return false; 7845 7846 Mul >>= 1; 7847 return true; 7848 } 7849 7850 static bool ConvertOmodDiv(int64_t &Div) { 7851 if (Div == 1) { 7852 Div = 0; 7853 return true; 7854 } 7855 7856 if (Div == 2) { 7857 Div = 3; 7858 return true; 7859 } 7860 7861 return false; 7862 } 7863 7864 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7865 // This is intentional and ensures compatibility with sp3. 7866 // See bug 35397 for details. 7867 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7868 if (BoundCtrl == 0 || BoundCtrl == 1) { 7869 BoundCtrl = 1; 7870 return true; 7871 } 7872 return false; 7873 } 7874 7875 // Note: the order in this table matches the order of operands in AsmString. 7876 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7877 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7878 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7879 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7880 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7881 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7882 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7883 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7884 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7885 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7886 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7887 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7888 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7889 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7890 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7891 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7892 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7893 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7894 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7895 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7896 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7897 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7898 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7899 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7900 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7901 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7902 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7903 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7904 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7905 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7906 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7907 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7908 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7909 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7910 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7911 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7912 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7913 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7914 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7915 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7916 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7917 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7918 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7919 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7920 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7921 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7922 }; 7923 7924 void AMDGPUAsmParser::onBeginOfFile() { 7925 if (!getParser().getStreamer().getTargetStreamer() || 7926 getSTI().getTargetTriple().getArch() == Triple::r600) 7927 return; 7928 7929 if (!getTargetStreamer().getTargetID()) 7930 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7931 7932 if (isHsaAbiVersion3AndAbove(&getSTI())) 7933 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7934 } 7935 7936 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7937 7938 OperandMatchResultTy res = parseOptionalOpr(Operands); 7939 7940 // This is a hack to enable hardcoded mandatory operands which follow 7941 // optional operands. 7942 // 7943 // Current design assumes that all operands after the first optional operand 7944 // are also optional. However implementation of some instructions violates 7945 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7946 // 7947 // To alleviate this problem, we have to (implicitly) parse extra operands 7948 // to make sure autogenerated parser of custom operands never hit hardcoded 7949 // mandatory operands. 
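  //
  // MAX_OPR_LOOKAHEAD bounds how many additional (optionally comma-separated)
  // operands the loop below is willing to consume for this purpose.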
7950 7951 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7952 if (res != MatchOperand_Success || 7953 isToken(AsmToken::EndOfStatement)) 7954 break; 7955 7956 trySkipToken(AsmToken::Comma); 7957 res = parseOptionalOpr(Operands); 7958 } 7959 7960 return res; 7961 } 7962 7963 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7964 OperandMatchResultTy res; 7965 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7966 // try to parse any optional operand here 7967 if (Op.IsBit) { 7968 res = parseNamedBit(Op.Name, Operands, Op.Type); 7969 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7970 res = parseOModOperand(Operands); 7971 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7972 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7973 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7974 res = parseSDWASel(Operands, Op.Name, Op.Type); 7975 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7976 res = parseSDWADstUnused(Operands); 7977 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7978 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7979 Op.Type == AMDGPUOperand::ImmTyNegLo || 7980 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7981 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7982 Op.ConvertResult); 7983 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7984 res = parseDim(Operands); 7985 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7986 res = parseCPol(Operands); 7987 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7988 res = parseDPP8(Operands); 7989 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7990 res = parseDPPCtrl(Operands); 7991 } else { 7992 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7993 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7994 res = parseOperandArrayWithPrefix("neg", Operands, 7995 AMDGPUOperand::ImmTyBLGP, 7996 nullptr); 7997 } 7998 } 7999 if (res != MatchOperand_NoMatch) { 8000 return res; 8001 } 8002 } 8003 return MatchOperand_NoMatch; 8004 } 8005 8006 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8007 StringRef Name = getTokenStr(); 8008 if (Name == "mul") { 8009 return parseIntWithPrefix("mul", Operands, 8010 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8011 } 8012 8013 if (Name == "div") { 8014 return parseIntWithPrefix("div", Operands, 8015 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8016 } 8017 8018 return MatchOperand_NoMatch; 8019 } 8020 8021 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8022 cvtVOP3P(Inst, Operands); 8023 8024 int Opc = Inst.getOpcode(); 8025 8026 int SrcNum; 8027 const int Ops[] = { AMDGPU::OpName::src0, 8028 AMDGPU::OpName::src1, 8029 AMDGPU::OpName::src2 }; 8030 for (SrcNum = 0; 8031 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8032 ++SrcNum); 8033 assert(SrcNum > 0); 8034 8035 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8036 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8037 8038 if ((OpSel & (1 << SrcNum)) != 0) { 8039 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8040 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8041 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8042 } 8043 } 8044 8045 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8046 // 1. This operand is input modifiers 8047 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8048 // 2. 
This is not last operand 8049 && Desc.NumOperands > (OpNum + 1) 8050 // 3. Next operand is register class 8051 && Desc.OpInfo[OpNum + 1].RegClass != -1 8052 // 4. Next register is not tied to any other operand 8053 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8054 } 8055 8056 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8057 { 8058 OptionalImmIndexMap OptionalIdx; 8059 unsigned Opc = Inst.getOpcode(); 8060 8061 unsigned I = 1; 8062 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8063 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8064 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8065 } 8066 8067 for (unsigned E = Operands.size(); I != E; ++I) { 8068 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8069 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8070 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8071 } else if (Op.isInterpSlot() || 8072 Op.isInterpAttr() || 8073 Op.isAttrChan()) { 8074 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8075 } else if (Op.isImmModifier()) { 8076 OptionalIdx[Op.getImmTy()] = I; 8077 } else { 8078 llvm_unreachable("unhandled operand type"); 8079 } 8080 } 8081 8082 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8084 } 8085 8086 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8088 } 8089 8090 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8091 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8092 } 8093 } 8094 8095 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8096 { 8097 OptionalImmIndexMap OptionalIdx; 8098 unsigned Opc = Inst.getOpcode(); 8099 8100 unsigned I = 1; 8101 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8102 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8103 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8104 } 8105 8106 for (unsigned E = Operands.size(); I != E; ++I) { 8107 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8108 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8109 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8110 } else if (Op.isImmModifier()) { 8111 OptionalIdx[Op.getImmTy()] = I; 8112 } else { 8113 llvm_unreachable("unhandled operand type"); 8114 } 8115 } 8116 8117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8118 8119 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8120 if (OpSelIdx != -1) 8121 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8122 8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8124 8125 if (OpSelIdx == -1) 8126 return; 8127 8128 const int Ops[] = { AMDGPU::OpName::src0, 8129 AMDGPU::OpName::src1, 8130 AMDGPU::OpName::src2 }; 8131 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8132 AMDGPU::OpName::src1_modifiers, 8133 AMDGPU::OpName::src2_modifiers }; 8134 8135 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8136 8137 for (int J = 0; J < 3; ++J) { 8138 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8139 if (OpIdx == -1) 8140 break; 8141 8142 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8143 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8144 8145 if ((OpSel & (1 << J)) != 0) 8146 ModVal |= 
SISrcMods::OP_SEL_0; 8147 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8148 (OpSel & (1 << 3)) != 0) 8149 ModVal |= SISrcMods::DST_OP_SEL; 8150 8151 Inst.getOperand(ModIdx).setImm(ModVal); 8152 } 8153 } 8154 8155 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8156 OptionalImmIndexMap &OptionalIdx) { 8157 unsigned Opc = Inst.getOpcode(); 8158 8159 unsigned I = 1; 8160 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8161 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8162 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8163 } 8164 8165 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8166 // This instruction has src modifiers 8167 for (unsigned E = Operands.size(); I != E; ++I) { 8168 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8169 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8170 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8171 } else if (Op.isImmModifier()) { 8172 OptionalIdx[Op.getImmTy()] = I; 8173 } else if (Op.isRegOrImm()) { 8174 Op.addRegOrImmOperands(Inst, 1); 8175 } else { 8176 llvm_unreachable("unhandled operand type"); 8177 } 8178 } 8179 } else { 8180 // No src modifiers 8181 for (unsigned E = Operands.size(); I != E; ++I) { 8182 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8183 if (Op.isMod()) { 8184 OptionalIdx[Op.getImmTy()] = I; 8185 } else { 8186 Op.addRegOrImmOperands(Inst, 1); 8187 } 8188 } 8189 } 8190 8191 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8193 } 8194 8195 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8197 } 8198 8199 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8200 // it has src2 register operand that is tied to dst operand 8201 // we don't allow modifiers for this operand in assembler so src2_modifiers 8202 // should be 0. 8203 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8204 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8205 Opc == AMDGPU::V_MAC_F32_e64_vi || 8206 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8207 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8208 Opc == AMDGPU::V_MAC_F16_e64_vi || 8209 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8210 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8211 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8212 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8213 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8214 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8215 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8216 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8217 auto it = Inst.begin(); 8218 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8219 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8220 ++it; 8221 // Copy the operand to ensure it's not invalidated when Inst grows. 
8222 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8223 } 8224 } 8225 8226 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8227 OptionalImmIndexMap OptionalIdx; 8228 cvtVOP3(Inst, Operands, OptionalIdx); 8229 } 8230 8231 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8232 OptionalImmIndexMap &OptIdx) { 8233 const int Opc = Inst.getOpcode(); 8234 const MCInstrDesc &Desc = MII.get(Opc); 8235 8236 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8237 8238 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8239 assert(!IsPacked); 8240 Inst.addOperand(Inst.getOperand(0)); 8241 } 8242 8243 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8244 // instruction, and then figure out where to actually put the modifiers 8245 8246 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8247 if (OpSelIdx != -1) { 8248 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8249 } 8250 8251 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8252 if (OpSelHiIdx != -1) { 8253 int DefaultVal = IsPacked ? -1 : 0; 8254 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8255 DefaultVal); 8256 } 8257 8258 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8259 if (NegLoIdx != -1) { 8260 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8261 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8262 } 8263 8264 const int Ops[] = { AMDGPU::OpName::src0, 8265 AMDGPU::OpName::src1, 8266 AMDGPU::OpName::src2 }; 8267 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8268 AMDGPU::OpName::src1_modifiers, 8269 AMDGPU::OpName::src2_modifiers }; 8270 8271 unsigned OpSel = 0; 8272 unsigned OpSelHi = 0; 8273 unsigned NegLo = 0; 8274 unsigned NegHi = 0; 8275 8276 if (OpSelIdx != -1) 8277 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8278 8279 if (OpSelHiIdx != -1) 8280 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8281 8282 if (NegLoIdx != -1) { 8283 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8284 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8285 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8286 } 8287 8288 for (int J = 0; J < 3; ++J) { 8289 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8290 if (OpIdx == -1) 8291 break; 8292 8293 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8294 8295 if (ModIdx == -1) 8296 continue; 8297 8298 uint32_t ModVal = 0; 8299 8300 if ((OpSel & (1 << J)) != 0) 8301 ModVal |= SISrcMods::OP_SEL_0; 8302 8303 if ((OpSelHi & (1 << J)) != 0) 8304 ModVal |= SISrcMods::OP_SEL_1; 8305 8306 if ((NegLo & (1 << J)) != 0) 8307 ModVal |= SISrcMods::NEG; 8308 8309 if ((NegHi & (1 << J)) != 0) 8310 ModVal |= SISrcMods::NEG_HI; 8311 8312 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8313 } 8314 } 8315 8316 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8317 OptionalImmIndexMap OptIdx; 8318 cvtVOP3(Inst, Operands, OptIdx); 8319 cvtVOP3P(Inst, Operands, OptIdx); 8320 } 8321 8322 //===----------------------------------------------------------------------===// 8323 // dpp 8324 //===----------------------------------------------------------------------===// 8325 8326 bool AMDGPUOperand::isDPP8() const { 8327 return isImmTy(ImmTyDPP8); 8328 } 8329 8330 bool AMDGPUOperand::isDPPCtrl() const { 8331 using namespace AMDGPU::DPP; 8332 8333 
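  // Accept only control encodings that have an assembler form, e.g.
  // (illustrative) quad_perm:[0,1,2,3], row_shl:1, row_mirror, row_bcast:15
  // or row_share:0.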
bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8334 if (result) { 8335 int64_t Imm = getImm(); 8336 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8337 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8338 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8339 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8340 (Imm == DppCtrl::WAVE_SHL1) || 8341 (Imm == DppCtrl::WAVE_ROL1) || 8342 (Imm == DppCtrl::WAVE_SHR1) || 8343 (Imm == DppCtrl::WAVE_ROR1) || 8344 (Imm == DppCtrl::ROW_MIRROR) || 8345 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8346 (Imm == DppCtrl::BCAST15) || 8347 (Imm == DppCtrl::BCAST31) || 8348 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8349 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8350 } 8351 return false; 8352 } 8353 8354 //===----------------------------------------------------------------------===// 8355 // mAI 8356 //===----------------------------------------------------------------------===// 8357 8358 bool AMDGPUOperand::isBLGP() const { 8359 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8360 } 8361 8362 bool AMDGPUOperand::isCBSZ() const { 8363 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8364 } 8365 8366 bool AMDGPUOperand::isABID() const { 8367 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8368 } 8369 8370 bool AMDGPUOperand::isS16Imm() const { 8371 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8372 } 8373 8374 bool AMDGPUOperand::isU16Imm() const { 8375 return isImm() && isUInt<16>(getImm()); 8376 } 8377 8378 //===----------------------------------------------------------------------===// 8379 // dim 8380 //===----------------------------------------------------------------------===// 8381 8382 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8383 // We want to allow "dim:1D" etc., 8384 // but the initial 1 is tokenized as an integer. 
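  // e.g. for "dim:2D" the lexer produces the integer "2" immediately followed
  // by the identifier "D"; the two pieces are re-joined below. Suffixes such
  // as "SQ_RSRC_IMG_2D" (no leading integer) are accepted as well.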
8385 std::string Token; 8386 if (isToken(AsmToken::Integer)) { 8387 SMLoc Loc = getToken().getEndLoc(); 8388 Token = std::string(getTokenStr()); 8389 lex(); 8390 if (getLoc() != Loc) 8391 return false; 8392 } 8393 8394 StringRef Suffix; 8395 if (!parseId(Suffix)) 8396 return false; 8397 Token += Suffix; 8398 8399 StringRef DimId = Token; 8400 if (DimId.startswith("SQ_RSRC_IMG_")) 8401 DimId = DimId.drop_front(12); 8402 8403 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8404 if (!DimInfo) 8405 return false; 8406 8407 Encoding = DimInfo->Encoding; 8408 return true; 8409 } 8410 8411 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8412 if (!isGFX10Plus()) 8413 return MatchOperand_NoMatch; 8414 8415 SMLoc S = getLoc(); 8416 8417 if (!trySkipId("dim", AsmToken::Colon)) 8418 return MatchOperand_NoMatch; 8419 8420 unsigned Encoding; 8421 SMLoc Loc = getLoc(); 8422 if (!parseDimId(Encoding)) { 8423 Error(Loc, "invalid dim value"); 8424 return MatchOperand_ParseFail; 8425 } 8426 8427 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8428 AMDGPUOperand::ImmTyDim)); 8429 return MatchOperand_Success; 8430 } 8431 8432 //===----------------------------------------------------------------------===// 8433 // dpp 8434 //===----------------------------------------------------------------------===// 8435 8436 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8437 SMLoc S = getLoc(); 8438 8439 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8440 return MatchOperand_NoMatch; 8441 8442 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8443 8444 int64_t Sels[8]; 8445 8446 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8447 return MatchOperand_ParseFail; 8448 8449 for (size_t i = 0; i < 8; ++i) { 8450 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8451 return MatchOperand_ParseFail; 8452 8453 SMLoc Loc = getLoc(); 8454 if (getParser().parseAbsoluteExpression(Sels[i])) 8455 return MatchOperand_ParseFail; 8456 if (0 > Sels[i] || 7 < Sels[i]) { 8457 Error(Loc, "expected a 3-bit value"); 8458 return MatchOperand_ParseFail; 8459 } 8460 } 8461 8462 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8463 return MatchOperand_ParseFail; 8464 8465 unsigned DPP8 = 0; 8466 for (size_t i = 0; i < 8; ++i) 8467 DPP8 |= (Sels[i] << (i * 3)); 8468 8469 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8470 return MatchOperand_Success; 8471 } 8472 8473 bool 8474 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8475 const OperandVector &Operands) { 8476 if (Ctrl == "row_newbcast") 8477 return isGFX90A(); 8478 8479 if (Ctrl == "row_share" || 8480 Ctrl == "row_xmask") 8481 return isGFX10Plus(); 8482 8483 if (Ctrl == "wave_shl" || 8484 Ctrl == "wave_shr" || 8485 Ctrl == "wave_rol" || 8486 Ctrl == "wave_ror" || 8487 Ctrl == "row_bcast") 8488 return isVI() || isGFX9(); 8489 8490 return Ctrl == "row_mirror" || 8491 Ctrl == "row_half_mirror" || 8492 Ctrl == "quad_perm" || 8493 Ctrl == "row_shl" || 8494 Ctrl == "row_shr" || 8495 Ctrl == "row_ror"; 8496 } 8497 8498 int64_t 8499 AMDGPUAsmParser::parseDPPCtrlPerm() { 8500 // quad_perm:[%d,%d,%d,%d] 8501 8502 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8503 return -1; 8504 8505 int64_t Val = 0; 8506 for (int i = 0; i < 4; ++i) { 8507 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8508 return -1; 8509 8510 int64_t Temp; 8511 SMLoc Loc = getLoc(); 8512 if 
(getParser().parseAbsoluteExpression(Temp)) 8513 return -1; 8514 if (Temp < 0 || Temp > 3) { 8515 Error(Loc, "expected a 2-bit value"); 8516 return -1; 8517 } 8518 8519 Val += (Temp << i * 2); 8520 } 8521 8522 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8523 return -1; 8524 8525 return Val; 8526 } 8527 8528 int64_t 8529 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8530 using namespace AMDGPU::DPP; 8531 8532 // sel:%d 8533 8534 int64_t Val; 8535 SMLoc Loc = getLoc(); 8536 8537 if (getParser().parseAbsoluteExpression(Val)) 8538 return -1; 8539 8540 struct DppCtrlCheck { 8541 int64_t Ctrl; 8542 int Lo; 8543 int Hi; 8544 }; 8545 8546 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8547 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8548 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8549 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8550 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8551 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8552 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8553 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8554 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8555 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8556 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8557 .Default({-1, 0, 0}); 8558 8559 bool Valid; 8560 if (Check.Ctrl == -1) { 8561 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8562 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8563 } else { 8564 Valid = Check.Lo <= Val && Val <= Check.Hi; 8565 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8566 } 8567 8568 if (!Valid) { 8569 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8570 return -1; 8571 } 8572 8573 return Val; 8574 } 8575 8576 OperandMatchResultTy 8577 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8578 using namespace AMDGPU::DPP; 8579 8580 if (!isToken(AsmToken::Identifier) || 8581 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8582 return MatchOperand_NoMatch; 8583 8584 SMLoc S = getLoc(); 8585 int64_t Val = -1; 8586 StringRef Ctrl; 8587 8588 parseId(Ctrl); 8589 8590 if (Ctrl == "row_mirror") { 8591 Val = DppCtrl::ROW_MIRROR; 8592 } else if (Ctrl == "row_half_mirror") { 8593 Val = DppCtrl::ROW_HALF_MIRROR; 8594 } else { 8595 if (skipToken(AsmToken::Colon, "expected a colon")) { 8596 if (Ctrl == "quad_perm") { 8597 Val = parseDPPCtrlPerm(); 8598 } else { 8599 Val = parseDPPCtrlSel(Ctrl); 8600 } 8601 } 8602 } 8603 8604 if (Val == -1) 8605 return MatchOperand_ParseFail; 8606 8607 Operands.push_back( 8608 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8609 return MatchOperand_Success; 8610 } 8611 8612 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8613 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8614 } 8615 8616 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8617 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8618 } 8619 8620 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8621 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8622 } 8623 8624 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8625 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8626 } 8627 8628 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8629 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8630 } 8631 8632 // Add dummy $old operand 8633 void 
AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8634 const OperandVector &Operands, 8635 bool IsDPP8) { 8636 Inst.addOperand(MCOperand::createReg(0)); 8637 cvtVOP3DPP(Inst, Operands, IsDPP8); 8638 } 8639 8640 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8641 OptionalImmIndexMap OptionalIdx; 8642 unsigned Opc = Inst.getOpcode(); 8643 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8644 unsigned I = 1; 8645 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8646 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8647 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8648 } 8649 8650 int Fi = 0; 8651 for (unsigned E = Operands.size(); I != E; ++I) { 8652 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8653 MCOI::TIED_TO); 8654 if (TiedTo != -1) { 8655 assert((unsigned)TiedTo < Inst.getNumOperands()); 8656 // handle tied old or src2 for MAC instructions 8657 Inst.addOperand(Inst.getOperand(TiedTo)); 8658 } 8659 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8660 // Add the register arguments 8661 if (IsDPP8 && Op.isFI()) { 8662 Fi = Op.getImm(); 8663 } else if (HasModifiers && 8664 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8665 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8666 } else if (Op.isReg()) { 8667 Op.addRegOperands(Inst, 1); 8668 } else if (Op.isImm() && 8669 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8670 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8671 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8672 Op.addImmOperands(Inst, 1); 8673 } else if (Op.isImm()) { 8674 OptionalIdx[Op.getImmTy()] = I; 8675 } else { 8676 llvm_unreachable("unhandled operand type"); 8677 } 8678 } 8679 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8681 } 8682 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8683 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8684 } 8685 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8686 cvtVOP3P(Inst, Operands, OptionalIdx); 8687 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8689 } 8690 8691 if (IsDPP8) { 8692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8693 using namespace llvm::AMDGPU::DPP; 8694 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8695 } else { 8696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8697 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8700 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8701 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8702 } 8703 } 8704 } 8705 8706 // Add dummy $old operand 8707 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8708 const OperandVector &Operands, 8709 bool IsDPP8) { 8710 Inst.addOperand(MCOperand::createReg(0)); 8711 cvtDPP(Inst, Operands, IsDPP8); 8712 } 8713 8714 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8715 OptionalImmIndexMap OptionalIdx; 8716 8717 unsigned Opc = Inst.getOpcode(); 8718 bool HasModifiers = 8719 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8720 unsigned I = 1; 8721 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8722 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8723 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8724 } 8725 8726 int Fi = 0; 8727 for (unsigned E = Operands.size(); I != E; ++I) { 8728 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8729 MCOI::TIED_TO); 8730 if (TiedTo != -1) { 8731 assert((unsigned)TiedTo < Inst.getNumOperands()); 8732 // handle tied old or src2 for MAC instructions 8733 Inst.addOperand(Inst.getOperand(TiedTo)); 8734 } 8735 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8736 // Add the register arguments 8737 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8738 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8739 // Skip it. 8740 continue; 8741 } 8742 8743 if (IsDPP8) { 8744 if (Op.isDPP8()) { 8745 Op.addImmOperands(Inst, 1); 8746 } else if (HasModifiers && 8747 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8748 Op.addRegWithFPInputModsOperands(Inst, 2); 8749 } else if (Op.isFI()) { 8750 Fi = Op.getImm(); 8751 } else if (Op.isReg()) { 8752 Op.addRegOperands(Inst, 1); 8753 } else { 8754 llvm_unreachable("Invalid operand type"); 8755 } 8756 } else { 8757 if (HasModifiers && 8758 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8759 Op.addRegWithFPInputModsOperands(Inst, 2); 8760 } else if (Op.isReg()) { 8761 Op.addRegOperands(Inst, 1); 8762 } else if (Op.isDPPCtrl()) { 8763 Op.addImmOperands(Inst, 1); 8764 } else if (Op.isImm()) { 8765 // Handle optional arguments 8766 OptionalIdx[Op.getImmTy()] = I; 8767 } else { 8768 llvm_unreachable("Invalid operand type"); 8769 } 8770 } 8771 } 8772 8773 if (IsDPP8) { 8774 using namespace llvm::AMDGPU::DPP; 8775 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 8776 } else { 8777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8780 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8782 } 8783 } 8784 } 8785 8786 //===----------------------------------------------------------------------===// 8787 // sdwa 8788 //===----------------------------------------------------------------------===// 8789 8790 OperandMatchResultTy 8791 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8792 AMDGPUOperand::ImmTy Type) { 8793 using namespace llvm::AMDGPU::SDWA; 8794 8795 SMLoc S = getLoc(); 8796 StringRef Value; 8797 OperandMatchResultTy res; 8798 8799 SMLoc StringLoc; 8800 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8801 if (res != MatchOperand_Success) { 8802 return res; 8803 } 8804 8805 int64_t Int; 8806 Int = StringSwitch<int64_t>(Value) 8807 .Case("BYTE_0", SdwaSel::BYTE_0) 8808 .Case("BYTE_1", SdwaSel::BYTE_1) 8809 .Case("BYTE_2", SdwaSel::BYTE_2) 8810 .Case("BYTE_3", SdwaSel::BYTE_3) 8811 .Case("WORD_0", SdwaSel::WORD_0) 8812 .Case("WORD_1", SdwaSel::WORD_1) 8813 .Case("DWORD", SdwaSel::DWORD) 8814 .Default(0xffffffff); 8815 8816 if (Int == 0xffffffff) { 8817 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8818 return MatchOperand_ParseFail; 8819 } 8820 8821 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8822 return MatchOperand_Success; 8823 } 8824 8825 OperandMatchResultTy 8826 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8827 using namespace llvm::AMDGPU::SDWA; 8828 8829 SMLoc S = getLoc(); 8830 StringRef Value; 8831 OperandMatchResultTy res; 8832 8833 SMLoc StringLoc; 8834 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8835 if (res != MatchOperand_Success) { 8836 return res; 8837 } 8838 8839 int64_t Int; 8840 Int = StringSwitch<int64_t>(Value) 8841 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8842 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8843 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8844 .Default(0xffffffff); 8845 8846 if (Int == 0xffffffff) { 8847 Error(StringLoc, "invalid dst_unused value"); 8848 return MatchOperand_ParseFail; 8849 } 8850 8851 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8852 return MatchOperand_Success; 8853 } 8854 8855 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8856 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8857 } 8858 8859 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8860 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8861 } 8862 8863 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8864 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8865 } 8866 8867 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8868 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8869 } 8870 8871 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8872 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8873 } 8874 8875 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8876 uint64_t BasicInstType, 8877 bool SkipDstVcc, 8878 bool 
SkipSrcVcc) { 8879 using namespace llvm::AMDGPU::SDWA; 8880 8881 OptionalImmIndexMap OptionalIdx; 8882 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8883 bool SkippedVcc = false; 8884 8885 unsigned I = 1; 8886 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8887 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8888 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8889 } 8890 8891 for (unsigned E = Operands.size(); I != E; ++I) { 8892 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8893 if (SkipVcc && !SkippedVcc && Op.isReg() && 8894 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8895 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8896 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8897 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8898 // Skip VCC only if we didn't skip it on previous iteration. 8899 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8900 if (BasicInstType == SIInstrFlags::VOP2 && 8901 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8902 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8903 SkippedVcc = true; 8904 continue; 8905 } else if (BasicInstType == SIInstrFlags::VOPC && 8906 Inst.getNumOperands() == 0) { 8907 SkippedVcc = true; 8908 continue; 8909 } 8910 } 8911 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8912 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8913 } else if (Op.isImm()) { 8914 // Handle optional arguments 8915 OptionalIdx[Op.getImmTy()] = I; 8916 } else { 8917 llvm_unreachable("Invalid operand type"); 8918 } 8919 SkippedVcc = false; 8920 } 8921 8922 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8923 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8924 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8925 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8926 switch (BasicInstType) { 8927 case SIInstrFlags::VOP1: 8928 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8929 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8931 } 8932 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8933 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8935 break; 8936 8937 case SIInstrFlags::VOP2: 8938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8939 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8941 } 8942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8944 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8946 break; 8947 8948 case SIInstrFlags::VOPC: 8949 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8951 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16,f32}: its src2 register operand is tied to
  // the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" are parsed as immediate operands in ParseOperand(), but
  // MatchInstructionImpl() expects a token there and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // Operands with expression values return true for isToken because a token
    // and an expression cannot be distinguished at parse time.
    // MatchInstructionImpl() always tries to match such an operand as a token
    // first; when the expression's name is not a valid token the match fails,
    // so handle that case here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but it should also be accepted
    // for 64-bit operands. The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source operands are handled
    // in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(),
                                  AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(),
                                  AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
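// Illustrative examples of the assembly syntax handled by the operand
// parsers above (approximate; the authoritative syntax and field ranges come
// from the TableGen instruction definitions):
//
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
//   s_endpgm 3                                  // optional 16-bit immediate, defaults to 0
//   lds_direct_load v1 wait_vdst:15             // wait_vdst is a 4-bit field
//   v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7   // wait_exp is a 3-bit field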